From cda0cb10932ddac75e6fa2929e15ec8e97c15fe0 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sun, 11 Jan 2026 22:42:02 +0100
Subject: [PATCH] hotfix: dynamic and automation workflows after PR integrations
---
modules/datamodels/datamodelChat.py | 3 +
modules/interfaces/interfaceDbChatObjects.py | 44 +++++----
.../serviceAi/subContentExtraction.py | 24 ++++-
.../services/serviceChat/mainServiceChat.py | 11 +++
.../mainServiceSharepoint.py | 22 +++--
modules/shared/progressLogger.py | 11 ++-
.../methods/methodAi/actions/process.py | 8 +-
.../actions/uploadDocument.py | 13 ++-
.../helpers/documentParsing.py | 95 +++++++++++++------
modules/workflows/workflowManager.py | 18 +++-
10 files changed, 183 insertions(+), 66 deletions(-)
diff --git a/modules/datamodels/datamodelChat.py b/modules/datamodels/datamodelChat.py
index 4800752d..33e2b3b3 100644
--- a/modules/datamodels/datamodelChat.py
+++ b/modules/datamodels/datamodelChat.py
@@ -69,6 +69,9 @@ class ChatLog(BaseModel):
operationId: Optional[str] = Field(
None, description="Operation ID to group related log entries"
)
+ roundNumber: Optional[int] = Field(None, description="Round number in workflow")
+ taskNumber: Optional[int] = Field(None, description="Task number within round")
+ actionNumber: Optional[int] = Field(None, description="Action number within task")
registerModelLabels(
diff --git a/modules/interfaces/interfaceDbChatObjects.py b/modules/interfaces/interfaceDbChatObjects.py
index 86fa7c30..9b03e5fc 100644
--- a/modules/interfaces/interfaceDbChatObjects.py
+++ b/modules/interfaces/interfaceDbChatObjects.py
@@ -1411,26 +1411,28 @@ class ChatObjects:
# Create log in normalized table
createdLog = self.db.recordCreate(ChatLog, log_model)
- # Emit log event for streaming (if event manager is available)
- try:
- from modules.features.chatbot.eventManager import get_event_manager
- event_manager = get_event_manager()
- log_timestamp = parseTimestamp(createdLog.get("timestamp"), default=getUtcTimestamp())
- # Emit log event in exact chatData format: {type, createdAt, item}
- asyncio.create_task(event_manager.emit_event(
- workflowId,
- "chatdata",
- "New log",
- "log",
- {
- "type": "log",
- "createdAt": log_timestamp,
- "item": ChatLog(**createdLog).dict()
- }
- ))
- except Exception as e:
- # Event manager not available or error - continue without emitting
- logger.debug(f"Could not emit log event: {e}")
+ # Emit log event for streaming (only for chatbot workflows)
+ # Only emit events for chatbot workflows, not for automation or dynamic workflows
+ if workflow.workflowMode == WorkflowModeEnum.WORKFLOW_CHATBOT:
+ try:
+ from modules.features.chatbot.eventManager import get_event_manager
+ event_manager = get_event_manager()
+ log_timestamp = parseTimestamp(createdLog.get("timestamp"), default=getUtcTimestamp())
+ # Emit log event in exact chatData format: {type, createdAt, item}
+ asyncio.create_task(event_manager.emit_event(
+ workflowId,
+ "chatdata",
+ "New log",
+ "log",
+ {
+ "type": "log",
+ "createdAt": log_timestamp,
+ "item": ChatLog(**createdLog).model_dump()
+ }
+ ))
+ except Exception as e:
+ # Event manager not available or error - continue without emitting
+ logger.debug(f"Could not emit log event: {e}")
# Return validated ChatLog instance
return ChatLog(**createdLog)
@@ -1541,7 +1543,7 @@ class ChatObjects:
"item": chatMessage
})
- # Get logs
+ # Get logs - return all logs with roundNumber if available
logs = getRecordsetWithRBAC(self.db, ChatLog, self.currentUser, recordFilter={"workflowId": workflowId})
for log in logs:
# Apply timestamp filtering in Python
diff --git a/modules/services/serviceAi/subContentExtraction.py b/modules/services/serviceAi/subContentExtraction.py
index 005c6b44..a866f68f 100644
--- a/modules/services/serviceAi/subContentExtraction.py
+++ b/modules/services/serviceAi/subContentExtraction.py
@@ -138,6 +138,13 @@ class ContentExtractor:
logger.debug(f"Part {part.id}: reference={hasReferenceIntent}, render={hasRenderIntent}, extract={hasExtractIntent}, hasData={hasPartData}")
+ # SAFETY: For images with any intent, always ensure render is included
+ # This ensures the image object part is always available for later rendering
+ isImage = part.typeGroup == "image" or (part.mimeType and part.mimeType.startswith("image/"))
+ if isImage and hasPartData and not hasRenderIntent:
+ logger.info(f"🖼️ Auto-adding render intent for image {part.id} (original intents: {partIntent})")
+ hasRenderIntent = True
+
# Track ob der originale Part bereits hinzugefügt wurde
originalPartAdded = False
@@ -332,7 +339,13 @@ class ContentExtractor:
# WICHTIG: "render" und "extract" können beide vorhanden sein!
# In diesem Fall erzeugen wir BEIDE ContentParts
- if "render" in intent.intents:
+ # SAFETY: For images with any intent, always create object part for later rendering
+ isImageDocument = document.mimeType and document.mimeType.startswith("image/")
+ shouldAutoRender = isImageDocument and "render" not in intent.intents and ("extract" in intent.intents or "reference" in intent.intents)
+ if shouldAutoRender:
+ logger.info(f"🖼️ Auto-adding render for image document {document.id} (original intents: {intent.intents})")
+
+ if "render" in intent.intents or shouldAutoRender:
# Für Images/Binary: extrahiere als Object
if document.mimeType.startswith("image/") or self._isBinary(document.mimeType):
try:
@@ -391,6 +404,9 @@ class ContentExtractor:
)
# Konvertiere extrahierte Ergebnisse zu ContentParts mit Metadaten
+ # Check if object part exists (either explicit render or auto-render for images)
+ hasObjectPart = "render" in intent.intents or shouldAutoRender
+
for extracted in extractedResults:
for part in extracted.parts:
# Markiere als extracted Format
@@ -400,8 +416,8 @@ class ContentExtractor:
"extractionPrompt": extractionPrompt,
"intent": "extract",
"usageHint": f"Use extracted content from {document.fileName}",
- # Verknüpfung zu object Part (falls vorhanden)
- "relatedObjectPartId": f"obj_{document.id}" if "render" in intent.intents else None
+ # Verknüpfung zu object Part (falls vorhanden - including auto-render for images)
+ "relatedObjectPartId": f"obj_{document.id}" if hasObjectPart else None
})
# For images: Mark that Vision AI extraction is needed during section generation
@@ -410,7 +426,7 @@ class ContentExtractor:
logger.info(f"📷 Image part {part.id} marked for Vision AI extraction during section generation")
# Stelle sicher, dass ID eindeutig ist (falls object Part existiert)
- if "render" in intent.intents:
+ if hasObjectPart:
part.id = f"ext_{document.id}_{part.id}"
allContentParts.append(part)
diff --git a/modules/services/serviceChat/mainServiceChat.py b/modules/services/serviceChat/mainServiceChat.py
index fc07d5af..137dcd05 100644
--- a/modules/services/serviceChat/mainServiceChat.py
+++ b/modules/services/serviceChat/mainServiceChat.py
@@ -647,6 +647,17 @@ class ChatService:
"""Persist ChatLog and map it into the in-memory workflow logs list."""
logData = dict(logData or {})
logData["workflowId"] = workflow.id
+
+ # Auto-populate roundNumber from workflow's currentRound if not explicitly set
+ if "roundNumber" not in logData or logData["roundNumber"] is None:
+ currentRound = getattr(workflow, 'currentRound', None)
+ # Default to 1 if workflow doesn't have currentRound set
+ if currentRound is None:
+ currentRound = 1
+ logger.warning(f"storeLog: workflow.currentRound is None, defaulting to 1")
+ logData["roundNumber"] = currentRound
+ logger.debug(f"storeLog: Set roundNumber={currentRound} for log: {logData.get('message', '')[:50]}")
+
chatInterface = self.interfaceDbChat
chatLog = chatInterface.createLog(logData)
if not chatLog:
diff --git a/modules/services/serviceSharepoint/mainServiceSharepoint.py b/modules/services/serviceSharepoint/mainServiceSharepoint.py
index e393b695..7ee89669 100644
--- a/modules/services/serviceSharepoint/mainServiceSharepoint.py
+++ b/modules/services/serviceSharepoint/mainServiceSharepoint.py
@@ -731,7 +731,7 @@ class SharepointService:
async def getSiteByStandardPath(self, sitePath: str, allSites: Optional[List[Dict[str, Any]]] = None) -> Optional[Dict[str, Any]]:
"""
Get SharePoint site directly by Microsoft-standard path (/sites/SiteName)
- without loading all sites. Uses hostname from first available site.
+ without loading all sites. Uses hostname from root site (single API call).
Parameters:
sitePath (str): Site path like 'company-share' (without /sites/ prefix)
@@ -741,18 +741,28 @@ class SharepointService:
Optional[Dict[str, Any]]: Site information if found, None otherwise
"""
try:
- # Get hostname from first available site (minimal load - only 1 site)
+ from urllib.parse import urlparse
+ hostname = None
+
+ # Get hostname - priority order: allSites > root site API > full discovery (fallback)
if allSites and len(allSites) > 0:
- from urllib.parse import urlparse
webUrl = allSites[0].get("webUrl", "")
hostname = urlparse(webUrl).hostname if webUrl else None
- else:
- # Discover minimal sites to get hostname
+
+ if not hostname:
+ # Get hostname from root site (single efficient API call instead of discovering all sites)
+ rootSite = await self._makeGraphApiCall("sites/root")
+ if rootSite and "webUrl" in rootSite and "error" not in rootSite:
+ hostname = urlparse(rootSite.get("webUrl", "")).hostname
+ logger.debug(f"Got hostname '{hostname}' from root site (efficient)")
+
+ if not hostname:
+ # Fallback: discover all sites (expensive, avoid if possible)
+ logger.warning("Could not get hostname from root site, falling back to full site discovery")
minimalSites = await self.discoverSites()
if not minimalSites:
logger.warning("No sites available to extract hostname")
return None
- from urllib.parse import urlparse
hostname = urlparse(minimalSites[0].get("webUrl", "")).hostname
if not hostname:
diff --git a/modules/shared/progressLogger.py b/modules/shared/progressLogger.py
index 04561fe4..d12a1562 100644
--- a/modules/shared/progressLogger.py
+++ b/modules/shared/progressLogger.py
@@ -149,6 +149,13 @@ class ProgressLogger:
# Parent operation never existed - log warning
logger.debug(f"WARNING: Parent operation '{parentOperationId}' not found in activeOperations when creating log for '{operationId}'. Available operations: {list(self.activeOperations.keys())}. Child operation may appear at root level.")
+ # Get round number from workflow - include in operationId for unique per-round operations
+ roundNumber = getattr(workflow, 'currentRound', None) or 1
+
+ # Create round-specific operationId and parentId for the log
+ roundOperationId = f"{operationId}_r{roundNumber}"
+ roundParentId = f"{parentOperationId}_r{roundNumber}" if parentOperationId else None
+
# parentId in ChatLog should be the operationId of the parent operation, not the log entry ID
logData = {
"workflowId": workflow.id,
@@ -156,8 +163,8 @@ class ProgressLogger:
"type": "info",
"status": status,
"progress": progress,
- "operationId": operationId,
- "parentId": parentOperationId # Set to parent's operationId, not log entry ID
+ "operationId": roundOperationId,
+ "parentId": roundParentId # Set to parent's operationId, not log entry ID
}
try:
diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py
index b8f81465..f804c0b9 100644
--- a/modules/workflows/methods/methodAi/actions/process.py
+++ b/modules/workflows/methods/methodAi/actions/process.py
@@ -192,8 +192,14 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
# automatically in _extractAndPrepareContent() (Phase 5B)
logger.info(f"ai.process: Calling callAiContent with {len(documentList.references)} document references")
if documentList.references:
+ from modules.datamodels.datamodelDocref import DocumentListReference, DocumentItemReference
for idx, ref in enumerate(documentList.references):
- logger.info(f" Passing reference {idx + 1}: documentId={ref.documentId}")
+ if isinstance(ref, DocumentItemReference):
+ logger.info(f" Passing reference {idx + 1}: documentId={ref.documentId}")
+ elif isinstance(ref, DocumentListReference):
+ logger.info(f" Passing reference {idx + 1}: label={ref.label}")
+ else:
+ logger.info(f" Passing reference {idx + 1}: {ref}")
aiResponse = await self.services.ai.callAiContent(
prompt=aiPrompt,
diff --git a/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py b/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
index cfe4cf86..e9361853 100644
--- a/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
+++ b/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
@@ -142,8 +142,17 @@ async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
logger.info(f"Using folder ID upload endpoint: {uploadEndpoint}")
else:
# It's a regular path - use the root-based upload endpoint
- uploadPath = uploadPath.rstrip('/') + '/' + fileName
- uploadPathClean = uploadPath.lstrip('/')
+ # Strip /sites/{siteName}/ prefix if present (Graph API path is relative to site's drive)
+ uploadPathForApi = uploadPath
+ if uploadPathForApi.startswith('/sites/'):
+ # Extract path after /sites/{siteName}/
+ parts = uploadPathForApi.split('/', 3) # ['', 'sites', 'sitename', 'rest/of/path']
+ if len(parts) >= 4:
+ uploadPathForApi = '/' + parts[3] # Keep the rest after /sites/sitename/
+ logger.info(f"Stripped site prefix from upload path: '{uploadPath}' -> '{uploadPathForApi}'")
+
+ uploadPathForApi = uploadPathForApi.rstrip('/') + '/' + fileName
+ uploadPathClean = uploadPathForApi.lstrip('/')
uploadEndpoint = f"sites/{siteId}/drive/root:/{uploadPathClean}:/content"
logger.info(f"Using path-based upload endpoint: {uploadEndpoint}")
diff --git a/modules/workflows/methods/methodSharepoint/helpers/documentParsing.py b/modules/workflows/methods/methodSharepoint/helpers/documentParsing.py
index 138e2ea3..9903568d 100644
--- a/modules/workflows/methods/methodSharepoint/helpers/documentParsing.py
+++ b/modules/workflows/methods/methodSharepoint/helpers/documentParsing.py
@@ -62,6 +62,18 @@ class DocumentParsingHelper:
try:
resultData = json.loads(fileData)
+
+ # Handle nested structure: documentData.data may contain the actual JSON as a string
+ if "documentData" in resultData and isinstance(resultData.get("documentData"), dict):
+ innerData = resultData["documentData"].get("data")
+ if innerData and isinstance(innerData, str):
+ try:
+ # Parse the inner JSON string
+ resultData = json.loads(innerData)
+ logger.debug(f"Parsed nested documentData.data structure")
+ except json.JSONDecodeError:
+ logger.debug(f"documentData.data is not valid JSON, using as-is")
+
foundDocuments = resultData.get("foundDocuments", [])
# If no foundDocuments, check if it's a listDocuments result (has listResults)
@@ -202,40 +214,67 @@ class DocumentParsingHelper:
if fileData:
try:
- resultData = json.loads(fileData)
- foundDocuments = resultData.get("foundDocuments", [])
+ # Check if fileData is binary (not text/JSON)
+ # Binary files (xlsx, pdf, etc.) can't be parsed as JSON
+ isBinaryFile = False
+ if isinstance(fileData, bytes):
+ try:
+ fileData = fileData.decode('utf-8')
+ except UnicodeDecodeError:
+ # Binary file - cannot parse as JSON
+ isBinaryFile = True
+ logger.debug(f"File is binary (not UTF-8 decodable), treating as regular file to upload")
- if foundDocuments:
- # Extract folder path from first found document
- firstDoc = foundDocuments[0]
- parentPath = firstDoc.get("parentPath", "")
- if parentPath:
- folderPath = parentPath
+ if isBinaryFile:
+ # Binary file - treat as regular file to upload
+ filesToUpload = chatDocuments
+ else:
+ resultData = json.loads(fileData)
- # Extract site information
- siteName = firstDoc.get("siteName")
- siteId = firstDoc.get("siteId")
+ # Handle nested structure: documentData.data may contain the actual JSON as a string
+ if "documentData" in resultData and isinstance(resultData.get("documentData"), dict):
+ innerData = resultData["documentData"].get("data")
+ if innerData and isinstance(innerData, str):
+ try:
+ # Parse the inner JSON string
+ resultData = json.loads(innerData)
+ logger.debug(f"Parsed nested documentData.data structure for folder parsing")
+ except json.JSONDecodeError:
+ logger.debug(f"documentData.data is not valid JSON, using as-is")
- if siteName and siteId:
- sites = [{
- "id": siteId,
- "displayName": siteName,
- "webUrl": firstDoc.get("webUrl", "")
- }]
- elif siteName:
- # Discover sites to find siteId
- allSites = await self.method.siteDiscovery.discoverSharePointSites()
- matchingSites = self.method.siteDiscovery.filterSitesByHint(allSites, siteName)
- if matchingSites:
+ foundDocuments = resultData.get("foundDocuments", [])
+
+ if foundDocuments:
+ # Extract folder path from first found document
+ firstDoc = foundDocuments[0]
+ parentPath = firstDoc.get("parentPath", "")
+ if parentPath:
+ folderPath = parentPath
+
+ # Extract site information
+ siteName = firstDoc.get("siteName")
+ siteId = firstDoc.get("siteId")
+
+ if siteName and siteId:
sites = [{
- "id": matchingSites[0].get("id"),
+ "id": siteId,
"displayName": siteName,
- "webUrl": matchingSites[0].get("webUrl", "")
+ "webUrl": firstDoc.get("webUrl", "")
}]
-
- # For uploadDocument: filesToUpload are the chatDocuments themselves
- # (they contain the files to upload)
- filesToUpload = chatDocuments
+ elif siteName:
+ # Discover sites to find siteId
+ allSites = await self.method.siteDiscovery.discoverSharePointSites()
+ matchingSites = self.method.siteDiscovery.filterSitesByHint(allSites, siteName)
+ if matchingSites:
+ sites = [{
+ "id": matchingSites[0].get("id"),
+ "displayName": siteName,
+ "webUrl": matchingSites[0].get("webUrl", "")
+ }]
+
+ # For uploadDocument: filesToUpload are the chatDocuments themselves
+ # (they contain the files to upload)
+ filesToUpload = chatDocuments
except json.JSONDecodeError:
# Not a findDocumentPath result - treat as regular files to upload
diff --git a/modules/workflows/workflowManager.py b/modules/workflows/workflowManager.py
index 9806060a..980ce120 100644
--- a/modules/workflows/workflowManager.py
+++ b/modules/workflows/workflowManager.py
@@ -209,6 +209,17 @@ class WorkflowManager:
complexity = analysisResult.get('complexity', 'moderate')
needsWorkflowHistory = analysisResult.get('needsWorkflowHistory', False)
fastTrack = analysisResult.get('fastTrack', False)
+ workflowName = analysisResult.get('workflowName')
+
+ # Update workflow name if provided by analysis
+ if workflowName and workflowName.strip():
+ try:
+ workflow = self.services.workflow
+ if workflow:
+ self.services.chat.updateWorkflow(workflow.id, {"name": workflowName.strip()})
+ logger.debug(f"Updated workflow {workflow.id} name to: {workflowName.strip()}")
+ except Exception as e:
+ logger.warning(f"Failed to update workflow name: {e}")
# Extract intent analysis fields and store as workflowIntent
workflowIntent = {
@@ -324,6 +335,7 @@ class WorkflowManager:
9. expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., ["xlsx", "pdf"]). If format is unclear or not specified, use empty list []
10. qualityRequirements: Quality requirements they have (accuracy, completeness) as {{accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}}
11. successCriteria: Specific success criteria that define completion (array of strings)
+12. workflowName: Create a concise, descriptive name for this workflow in the detected language. The name should summarize the main task or goal (e.g., "Service Report January 2026", "Email Analysis", "Document Generation"). Keep it short (max 60 characters) and meaningful.
Rules:
- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained
@@ -357,7 +369,8 @@ Return ONLY JSON (no markdown) with this exact structure:
"accuracyThreshold": 0.0-1.0,
"completenessThreshold": 0.0-1.0
}},
- "successCriteria": ["specific criterion 1", "specific criterion 2"]
+ "successCriteria": ["specific criterion 1", "specific criterion 2"],
+    "workflowName": "Concise workflow name in detected language (max 60 characters)"
}}
## User Message
@@ -406,7 +419,8 @@ The following is the user's original input message. Analyze intent, normalize th
"accuracyThreshold": 0.8,
"completenessThreshold": 0.8
},
- "successCriteria": []
+ "successCriteria": [],
+ "workflowName": "New Workflow"
}
async def _executeFastPath(self, userInput: UserInputRequest, documents: List[ChatDocument]) -> None: