From 4e98ae4e6edc13670f271ad652ab250974eb8f83 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Tue, 4 Nov 2025 16:25:50 +0100
Subject: [PATCH] image ai topics fixed

---
 modules/aicore/aicorePluginAnthropic.py       | 21 ++++--
 modules/aicore/aicorePluginOpenai.py          |  1 +
 modules/interfaces/interfaceAiObjects.py      | 45 +++++++++++-
 .../mainServiceNeutralization.py              | 73 ++++++++++++++++++-
 modules/workflows/workflowManager.py          | 16 +++-
 5 files changed, 143 insertions(+), 13 deletions(-)

diff --git a/modules/aicore/aicorePluginAnthropic.py b/modules/aicore/aicorePluginAnthropic.py
index 99fb9904..f354979c 100644
--- a/modules/aicore/aicorePluginAnthropic.py
+++ b/modules/aicore/aicorePluginAnthropic.py
@@ -254,9 +254,13 @@ class AiAnthropic(BaseConnectorAi):
             
             for contentItem in userContent:
                 if contentItem.get("type") == "text":
-                    textPrompt = contentItem.get("text", "")
+                    textPrompt = contentItem.get("text", "") or ""
                 elif contentItem.get("type") == "image_url":
-                    imageUrl = contentItem.get("image_url", {}).get("url", "")
+                    imageUrlDict = contentItem.get("image_url")
+                    if imageUrlDict and isinstance(imageUrlDict, dict):
+                        imageUrl = imageUrlDict.get("url", "") or ""
+                    else:
+                        imageUrl = None
             
             if not imageUrl or not imageUrl.startswith("data:"):
                 raise ValueError("No image data found in messages")
@@ -298,9 +302,16 @@ class AiAnthropic(BaseConnectorAi):
                 if msg.get("role") == "system":
                     systemContent = msg.get("content")
                     if isinstance(systemContent, list):
-                        systemPrompt = "\n".join([item.get("text", "") for item in systemContent if item.get("type") == "text"])
-                    else:
-                        systemPrompt = systemContent
+                        textParts = []
+                        for item in systemContent:
+                            if item.get("type") == "text":
+                                textValue = item.get("text")
+                                if textValue is not None:
+                                    textParts.append(str(textValue))
+                        if textParts:
+                            systemPrompt = "\n".join(textParts)
+                    elif systemContent is not None:
+                        systemPrompt = str(systemContent)
                     break
             
             # Get parameters from model (consistent with callAiBasic)
diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py
index c67a8a64..7f7e3c70 100644
--- a/modules/aicore/aicorePluginOpenai.py
+++ b/modules/aicore/aicorePluginOpenai.py
@@ -90,6 +90,7 @@ class AiOpenai(BaseConnectorAi):
                     (OperationTypeEnum.PLAN, 7),
                     (OperationTypeEnum.DATA_ANALYSE, 8),
                     (OperationTypeEnum.DATA_GENERATE, 8)
+                    # Note: GPT-3.5-turbo does NOT support vision/image operations
                 ),
                 version="gpt-3.5-turbo",
                 calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0015 + (bytesReceived / 4 / 1000) * 0.002
diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py
index 8b665b09..fc5899dd 100644
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@@ -187,6 +187,18 @@ class AiObjects:
         # Check if this is an image - Vision models need special handling
         isImage = (contentPart.typeGroup == "image") or (contentPart.mimeType and contentPart.mimeType.startswith("image/"))
         
+        # Determine the correct operation type based on content type
+        # Images should use IMAGE_ANALYSE, not the generic operation type
+        actualOperationType = options.operationType
+        if isImage:
+            actualOperationType = OperationTypeEnum.IMAGE_ANALYSE
+            # Get vision-capable models for images
+            availableModels = modelRegistry.getAvailableModels()
+            visionFailoverList = modelSelector.getFailoverModelList(prompt, "", AiCallOptions(operationType=actualOperationType), availableModels)
+            if visionFailoverList:
+                logger.debug(f"Using {len(visionFailoverList)} vision-capable models for image processing")
+                failoverModelList = visionFailoverList
+        
         for attempt, model in enumerate(failoverModelList):
             try:
                 logger.info(f"Processing content part with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
@@ -197,24 +209,49 @@ class AiObjects:
                     from modules.datamodels.datamodelAi import AiModelCall, AiCallOptions as AiCallOpts
                     
                     try:
+                        # Validate and prepare image data
+                        if not contentPart.data:
+                            raise ValueError("Image content part has no data")
+                        
+                        # Ensure mimeType is valid
+                        mimeType = contentPart.mimeType or "image/jpeg"
+                        if not mimeType.startswith("image/"):
+                            raise ValueError(f"Invalid mimeType for image: {mimeType}")
+                        
+                        # Prepare base64 data
+                        if isinstance(contentPart.data, str):
+                            # Already base64 encoded - validate it
+                            try:
+                                base64.b64decode(contentPart.data, validate=True)
+                                base64Data = contentPart.data
+                            except Exception as e:
+                                raise ValueError(f"Invalid base64 data in contentPart: {str(e)}")
+                        elif isinstance(contentPart.data, bytes):
+                            # Binary data - encode to base64
+                            base64Data = base64.b64encode(contentPart.data).decode('utf-8')
+                        else:
+                            raise ValueError(f"Unsupported data type for image: {type(contentPart.data)}")
+                        
+                        # Create data URL
+                        imageDataUrl = f"data:{mimeType};base64,{base64Data}"
+                        
                         modelCall = AiModelCall(
                             messages=[
                                 {
                                     "role": "user",
                                     "content": [
-                                        {"type": "text", "text": prompt},
+                                        {"type": "text", "text": prompt or ""},
                                         {
                                             "type": "image_url",
                                             "image_url": {
-                                                "url": f"data:{contentPart.mimeType};base64,{contentPart.data}" if isinstance(contentPart.data, str) else 
-                                                       f"data:{contentPart.mimeType};base64,{base64.b64encode(contentPart.data).decode('utf-8')}"
+                                                "url": imageDataUrl
                                             }
                                         }
                                     ]
                                 }
                             ],
                             model=model,
-                            options=AiCallOpts(operationType=options.operationType)
+                            options=AiCallOpts(operationType=actualOperationType)
                         )
                         
                         modelResponse = await model.functionCall(modelCall)
diff --git a/modules/services/serviceNeutralization/mainServiceNeutralization.py b/modules/services/serviceNeutralization/mainServiceNeutralization.py
index e475f1b3..0b37db77 100644
--- a/modules/services/serviceNeutralization/mainServiceNeutralization.py
+++ b/modules/services/serviceNeutralization/mainServiceNeutralization.py
@@ -62,7 +62,8 @@ class NeutralizationService:
         return self._neutralizeText(text, 'text')
 
     def processFile(self, fileId: str) -> Dict[str, Any]:
-        """Neutralize a file referenced by its fileId using component interface."""
+        """Neutralize a file referenced by its fileId using component interface.
+        Binary files are not neutralized but will be indicated in the result."""
         if not self.interfaceDbComponent:
             raise ValueError("Component interface is required to process a file by fileId")
         # Fetch file data and metadata
@@ -74,6 +75,23 @@ class NeutralizationService:
             fileInfo = None
         fileName = getattr(fileInfo, 'fileName', None) if fileInfo else None
         mimeType = getattr(fileInfo, 'mimeType', None) if fileInfo else None
+        
+        # Check if file is binary and cannot be neutralized
+        if self._isBinaryMimeType(mimeType or ''):
+            # Return a result indicating binary file (not neutralized)
+            return {
+                'file_id': fileId,
+                'is_binary': True,
+                'mime_type': mimeType or 'unknown',
+                'file_name': fileName or 'unknown',
+                'neutralized_text': None,
+                'processed_info': {
+                    'type': 'binary',
+                    'status': 'skipped',
+                    'message': 'Binary file neutralization will be implemented in the future'
+                }
+            }
+        
         fileData = self.interfaceDbComponent.getFileData(fileId)
         if not fileData:
             raise ValueError(f"No file data found for fileId: {fileId}")
@@ -93,7 +111,7 @@ class NeutralizationService:
                 except UnicodeDecodeError:
                     continue
             if decoded is None:
-                raise ValueError("Unable to decode file content")
+                raise ValueError("Unable to decode file content as text. This may indicate a binary file that cannot be neutralized.")
             textContent = decoded
 
         result = self._neutralizeText(textContent, textType)
@@ -101,6 +119,7 @@ class NeutralizationService:
         if fileName:
             result['neutralized_file_name'] = f"neutralized_{fileName}"
         result['file_id'] = fileId
+        result['is_binary'] = False
         return result
 
     def resolveText(self, text: str) -> str:
@@ -231,6 +250,55 @@ class NeutralizationService:
                 processed_info={'type': 'error', 'error': str(e)}
             ).model_dump()
         
+    def _isBinaryMimeType(self, mime_type: str) -> bool:
+        """Return True if *mime_type* is binary content that cannot be neutralized as text.
+
+        Args:
+            mime_type: MIME type, possibly empty or carrying parameters such as
+                ``; charset=utf-8``. Matching is case-insensitive.
+
+        Returns:
+            True for image/audio/video and the known binary ``application/*``
+            families; False for empty input, ``text/*``, textual
+            ``application/*`` types (including ``+json`` / ``+xml`` suffixes)
+            and any unrecognized type.
+        """
+        if not mime_type:
+            return False
+
+        # Compare only the bare "type/subtype": drop parameters and padding.
+        essence = mime_type.split(';', 1)[0].strip().lower()
+
+        # Media families are always binary (image/svg+xml stays with the images).
+        if essence.startswith(('image/', 'audio/', 'video/')):
+            return True
+
+        # Textual application/* types that CAN be neutralized.
+        text_mime_types = {
+            'application/json', 'application/xml', 'application/javascript',
+            'application/csv',
+        }
+
+        # A "+json" / "+xml" structured-syntax suffix marks a text payload even in
+        # the vendor tree (e.g. application/vnd.api+json); treating it as binary
+        # would let such files bypass neutralization.
+        if (
+            essence.startswith('text/')
+            or essence in text_mime_types
+            or essence.endswith(('+json', '+xml'))
+        ):
+            return False
+
+        # Binary application/* families. 'application/vnd.' already covers the
+        # vnd.ms-* and vnd.openxmlformats-* office formats, and 'application/x-'
+        # the rar/7z/tar archives, so those need no separate entries.
+        binary_mime_prefixes = (
+            'application/pdf', 'application/zip', 'application/gzip',
+            'application/octet-stream', 'application/msword',
+            'application/x-', 'application/vnd.',
+        )
+        return essence.startswith(binary_mime_prefixes)
+    
     def _getContentTypeFromMime(self, mime_type: str) -> str:
         """Determine content type from MIME type for neutralization processing"""
         if mime_type.startswith('text/'):
@@ -241,3 +309,4 @@ class NeutralizationService:
             return 'csv'
         else:
             return 'text'  # Default to text processing
+
diff --git a/modules/workflows/workflowManager.py b/modules/workflows/workflowManager.py
index 87b9b599..d2f29ddc 100644
--- a/modules/workflows/workflowManager.py
+++ b/modules/workflows/workflowManager.py
@@ -757,7 +757,19 @@ class WorkflowManager:
                         # Neutralize the file using the neutralization service
                         neutralizationResult = self.services.neutralization.processFile(fileId)
                         
-                        if neutralizationResult and 'neutralized_text' in neutralizationResult:
+                        # Check if file is binary (not neutralized)
+                        if neutralizationResult.get('is_binary', False):
+                            # Binary file - log INFO and use original file
+                            infoMsg = f"File '{originalFileName}' (MIME type: {neutralizationResult.get('mime_type', 'unknown')}) is a binary file. Binary file neutralization will be implemented in the future. Using original file without neutralization."
+                            logger.info(infoMsg)
+                            self.services.chat.storeLog(workflow, {
+                                "message": infoMsg,
+                                "type": "info",
+                                "status": "running",
+                                "progress": 50
+                            })
+                            # Use original file (fileIdToUse already set to fileId)
+                        elif neutralizationResult and 'neutralized_text' in neutralizationResult:
                             neutralizedText = neutralizationResult['neutralized_text']
                             
                             # Create new file with neutralized content
@@ -800,7 +812,7 @@ class WorkflowManager:
                             "progress": -1
                         })
                 
-                # Only add document if neutralization didn't fail (or if neutralization is disabled)
+                # Only skip document if neutralization failed (not for binary files)
                 if not neutralizationFailed:
                     # Create document with file ID (neutralized or original)
                     document = ChatDocument(