image ai topics fixed

2025-11-04 16:25:50 +01:00 · 2025-11-04 16:25:50 +01:00 · 4e98ae4e6e
commit 4e98ae4e6e
parent e11ab4ebc5
5 changed files with 143 additions and 13 deletions
--- a/modules/aicore/aicorePluginAnthropic.py
+++ b/modules/aicore/aicorePluginAnthropic.py
@ -254,9 +254,13 @@ class AiAnthropic(BaseConnectorAi):
            
            for contentItem in userContent:
                if contentItem.get("type") == "text":
-                    textPrompt = contentItem.get("text", "")
+                    textPrompt = contentItem.get("text", "") or ""
                elif contentItem.get("type") == "image_url":
-                    imageUrl = contentItem.get("image_url", {}).get("url", "")
+                    imageUrlDict = contentItem.get("image_url")
+                    if imageUrlDict and isinstance(imageUrlDict, dict):
+                        imageUrl = imageUrlDict.get("url", "") or ""
+                    else:
+                        imageUrl = None
            
            if not imageUrl or not imageUrl.startswith("data:"):
                raise ValueError("No image data found in messages")
@ -298,9 +302,16 @@ class AiAnthropic(BaseConnectorAi):
                if msg.get("role") == "system":
                    systemContent = msg.get("content")
                    if isinstance(systemContent, list):
-                        systemPrompt = "\n".join([item.get("text", "") for item in systemContent if item.get("type") == "text"])
-                    else:
-                        systemPrompt = systemContent
+                        textParts = []
+                        for item in systemContent:
+                            if item.get("type") == "text":
+                                textValue = item.get("text")
+                                if textValue is not None:
+                                    textParts.append(str(textValue))
+                        if textParts:
+                            systemPrompt = "\n".join(textParts)
+                    elif systemContent is not None:
+                        systemPrompt = str(systemContent)
                    break
            
            # Get parameters from model (consistent with callAiBasic)
--- a/modules/aicore/aicorePluginOpenai.py
+++ b/modules/aicore/aicorePluginOpenai.py
@ -90,6 +90,7 @@ class AiOpenai(BaseConnectorAi):
                    (OperationTypeEnum.PLAN, 7),
                    (OperationTypeEnum.DATA_ANALYSE, 8),
                    (OperationTypeEnum.DATA_GENERATE, 8)
+                    # Note: GPT-3.5-turbo does NOT support vision/image operations
                ),
                version="gpt-3.5-turbo",
                calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0015 + (bytesReceived / 4 / 1000) * 0.002
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@ -187,6 +187,18 @@ class AiObjects:
        # Check if this is an image - Vision models need special handling
        isImage = (contentPart.typeGroup == "image") or (contentPart.mimeType and contentPart.mimeType.startswith("image/"))
        
+        # Determine the correct operation type based on content type
+        # Images should use IMAGE_ANALYSE, not the generic operation type
+        actualOperationType = options.operationType
+        if isImage:
+            actualOperationType = OperationTypeEnum.IMAGE_ANALYSE
+            # Get vision-capable models for images
+            availableModels = modelRegistry.getAvailableModels()
+            visionFailoverList = modelSelector.getFailoverModelList(prompt, "", AiCallOptions(operationType=actualOperationType), availableModels)
+            if visionFailoverList:
+                logger.debug(f"Using {len(visionFailoverList)} vision-capable models for image processing")
+                failoverModelList = visionFailoverList
+        
        for attempt, model in enumerate(failoverModelList):
            try:
                logger.info(f"Processing content part with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
@ -197,24 +209,49 @@ class AiObjects:
                    from modules.datamodels.datamodelAi import AiModelCall, AiCallOptions as AiCallOpts
                    
                    try:
+                        # Validate and prepare image data
+                        if not contentPart.data:
+                            raise ValueError("Image content part has no data")
+                        
+                        # Ensure mimeType is valid
+                        mimeType = contentPart.mimeType or "image/jpeg"
+                        if not mimeType.startswith("image/"):
+                            raise ValueError(f"Invalid mimeType for image: {mimeType}")
+                        
+                        # Prepare base64 data
+                        if isinstance(contentPart.data, str):
+                            # Already base64 encoded - validate it
+                            try:
+                                base64.b64decode(contentPart.data, validate=True)
+                                base64Data = contentPart.data
+                            except Exception as e:
+                                raise ValueError(f"Invalid base64 data in contentPart: {str(e)}")
+                        elif isinstance(contentPart.data, bytes):
+                            # Binary data - encode to base64
+                            base64Data = base64.b64encode(contentPart.data).decode('utf-8')
+                        else:
+                            raise ValueError(f"Unsupported data type for image: {type(contentPart.data)}")
+                        
+                        # Create data URL
+                        imageDataUrl = f"data:{mimeType};base64,{base64Data}"
+                        
                        modelCall = AiModelCall(
                            messages=[
                                {
                                    "role": "user",
                                    "content": [
-                                        {"type": "text", "text": prompt},
+                                        {"type": "text", "text": prompt or ""},
                                        {
                                            "type": "image_url",
                                            "image_url": {
-                                                "url": f"data:{contentPart.mimeType};base64,{contentPart.data}" if isinstance(contentPart.data, str) else 
-                                                       f"data:{contentPart.mimeType};base64,{base64.b64encode(contentPart.data).decode('utf-8')}"
+                                                "url": imageDataUrl
                                            }
                                        }
                                    ]
                                }
                            ],
                            model=model,
-                            options=AiCallOpts(operationType=options.operationType)
+                            options=AiCallOpts(operationType=actualOperationType)
                        )
                        
                        modelResponse = await model.functionCall(modelCall)
--- a/modules/services/serviceNeutralization/mainServiceNeutralization.py
+++ b/modules/services/serviceNeutralization/mainServiceNeutralization.py
@ -62,7 +62,8 @@ class NeutralizationService:
        return self._neutralizeText(text, 'text')

    def processFile(self, fileId: str) -> Dict[str, Any]:
-        """Neutralize a file referenced by its fileId using component interface."""
+        """Neutralize a file referenced by its fileId using component interface.
+        Binary files are not neutralized but will be indicated in the result."""
        if not self.interfaceDbComponent:
            raise ValueError("Component interface is required to process a file by fileId")
        # Fetch file data and metadata
@ -74,6 +75,23 @@ class NeutralizationService:
            fileInfo = None
        fileName = getattr(fileInfo, 'fileName', None) if fileInfo else None
        mimeType = getattr(fileInfo, 'mimeType', None) if fileInfo else None
+        
+        # Check if file is binary and cannot be neutralized
+        if self._isBinaryMimeType(mimeType or ''):
+            # Return a result indicating binary file (not neutralized)
+            return {
+                'file_id': fileId,
+                'is_binary': True,
+                'mime_type': mimeType or 'unknown',
+                'file_name': fileName or 'unknown',
+                'neutralized_text': None,
+                'processed_info': {
+                    'type': 'binary',
+                    'status': 'skipped',
+                    'message': 'Binary file neutralization will be implemented in the future'
+                }
+            }
+        
        fileData = self.interfaceDbComponent.getFileData(fileId)
        if not fileData:
            raise ValueError(f"No file data found for fileId: {fileId}")
@ -93,7 +111,7 @@ class NeutralizationService:
                except UnicodeDecodeError:
                    continue
            if decoded is None:
-                raise ValueError("Unable to decode file content")
+                raise ValueError("Unable to decode file content as text. This may indicate a binary file that cannot be neutralized.")
            textContent = decoded

        result = self._neutralizeText(textContent, textType)
@ -101,6 +119,7 @@ class NeutralizationService:
        if fileName:
            result['neutralized_file_name'] = f"neutralized_{fileName}"
        result['file_id'] = fileId
+        result['is_binary'] = False
        return result

    def resolveText(self, text: str) -> str:
@ -231,6 +250,55 @@ class NeutralizationService:
                processed_info={'type': 'error', 'error': str(e)}
            ).model_dump()
        
+    def _isBinaryMimeType(self, mime_type: str) -> bool:
+        """Return True if mime_type is classified as binary content that cannot be neutralized as text; empty or unmatched types return False."""
+        if not mime_type:
+            return False
+        
+        mime_type_lower = mime_type.lower()
+        
+        # Exact-match allowlist of text types (the text/* entries are also covered by the 'text/' prefix check further down)
+        text_mime_types = [
+            'text/plain', 'text/html', 'text/css', 'text/markdown', 'text/csv',
+            'text/javascript', 'text/xml', 'text/json',
+            'application/json', 'application/xml', 'application/javascript',
+            'application/csv'
+        ]
+        
+        # The allowlist is consulted before any binary prefix, so these types are never reported as binary
+        if mime_type_lower in text_mime_types:
+            return False
+        
+        # Every text/* subtype is treated as text
+        if mime_type_lower.startswith('text/'):
+            return False
+        
+        # Prefixes classified as binary. NOTE(review): 'application/x-' and 'application/vnd.' are broad and also match text-based types such as 'application/x-yaml' or 'application/vnd.api+json'; 'application/vnd.' already subsumes the two longer vnd.* prefixes - confirm intended
+        binary_mime_prefixes = [
+            'image/', 'audio/', 'video/', 
+            'application/pdf', 'application/zip',
+            'application/octet-stream', 'application/x-', 
+            'application/vnd.', 'application/msword', 
+            'application/vnd.ms-', 'application/vnd.openxmlformats-'
+        ]
+        
+        # A prefix match (rather than equality) also catches values that carry parameters after the type
+        if any(mime_type_lower.startswith(prefix) for prefix in binary_mime_prefixes):
+            return True
+        
+        # Exact-match fallback. NOTE(review): every entry except 'application/gzip' already matches a prefix above, so only that entry can still be reached here
+        binary_mime_types = [
+            'application/pdf', 'application/msword', 'application/vnd.ms-excel',
+            'application/vnd.ms-powerpoint', 
+            'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+            'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+            'application/zip', 'application/x-rar-compressed', 'application/x-7z-compressed',
+            'application/x-tar', 'application/gzip'
+        ]
+        
+        return mime_type_lower in binary_mime_types
+    
    def _getContentTypeFromMime(self, mime_type: str) -> str:
        """Determine content type from MIME type for neutralization processing"""
        if mime_type.startswith('text/'):
@ -241,3 +309,4 @@ class NeutralizationService:
            return 'csv'
        else:
            return 'text'  # Default to text processing
+
--- a/modules/workflows/workflowManager.py
+++ b/modules/workflows/workflowManager.py
@ -757,7 +757,19 @@ class WorkflowManager:
                        # Neutralize the file using the neutralization service
                        neutralizationResult = self.services.neutralization.processFile(fileId)
                        
-                        if neutralizationResult and 'neutralized_text' in neutralizationResult:
+                        # Check if file is binary (not neutralized)
+                        if neutralizationResult.get('is_binary', False):
+                            # Binary file - log INFO and use original file
+                            infoMsg = f"File '{originalFileName}' (MIME type: {neutralizationResult.get('mime_type', 'unknown')}) is a binary file. Binary file neutralization will be implemented in the future. Using original file without neutralization."
+                            logger.info(infoMsg)
+                            self.services.chat.storeLog(workflow, {
+                                "message": infoMsg,
+                                "type": "info",
+                                "status": "running",
+                                "progress": 50
+                            })
+                            # Use original file (fileIdToUse already set to fileId)
+                        elif neutralizationResult and 'neutralized_text' in neutralizationResult:
                            neutralizedText = neutralizationResult['neutralized_text']
                            
                            # Create new file with neutralized content
@ -800,7 +812,7 @@ class WorkflowManager:
                            "progress": -1
                        })
                
-                # Only add document if neutralization didn't fail (or if neutralization is disabled)
+                # Add the document unless neutralization failed (binary files are added with their original, un-neutralized file)
                if not neutralizationFailed:
                    # Create document with file ID (neutralized or original)
                    document = ChatDocument(