From 4e98ae4e6edc13670f271ad652ab250974eb8f83 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 4 Nov 2025 16:25:50 +0100 Subject: [PATCH] image ai topics fixed --- modules/aicore/aicorePluginAnthropic.py | 21 ++++-- modules/aicore/aicorePluginOpenai.py | 1 + modules/interfaces/interfaceAiObjects.py | 45 +++++++++++- .../mainServiceNeutralization.py | 73 ++++++++++++++++++- modules/workflows/workflowManager.py | 16 +++- 5 files changed, 143 insertions(+), 13 deletions(-) diff --git a/modules/aicore/aicorePluginAnthropic.py b/modules/aicore/aicorePluginAnthropic.py index 99fb9904..f354979c 100644 --- a/modules/aicore/aicorePluginAnthropic.py +++ b/modules/aicore/aicorePluginAnthropic.py @@ -254,9 +254,13 @@ class AiAnthropic(BaseConnectorAi): for contentItem in userContent: if contentItem.get("type") == "text": - textPrompt = contentItem.get("text", "") + textPrompt = contentItem.get("text", "") or "" elif contentItem.get("type") == "image_url": - imageUrl = contentItem.get("image_url", {}).get("url", "") + imageUrlDict = contentItem.get("image_url") + if imageUrlDict and isinstance(imageUrlDict, dict): + imageUrl = imageUrlDict.get("url", "") or "" + else: + imageUrl = None if not imageUrl or not imageUrl.startswith("data:"): raise ValueError("No image data found in messages") @@ -298,9 +302,16 @@ class AiAnthropic(BaseConnectorAi): if msg.get("role") == "system": systemContent = msg.get("content") if isinstance(systemContent, list): - systemPrompt = "\n".join([item.get("text", "") for item in systemContent if item.get("type") == "text"]) - else: - systemPrompt = systemContent + textParts = [] + for item in systemContent: + if item.get("type") == "text": + textValue = item.get("text") + if textValue is not None: + textParts.append(str(textValue)) + if textParts: + systemPrompt = "\n".join(textParts) + elif systemContent is not None: + systemPrompt = str(systemContent) break # Get parameters from model (consistent with callAiBasic) diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py index c67a8a64..7f7e3c70 100644 --- a/modules/aicore/aicorePluginOpenai.py +++ b/modules/aicore/aicorePluginOpenai.py @@ -90,6 +90,7 @@ class AiOpenai(BaseConnectorAi): (OperationTypeEnum.PLAN, 7), (OperationTypeEnum.DATA_ANALYSE, 8), (OperationTypeEnum.DATA_GENERATE, 8) + # Note: GPT-3.5-turbo does NOT support vision/image operations ), version="gpt-3.5-turbo", calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0015 + (bytesReceived / 4 / 1000) * 0.002 diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py index 8b665b09..fc5899dd 100644 --- a/modules/interfaces/interfaceAiObjects.py +++ b/modules/interfaces/interfaceAiObjects.py @@ -187,6 +187,18 @@ class AiObjects: # Check if this is an image - Vision models need special handling isImage = (contentPart.typeGroup == "image") or (contentPart.mimeType and contentPart.mimeType.startswith("image/")) + # Determine the correct operation type based on content type + # Images should use IMAGE_ANALYSE, not the generic operation type + actualOperationType = options.operationType + if isImage: + actualOperationType = OperationTypeEnum.IMAGE_ANALYSE + # Get vision-capable models for images + availableModels = modelRegistry.getAvailableModels() + visionFailoverList = modelSelector.getFailoverModelList(prompt, "", AiCallOptions(operationType=actualOperationType), availableModels) + if visionFailoverList: + logger.debug(f"Using {len(visionFailoverList)} vision-capable models for image processing") + failoverModelList = visionFailoverList + for attempt, model in enumerate(failoverModelList): try: logger.info(f"Processing content part with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})") @@ -197,24 +209,49 @@ class AiObjects: from modules.datamodels.datamodelAi import AiModelCall, AiCallOptions as AiCallOpts try: + # Validate and prepare image data + if not contentPart.data: + raise ValueError("Image content part has no data") + + # Ensure mimeType is valid + mimeType = contentPart.mimeType or "image/jpeg" + if not mimeType.startswith("image/"): + raise ValueError(f"Invalid mimeType for image: {mimeType}") + + # Prepare base64 data + if isinstance(contentPart.data, str): + # Already base64 encoded - validate it + try: + base64.b64decode(contentPart.data, validate=True) + base64Data = contentPart.data + except Exception as e: + raise ValueError(f"Invalid base64 data in contentPart: {str(e)}") + elif isinstance(contentPart.data, bytes): + # Binary data - encode to base64 + base64Data = base64.b64encode(contentPart.data).decode('utf-8') + else: + raise ValueError(f"Unsupported data type for image: {type(contentPart.data)}") + + # Create data URL + imageDataUrl = f"data:{mimeType};base64,{base64Data}" + modelCall = AiModelCall( messages=[ { "role": "user", "content": [ - {"type": "text", "text": prompt}, + {"type": "text", "text": prompt or ""}, { "type": "image_url", "image_url": { - "url": f"data:{contentPart.mimeType};base64,{contentPart.data}" if isinstance(contentPart.data, str) else - f"data:{contentPart.mimeType};base64,{base64.b64encode(contentPart.data).decode('utf-8')}" + "url": imageDataUrl } } ] } ], model=model, - options=AiCallOpts(operationType=options.operationType) + options=AiCallOpts(operationType=actualOperationType) ) modelResponse = await model.functionCall(modelCall) diff --git a/modules/services/serviceNeutralization/mainServiceNeutralization.py b/modules/services/serviceNeutralization/mainServiceNeutralization.py index e475f1b3..0b37db77 100644 --- a/modules/services/serviceNeutralization/mainServiceNeutralization.py +++ b/modules/services/serviceNeutralization/mainServiceNeutralization.py @@ -62,7 +62,8 @@ class NeutralizationService: return self._neutralizeText(text, 'text') def processFile(self, fileId: str) -> Dict[str, Any]: - """Neutralize a file referenced by its fileId using component interface.""" + """Neutralize a file referenced by its fileId using component interface. + Binary files are not neutralized but will be indicated in the result.""" if not self.interfaceDbComponent: raise ValueError("Component interface is required to process a file by fileId") # Fetch file data and metadata @@ -74,6 +75,23 @@ class NeutralizationService: fileInfo = None fileName = getattr(fileInfo, 'fileName', None) if fileInfo else None mimeType = getattr(fileInfo, 'mimeType', None) if fileInfo else None + + # Check if file is binary and cannot be neutralized + if self._isBinaryMimeType(mimeType or ''): + # Return a result indicating binary file (not neutralized) + return { + 'file_id': fileId, + 'is_binary': True, + 'mime_type': mimeType or 'unknown', + 'file_name': fileName or 'unknown', + 'neutralized_text': None, + 'processed_info': { + 'type': 'binary', + 'status': 'skipped', + 'message': 'Binary file neutralization will be implemented in the future' + } + } + fileData = self.interfaceDbComponent.getFileData(fileId) if not fileData: raise ValueError(f"No file data found for fileId: {fileId}") @@ -93,7 +111,7 @@ class NeutralizationService: except UnicodeDecodeError: continue if decoded is None: - raise ValueError("Unable to decode file content") + raise ValueError("Unable to decode file content as text. This may indicate a binary file that cannot be neutralized.") textContent = decoded result = self._neutralizeText(textContent, textType) @@ -101,6 +119,7 @@ class NeutralizationService: if fileName: result['neutralized_file_name'] = f"neutralized_{fileName}" result['file_id'] = fileId + result['is_binary'] = False return result def resolveText(self, text: str) -> str: @@ -231,6 +250,55 @@ class NeutralizationService: processed_info={'type': 'error', 'error': str(e)} ).model_dump() + def _isBinaryMimeType(self, mime_type: str) -> bool: + """Check if a MIME type represents binary content that cannot be neutralized as text""" + if not mime_type: + return False + + mime_type_lower = mime_type.lower() + + # Text-based MIME types that CAN be neutralized (explicit list) + text_mime_types = [ + 'text/plain', 'text/html', 'text/css', 'text/markdown', 'text/csv', + 'text/javascript', 'text/xml', 'text/json', + 'application/json', 'application/xml', 'application/javascript', + 'application/csv' + ] + + # Check explicit text types first + if mime_type_lower in text_mime_types: + return False + + # Text-based prefixes that can be neutralized + if mime_type_lower.startswith('text/'): + return False + + # Binary MIME types that CANNOT be neutralized + binary_mime_prefixes = [ + 'image/', 'audio/', 'video/', + 'application/pdf', 'application/zip', + 'application/octet-stream', 'application/x-', + 'application/vnd.', 'application/msword', + 'application/vnd.ms-', 'application/vnd.openxmlformats-' + ] + + # Check if it's a binary type by prefix + if any(mime_type_lower.startswith(prefix) for prefix in binary_mime_prefixes): + return True + + # Additional specific binary document types + binary_mime_types = [ + 'application/pdf', 'application/msword', 'application/vnd.ms-excel', + 'application/vnd.ms-powerpoint', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + 'application/zip', 'application/x-rar-compressed', 'application/x-7z-compressed', + 'application/x-tar', 'application/gzip' + ] + + return mime_type_lower in binary_mime_types + def _getContentTypeFromMime(self, mime_type: str) -> str: """Determine content type from MIME type for neutralization processing""" if mime_type.startswith('text/'): @@ -241,3 +309,4 @@ class NeutralizationService: return 'csv' else: return 'text' # Default to text processing + diff --git a/modules/workflows/workflowManager.py b/modules/workflows/workflowManager.py index 87b9b599..d2f29ddc 100644 --- a/modules/workflows/workflowManager.py +++ b/modules/workflows/workflowManager.py @@ -757,7 +757,19 @@ class WorkflowManager: # Neutralize the file using the neutralization service neutralizationResult = self.services.neutralization.processFile(fileId) - if neutralizationResult and 'neutralized_text' in neutralizationResult: + # Check if file is binary (not neutralized) + if neutralizationResult.get('is_binary', False): + # Binary file - log INFO and use original file + infoMsg = f"File '{originalFileName}' (MIME type: {neutralizationResult.get('mime_type', 'unknown')}) is a binary file. Binary file neutralization will be implemented in the future. Using original file without neutralization." + logger.info(infoMsg) + self.services.chat.storeLog(workflow, { + "message": infoMsg, + "type": "info", + "status": "running", + "progress": 50 + }) + # Use original file (fileIdToUse already set to fileId) + elif neutralizationResult and 'neutralized_text' in neutralizationResult: neutralizedText = neutralizationResult['neutralized_text'] # Create new file with neutralized content @@ -800,7 +812,7 @@ class WorkflowManager: "progress": -1 }) - # Only add document if neutralization didn't fail (or if neutralization is disabled) + # Only skip document if neutralization failed (not for binary files) if not neutralizationFailed: # Create document with file ID (neutralized or original) document = ChatDocument(