image ai topics fixed

This commit is contained in:
ValueOn AG 2025-11-04 16:25:50 +01:00
parent e11ab4ebc5
commit 4e98ae4e6e
5 changed files with 143 additions and 13 deletions

View file

@ -254,9 +254,13 @@ class AiAnthropic(BaseConnectorAi):
for contentItem in userContent:
if contentItem.get("type") == "text":
textPrompt = contentItem.get("text", "")
textPrompt = contentItem.get("text", "") or ""
elif contentItem.get("type") == "image_url":
imageUrl = contentItem.get("image_url", {}).get("url", "")
imageUrlDict = contentItem.get("image_url")
if imageUrlDict and isinstance(imageUrlDict, dict):
imageUrl = imageUrlDict.get("url", "") or ""
else:
imageUrl = None
if not imageUrl or not imageUrl.startswith("data:"):
raise ValueError("No image data found in messages")
@ -298,9 +302,16 @@ class AiAnthropic(BaseConnectorAi):
if msg.get("role") == "system":
systemContent = msg.get("content")
if isinstance(systemContent, list):
systemPrompt = "\n".join([item.get("text", "") for item in systemContent if item.get("type") == "text"])
else:
systemPrompt = systemContent
textParts = []
for item in systemContent:
if item.get("type") == "text":
textValue = item.get("text")
if textValue is not None:
textParts.append(str(textValue))
if textParts:
systemPrompt = "\n".join(textParts)
elif systemContent is not None:
systemPrompt = str(systemContent)
break
# Get parameters from model (consistent with callAiBasic)

View file

@ -90,6 +90,7 @@ class AiOpenai(BaseConnectorAi):
(OperationTypeEnum.PLAN, 7),
(OperationTypeEnum.DATA_ANALYSE, 8),
(OperationTypeEnum.DATA_GENERATE, 8)
# Note: GPT-3.5-turbo does NOT support vision/image operations
),
version="gpt-3.5-turbo",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0015 + (bytesReceived / 4 / 1000) * 0.002

View file

@ -187,6 +187,18 @@ class AiObjects:
# Check if this is an image - Vision models need special handling
isImage = (contentPart.typeGroup == "image") or (contentPart.mimeType and contentPart.mimeType.startswith("image/"))
# Determine the correct operation type based on content type
# Images should use IMAGE_ANALYSE, not the generic operation type
actualOperationType = options.operationType
if isImage:
actualOperationType = OperationTypeEnum.IMAGE_ANALYSE
# Get vision-capable models for images
availableModels = modelRegistry.getAvailableModels()
visionFailoverList = modelSelector.getFailoverModelList(prompt, "", AiCallOptions(operationType=actualOperationType), availableModels)
if visionFailoverList:
logger.debug(f"Using {len(visionFailoverList)} vision-capable models for image processing")
failoverModelList = visionFailoverList
for attempt, model in enumerate(failoverModelList):
try:
logger.info(f"Processing content part with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
@ -197,24 +209,49 @@ class AiObjects:
from modules.datamodels.datamodelAi import AiModelCall, AiCallOptions as AiCallOpts
try:
# Validate and prepare image data
if not contentPart.data:
raise ValueError("Image content part has no data")
# Ensure mimeType is valid
mimeType = contentPart.mimeType or "image/jpeg"
if not mimeType.startswith("image/"):
raise ValueError(f"Invalid mimeType for image: {mimeType}")
# Prepare base64 data
if isinstance(contentPart.data, str):
# Already base64 encoded - validate it
try:
base64.b64decode(contentPart.data, validate=True)
base64Data = contentPart.data
except Exception as e:
raise ValueError(f"Invalid base64 data in contentPart: {str(e)}")
elif isinstance(contentPart.data, bytes):
# Binary data - encode to base64
base64Data = base64.b64encode(contentPart.data).decode('utf-8')
else:
raise ValueError(f"Unsupported data type for image: {type(contentPart.data)}")
# Create data URL
imageDataUrl = f"data:{mimeType};base64,{base64Data}"
modelCall = AiModelCall(
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{"type": "text", "text": prompt or ""},
{
"type": "image_url",
"image_url": {
"url": f"data:{contentPart.mimeType};base64,{contentPart.data}" if isinstance(contentPart.data, str) else
f"data:{contentPart.mimeType};base64,{base64.b64encode(contentPart.data).decode('utf-8')}"
"url": imageDataUrl
}
}
]
}
],
model=model,
options=AiCallOpts(operationType=options.operationType)
options=AiCallOpts(operationType=actualOperationType)
)
modelResponse = await model.functionCall(modelCall)

View file

@ -62,7 +62,8 @@ class NeutralizationService:
return self._neutralizeText(text, 'text')
def processFile(self, fileId: str) -> Dict[str, Any]:
"""Neutralize a file referenced by its fileId using component interface."""
"""Neutralize a file referenced by its fileId using component interface.
Binary files are not neutralized but will be indicated in the result."""
if not self.interfaceDbComponent:
raise ValueError("Component interface is required to process a file by fileId")
# Fetch file data and metadata
@ -74,6 +75,23 @@ class NeutralizationService:
fileInfo = None
fileName = getattr(fileInfo, 'fileName', None) if fileInfo else None
mimeType = getattr(fileInfo, 'mimeType', None) if fileInfo else None
# Check if file is binary and cannot be neutralized
if self._isBinaryMimeType(mimeType or ''):
# Return a result indicating binary file (not neutralized)
return {
'file_id': fileId,
'is_binary': True,
'mime_type': mimeType or 'unknown',
'file_name': fileName or 'unknown',
'neutralized_text': None,
'processed_info': {
'type': 'binary',
'status': 'skipped',
'message': 'Binary file neutralization will be implemented in the future'
}
}
fileData = self.interfaceDbComponent.getFileData(fileId)
if not fileData:
raise ValueError(f"No file data found for fileId: {fileId}")
@ -93,7 +111,7 @@ class NeutralizationService:
except UnicodeDecodeError:
continue
if decoded is None:
raise ValueError("Unable to decode file content")
raise ValueError("Unable to decode file content as text. This may indicate a binary file that cannot be neutralized.")
textContent = decoded
result = self._neutralizeText(textContent, textType)
@ -101,6 +119,7 @@ class NeutralizationService:
if fileName:
result['neutralized_file_name'] = f"neutralized_{fileName}"
result['file_id'] = fileId
result['is_binary'] = False
return result
def resolveText(self, text: str) -> str:
@ -231,6 +250,55 @@ class NeutralizationService:
processed_info={'type': 'error', 'error': str(e)}
).model_dump()
def _isBinaryMimeType(self, mime_type: str) -> bool:
"""Check if a MIME type represents binary content that cannot be neutralized as text"""
if not mime_type:
return False
mime_type_lower = mime_type.lower()
# Text-based MIME types that CAN be neutralized (explicit list)
text_mime_types = [
'text/plain', 'text/html', 'text/css', 'text/markdown', 'text/csv',
'text/javascript', 'text/xml', 'text/json',
'application/json', 'application/xml', 'application/javascript',
'application/csv'
]
# Check explicit text types first
if mime_type_lower in text_mime_types:
return False
# Text-based prefixes that can be neutralized
if mime_type_lower.startswith('text/'):
return False
# Binary MIME types that CANNOT be neutralized
binary_mime_prefixes = [
'image/', 'audio/', 'video/',
'application/pdf', 'application/zip',
'application/octet-stream', 'application/x-',
'application/vnd.', 'application/msword',
'application/vnd.ms-', 'application/vnd.openxmlformats-'
]
# Check if it's a binary type by prefix
if any(mime_type_lower.startswith(prefix) for prefix in binary_mime_prefixes):
return True
# Additional specific binary document types
binary_mime_types = [
'application/pdf', 'application/msword', 'application/vnd.ms-excel',
'application/vnd.ms-powerpoint',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'application/zip', 'application/x-rar-compressed', 'application/x-7z-compressed',
'application/x-tar', 'application/gzip'
]
return mime_type_lower in binary_mime_types
def _getContentTypeFromMime(self, mime_type: str) -> str:
"""Determine content type from MIME type for neutralization processing"""
if mime_type.startswith('text/'):
@ -241,3 +309,4 @@ class NeutralizationService:
return 'csv'
else:
return 'text' # Default to text processing

View file

@ -757,7 +757,19 @@ class WorkflowManager:
# Neutralize the file using the neutralization service
neutralizationResult = self.services.neutralization.processFile(fileId)
if neutralizationResult and 'neutralized_text' in neutralizationResult:
# Check if file is binary (not neutralized)
if neutralizationResult.get('is_binary', False):
# Binary file - log INFO and use original file
infoMsg = f"File '{originalFileName}' (MIME type: {neutralizationResult.get('mime_type', 'unknown')}) is a binary file. Binary file neutralization will be implemented in the future. Using original file without neutralization."
logger.info(infoMsg)
self.services.chat.storeLog(workflow, {
"message": infoMsg,
"type": "info",
"status": "running",
"progress": 50
})
# Use original file (fileIdToUse already set to fileId)
elif neutralizationResult and 'neutralized_text' in neutralizationResult:
neutralizedText = neutralizationResult['neutralized_text']
# Create new file with neutralized content
@ -800,7 +812,7 @@ class WorkflowManager:
"progress": -1
})
# Only add document if neutralization didn't fail (or if neutralization is disabled)
# Only skip document if neutralization failed (not for binary files)
if not neutralizationFailed:
# Create document with file ID (neutralized or original)
document = ChatDocument(