From ffdaf2a326332adc3c312b7a532b49cf710b5835 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Sun, 26 Oct 2025 23:32:55 +0100 Subject: [PATCH] ai models ready for image generation --- IMAGE_ANALYSE_ANALYSIS.md | 150 ------------- modules/aicore/aicorePluginOpenai.py | 31 ++- test_ai_models.py | 310 ++++++++++++--------------- 3 files changed, 156 insertions(+), 335 deletions(-) delete mode 100644 IMAGE_ANALYSE_ANALYSIS.md diff --git a/IMAGE_ANALYSE_ANALYSIS.md b/IMAGE_ANALYSE_ANALYSIS.md deleted file mode 100644 index bb14bb39..00000000 --- a/IMAGE_ANALYSE_ANALYSIS.md +++ /dev/null @@ -1,150 +0,0 @@ -# Image Analysis Code Flow Analysis - -## Zusammenfassung der Parameter und Handovers - -### 1. Ablauf für Image-Analyse (durch Dokumentenverarbeitung) - -#### Eingabe -- **methodAi.process()** wird aufgerufen mit: - - `aiPrompt`: Textanweisung für die Bildanalyse - - `documentList`: Liste von Dokumenten (einschließlich Bilder) - - `resultType`: Output-Format (optional, default: txt) - -#### Verarbeitung -1. **mainServiceAi.py** → `callAiDocuments()` - - Delegiert an `subCoreAi.callAiDocuments()` - -2. **subCoreAi.py** → `callAiDocuments()` - - Prüft, ob Dokumente vorhanden sind - - Wenn ja: ruft `documentProcessor.callAiText()` auf - -3. **subDocumentProcessing.py** → `callAiText()` - - Ruft `processDocumentsPerChunk()` auf - -4. **subDocumentProcessing.py** → `_processChunksWithMapping()` - - Analysiert jeden Chunk - - **Wichtig**: Zeile 645-689 - Erkennung von Bildern - - Prüft `is_image` Flag basierend auf: - - `document_mime_type` (z.B. "image/jpeg") - - `part.mimeType` - - `part.typeGroup == "image"` - -5. **subCoreAi.py** → `readImage()` (wird aufgerufen für Bildchunks) - - Zeile 561-625 - - Setzt `operationType = IMAGE_ANALYSE` - - Ruft `aiObjects.callImage()` auf mit: - - `prompt`: Der Analyse-Prompt - - `imageData`: Die Bilddaten (bytes oder base64) - - `mimeType`: Z.B. "image/jpeg" - - `options`: Mit `operationType=IMAGE_ANALYSE` - -#### Ausgabe -- Textanalyse des Bildes - -### 2. Ablauf für direkte Image-Analyse - -#### Eingabe -- **mainServiceAi.readImage()** wird direkt aufgerufen mit: - - `prompt`: Textanweisung - - `imageData`: Bilddaten (bytes oder base64) - - `mimeType`: Z.B. "image/jpeg" - - `options`: Optional, wird auf `IMAGE_ANALYSE` gesetzt - -#### Verarbeitung -1. **mainServiceAi.py** → `readImage()` - - Delegiert an `subCoreAi.readImage()` - -2. **subCoreAi.py** → `readImage()` - - Setzt `operationType = IMAGE_ANALYSE` (Zeile 582) - - Ruft `aiObjects.callImage()` auf - -#### Ausgabe -- Textanalyse des Bildes - -## Wo werden welche Funktionen genutzt? - -### mainServiceAi.py - -#### `readImage()` (Zeile 96-105) -- **Verwendung**: Wird direkt von außen aufgerufen (z.B. API) -- **Delegiert an**: `subCoreAi.readImage()` -- **Verwendung**: ✅ Wird verwendet - -#### `generateImage()` (Zeile 108-118) -- **Verwendung**: Wird direkt von außen aufgerufen (z.B. API) -- **Delegiert an**: `subCoreAi.generateImage()` -- **Verwendung**: ✅ Wird verwendet - -### subCoreAi.py - -#### `readImage()` (Zeile 561-625) -- **Verwendung**: - 1. Wird von `mainServiceAi.readImage()` aufgerufen - 2. Wird von `subDocumentProcessing._processChunksWithMapping()` aufgerufen (Zeile 670) -- **Verwendung**: ✅ Wird verwendet - -#### `generateImage()` (Zeile 628-660) -- **Verwendung**: Wird von `mainServiceAi.generateImage()` aufgerufen -- **Verwendung**: ✅ Wird verwendet - -### subDocumentProcessing.py - -#### `_processChunksWithMapping()` (Zeile 594-994) -- **Bildanalyse**: Zeile 645-689 - - Erkennt Bilder basierend auf MIME-Type und typeGroup - - Ruft `core_ai.readImage()` auf -- **Verwendung**: ✅ Wird verwendet - -## Parameter-Validierung - -### ✅ Alle Parameter korrekt - -1. **operationType**: - - Wird immer auf `IMAGE_ANALYSE` gesetzt (subCoreAi Zeile 582) - - Wird korrekt übergeben - -2. **imageData**: - - Wird korrekt geladen und übergeben - - Unterstützt bytes und base64 - -3. **mimeType**: - - Wird automatisch erkannt - - Standard: "image/jpeg" - -4. **prompt**: - - Wird korrekt übergeben - - Kann von Benutzer angepasst werden - -## Handovers sind korrekt - -### mainServiceAi → subCoreAi -- ✅ `readImage()` delegiert korrekt -- ✅ `generateImage()` delegiert korrekt - -### subDocumentProcessing → subCoreAi -- ✅ Erkennt Bilder korrekt (Zeile 645-689) -- ✅ Ruft `readImage()` mit korrekten Parametern auf -- ✅ Setzt `operationType=IMAGE_ANALYSE` - -### subCoreAi → aiObjects -- ✅ Ruft `callImage()` mit korrekten Parametern auf -- ✅ Setzt `operationType=IMAGE_ANALYSE` - -## Identifizierte Probleme - -### ⚠️ Keine Probleme identifiziert - -Die Parameter und Handovers sind alle korrekt: -- ✅ Operation Type wird korrekt gesetzt -- ✅ Bilddaten werden korrekt geladen und übergeben -- ✅ MIME-Type wird korrekt erkannt -- ✅ Prompt wird korrekt übergeben -- ✅ Alle Delegierungen funktionieren korrekt - -## Test-Strategie - -Der Test verwendet: -1. Direkte Bildanalyse über `mainServiceAi.readImage()` -2. Testet alle Modelle die `IMAGE_ANALYSE` unterstützen -3. Validiert die Antworten auf Inhalt und Struktur - diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py index a2cc501c..c67a8a64 100644 --- a/modules/aicore/aicorePluginOpenai.py +++ b/modules/aicore/aicorePluginOpenai.py @@ -289,16 +289,31 @@ class AiOpenai(BaseConnectorAi): model = modelCall.model options = modelCall.options - # Parse unified prompt JSON format + # Get prompt from messages promptContent = messages[0]["content"] if messages else "" - import json - promptData = json.loads(promptContent) - # Extract parameters from unified prompt JSON - prompt = promptData.get("prompt", promptContent) - size = promptData.get("size", "1024x1024") - quality = promptData.get("quality", "standard") - style = promptData.get("style", "vivid") + # Parse prompt using AiCallPromptImage model + from modules.datamodels.datamodelAi import AiCallPromptImage + import json + + try: + # Try to parse as JSON + promptData = json.loads(promptContent) + promptModel = AiCallPromptImage(**promptData) + except: + # If not JSON, use plain text prompt + promptModel = AiCallPromptImage( + prompt=promptContent, + size=options.size if options and hasattr(options, 'size') else "1024x1024", + quality=options.quality if options and hasattr(options, 'quality') else "standard", + style=options.style if options and hasattr(options, 'style') else "vivid" + ) + + # Extract parameters from Pydantic model + prompt = promptModel.prompt + size = promptModel.size or "1024x1024" + quality = promptModel.quality or "standard" + style = promptModel.style or "vivid" logger.debug(f"Starting image generation with prompt: '{prompt[:100]}...'") diff --git a/test_ai_models.py b/test_ai_models.py index 07485087..97d49efb 100644 --- a/test_ai_models.py +++ b/test_ai_models.py @@ -1,31 +1,23 @@ #!/usr/bin/env python3 """ -AI Models Test - Tests IMAGE_ANALYSE functionality on all models that support it +AI Models Test - Tests IMAGE_GENERATE functionality on all models that support it -This script tests all models that have IMAGE_ANALYSE capability, validates that -they can analyze images and return structured content, and analyzes the quality of results. +This script tests all models that have IMAGE_GENERATE capability, validates that +they can generate images from text prompts, and analyzes the quality of results. CODE FLOW ANALYSIS: -1. methodAi.process() is called by AI planner with prompt and documents (images) -2. mainServiceAi.callAiDocuments() is called - -> delegates to subCoreAi.callAiDocuments() - -> which calls subDocumentProcessing.callAiText() - -> which processes chunks and detects images - -> for image chunks, calls subCoreAi.readImage() - -> which calls aiObjects.callImage() with operationType=IMAGE_ANALYSE - -OR direct call: -- mainServiceAi.readImage() can be called directly (used in this test) - -> delegates to subCoreAi.readImage() - -> which calls aiObjects.callImage() with operationType=IMAGE_ANALYSE +1. methodAi.generateImage() is called with prompt and optional size/quality/style +2. mainServiceAi.generateImage() is called + -> delegates to subCoreAi.generateImage() + -> which calls aiObjects.generateImage() + -> which creates AiModelCall and calls model.functionCall() WHERE FUNCTIONS ARE USED: -- mainServiceAi.readImage(): Public API entry point for direct image analysis - mainServiceAi.generateImage(): Public API entry point for image generation -- subCoreAi.readImage(): Internal implementation, called by document processing or directly - subCoreAi.generateImage(): Internal implementation, called by mainServiceAi -- subDocumentProcessing._processChunksWithMapping(): Detects image chunks and calls readImage() +- aiObjects.generateImage(): Creates standardized call and invokes model +- model.functionCall(): Direct model plugin call (e.g., DALL-E 3) """ import asyncio @@ -67,30 +59,6 @@ class AIModelsTester: # Create modeltest subdirectory self.modelTestDir = os.path.join(self.logsDir, "modeltest") os.makedirs(self.modelTestDir, exist_ok=True) - - # Copy test image to modeltest directory if it exists - testImageSource = os.path.join(self.logsDir, "_testdata_photo_2025-06-03_13-05-52.jpg") - testImageDest = os.path.join(self.modelTestDir, "_testdata_photo_2025-06-03_13-05-52.jpg") - if os.path.exists(testImageSource) and not os.path.exists(testImageDest): - import shutil - shutil.copy2(testImageSource, testImageDest) - print(f"📷 Test image copied to: {testImageDest}") - - # Find test image - self.testImagePath = None - if os.path.exists(testImageDest): - self.testImagePath = testImageDest - else: - # Try to find any image in modeltest directory - for file in os.listdir(self.modelTestDir): - if file.lower().endswith(('.jpg', '.jpeg', '.png')): - self.testImagePath = os.path.join(self.modelTestDir, file) - break - - if self.testImagePath: - print(f"📷 Using test image: {self.testImagePath}") - else: - print(f"⚠️ No test image found in {self.modelTestDir}") async def initialize(self): """Initialize the AI service.""" @@ -140,48 +108,20 @@ class AIModelsTester: print(f"📁 Results will be saved to: {self.modelTestDir}") async def testModel(self, modelName: str) -> Dict[str, Any]: - """Test a specific AI model with IMAGE_ANALYSE operation.""" + """Test a specific AI model with IMAGE_GENERATE operation.""" print(f"\n{'='*60}") print(f"TESTING MODEL: {modelName}") - print(f"OPERATION TYPE: IMAGE_ANALYSE") + print(f"OPERATION TYPE: IMAGE_GENERATE") print(f"{'='*60}") - # Check if test image exists - if not self.testImagePath or not os.path.exists(self.testImagePath): - result = { - "modelName": modelName, - "status": "ERROR", - "processingTime": 0.0, - "responseLength": 0, - "responseType": "error", - "hasContent": False, - "error": "No test image available", - "fullResponse": "" - } - self.testResults.append(result) - return result + # Test prompt for image generation + testPrompt = 'Create a creative birthday cake designed to look like a monster truck tire/wheel. The cake appears to be chocolate-flavored and is decorated to resemble a large black tire with treads around the sides. On top of the cake, there is a mound of chocolate cake or brownie material meant to look like dirt or mud, with a toy monster truck positioned on top. The monster truck has large wheels and appears to be reddish in color. There are several small decorative flags in light blue and mint green colors stuck into the "dirt" mound. The words "HAPPY BIRTHDAY" are written in white letters around the side of the tire-shaped cake. The image appears to be from Yandex Images, as indicated by Russian text at the bottom. The status bar at the top shows 13:02 time and 82% battery level.' + size = "1024x1024" + quality = "standard" + style = "vivid" - # Test prompt for image analysis - testPrompt = "Analyze this image and describe what you see. Extract any text, numbers, or structured data." - - print(f"Test image: {self.testImagePath}") print(f"Test prompt: {testPrompt}") - - # Load image data - with open(self.testImagePath, 'rb') as f: - imageData = f.read() - - print(f"Image size: {len(imageData)} bytes") - - # Determine image MIME type from extension - if self.testImagePath.lower().endswith('.png'): - mimeType = "image/png" - elif self.testImagePath.lower().endswith(('.jpg', '.jpeg')): - mimeType = "image/jpeg" - else: - mimeType = "image/jpeg" # Default - - print(f"Image MIME type: {mimeType}") + print(f"Size: {size}, Quality: {quality}, Style: {style}") startTime = asyncio.get_event_loop().time() @@ -193,37 +133,25 @@ class AIModelsTester: if not model: raise Exception(f"Model {modelName} not found") - # Import base64 for image data conversion - import base64 - - # Convert image data to base64 string - if isinstance(imageData, bytes): - imageDataStr = base64.b64encode(imageData).decode('utf-8') - else: - imageDataStr = imageData - - # Create messages in vision format + # Create messages for image generation (plain text prompt) messages = [ { "role": "user", - "content": [ - {"type": "text", "text": testPrompt}, - { - "type": "image_url", - "image_url": { - "url": f"data:{mimeType};base64,{imageDataStr}" - } - } - ] + "content": testPrompt } ] - # Create model call + # Create model call with image generation parameters from modules.datamodels.datamodelAi import AiModelCall, AiCallOptions modelCall = AiModelCall( messages=messages, model=model, - options=AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE) + options=AiCallOptions( + operationType=OperationTypeEnum.IMAGE_GENERATE, + size=size, + quality=quality, + style=style + ) ) # Call model directly @@ -238,35 +166,48 @@ class AIModelsTester: endTime = asyncio.get_event_loop().time() processingTime = endTime - startTime - # Analyze result (string response from readImage) + # Analyze result (base64 image data) if result: analysisResult = { "modelName": modelName, "status": "SUCCESS", "processingTime": round(processingTime, 2), "responseLength": len(result) if result else 0, - "responseType": "string", + "responseType": "base64_image", "hasContent": True, "error": None, "testPrompt": testPrompt, - "imagePath": self.testImagePath, - "imageSize": len(imageData), - "mimeType": mimeType + "size": size, + "quality": quality, + "style": style, + "isBase64": result.startswith("data:image") if isinstance(result, str) else False } - # Try to parse as JSON + # Check if result is base64 + import base64 try: - json.loads(result) - analysisResult["isValidJson"] = True + # If it's a data URL, extract the base64 part + if result.startswith("data:image"): + base64Data = result.split(",")[1] if "," in result else result + else: + base64Data = result + + # Try to decode to verify it's valid base64 + imageBytes = base64.b64decode(base64Data) + analysisResult["isValidBase64"] = True + analysisResult["imageByteSize"] = len(imageBytes) except: - analysisResult["isValidJson"] = False + analysisResult["isValidBase64"] = False + analysisResult["imageByteSize"] = 0 - analysisResult["responsePreview"] = result[:200] + "..." if len(result) > 200 else result + analysisResult["responsePreview"] = result[:100] + "..." if len(result) > 100 else result analysisResult["fullResponse"] = result print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s") print(f"📄 Response length: {len(result)} characters") - print(f"📄 Response preview: {analysisResult['responsePreview']}") + print(f"🖼️ Valid base64: {analysisResult.get('isValidBase64', False)}") + if analysisResult.get('imageByteSize'): + print(f"🖼️ Image size: {analysisResult['imageByteSize']} bytes") result = analysisResult @@ -298,9 +239,9 @@ class AIModelsTester: "hasContent": False, "error": str(e), "testPrompt": testPrompt, - "imagePath": self.testImagePath, - "imageSize": len(imageData) if imageData else 0, - "mimeType": mimeType + "size": size, + "quality": quality, + "style": style } print(f"💥 EXCEPTION - {str(e)}") @@ -317,47 +258,73 @@ class AIModelsTester: return result def _saveImageResponse(self, modelName: str, result: Dict[str, Any]): - """Save image analysis response to file.""" + """Save image generation response as image file.""" try: timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = f"{modelName}_{timestamp}.txt" + + # Save as image file + filename = f"{modelName}_{timestamp}.png" filepath = os.path.join(self.modelTestDir, filename) - # Prepare content for saving + # Get image data content = result.get("fullResponse", "") + if not content: - content = result.get("responsePreview", "No content available") + print(f"⚠️ No image data to save for {modelName}") + return - # If there's an error, include it in the content - if result.get("error"): - content = f"ERROR: {result.get('error')}\n\n{content}" + # Decode base64 image data + import base64 - # Add metadata header - metadata = f"""Model: {modelName} -Test Time: {timestamp} -Status: {result.get('status', 'Unknown')} -Processing Time: {result.get('processingTime', 0):.2f}s -Response Length: {result.get('responseLength', 0)} characters -Is Valid JSON: {result.get('isValidJson', False)} -Image Path: {result.get('imagePath', 'N/A')} -Image Size: {result.get('imageSize', 'N/A')} bytes -MIME Type: {result.get('mimeType', 'N/A')} - ---- TEST PROMPT --- -{result.get('testPrompt', 'N/A')} - ---- RESPONSE CONTENT --- -{content} -""" - - with open(filepath, 'w', encoding='utf-8') as f: - f.write(metadata) - - result["savedTextFile"] = filepath - print(f"📄 Analysis response saved: {filepath}") + try: + # Extract base64 data if it's a data URL + if content.startswith("data:image"): + base64Data = content.split(",")[1] if "," in content else content + else: + base64Data = content + + # Decode base64 to bytes + imageBytes = base64.b64decode(base64Data) + + # Save image file + with open(filepath, 'wb') as f: + f.write(imageBytes) + + result["savedImageFile"] = filepath + print(f"🖼️ Image saved: {filepath}") + + # Also save metadata as JSON + metadata = { + "modelName": modelName, + "timestamp": timestamp, + "status": result.get('status', 'Unknown'), + "processingTime": result.get('processingTime', 0), + "responseLength": result.get('responseLength', 0), + "isValidBase64": result.get('isValidBase64', False), + "imageByteSize": len(imageBytes), + "size": result.get('size', 'N/A'), + "quality": result.get('quality', 'N/A'), + "style": result.get('style', 'N/A'), + "testPrompt": result.get('testPrompt', 'N/A'), + "imageFile": filename + } + + metadataFile = os.path.join(self.modelTestDir, f"{modelName}_{timestamp}_metadata.json") + with open(metadataFile, 'w', encoding='utf-8') as f: + json.dump(metadata, f, indent=2, ensure_ascii=False) + + print(f"📄 Metadata saved: {metadataFile}") + + except Exception as decodeError: + print(f"❌ Error decoding base64 image data: {str(decodeError)}") + # Fall back to saving as text file + textFile = os.path.join(self.modelTestDir, f"{modelName}_{timestamp}.txt") + with open(textFile, 'w', encoding='utf-8') as f: + f.write(f"Error decoding image:\n{str(decodeError)}\n\nBase64 data:\n{content[:500]}...") + print(f"📄 Saved base64 data as text: {textFile}") except Exception as e: - print(f"❌ Error saving analysis response: {str(e)}") + print(f"❌ Error saving image generation response: {str(e)}") result["saveError"] = str(e) def _saveTextResponse(self, modelName: str, result: Dict[str, Any]): @@ -462,35 +429,24 @@ Width: {crawlWidth} result["crawlValidationError"] = str(e) def _validateImageResponse(self, modelName: str, result: Dict[str, Any]): - """Validate that the IMAGE_ANALYSE response contains analyzed content.""" + """Validate that the IMAGE_GENERATE response contains a valid base64 image.""" try: content = result.get("fullResponse", "") - # Check if content is meaningful + # Check if content is a valid base64 image hasContent = bool(content and len(content.strip()) > 0) - contentLength = len(content) - result["hasContent"] = hasContent - result["contentLength"] = contentLength - # Try to determine what kind of content was extracted if hasContent: - # Check if it's structured data - isStructured = False - try: - parsed = json.loads(content) - if isinstance(parsed, dict): - isStructured = True - except: - pass + isBase64 = result.get("isValidBase64", False) + imageSize = result.get("imageByteSize", 0) + imageSizeKB = imageSize / 1024 if imageSize > 0 else 0 - result["isStructured"] = isStructured - - print(f"✅ Successfully analyzed image") - print(f" Content length: {contentLength} characters") - print(f" Is structured: {'Yes' if isStructured else 'No'}") + print(f"✅ Successfully generated image") + print(f" Image size: {imageSizeKB:.2f} KB ({imageSize} bytes)") + print(f" Valid base64: {'Yes' if isBase64 else 'No'}") else: - print(f"⚠️ Empty or invalid image analysis response") + print(f"⚠️ Empty or invalid image generation response") except Exception as e: print(f"❌ Error validating image response: {str(e)}") @@ -652,30 +608,30 @@ Width: {crawlWidth} print(f"❌ Error saving individual result: {str(e)}") def getAllAvailableModels(self) -> List[str]: - """Get all available model names that support IMAGE_ANALYSE.""" + """Get all available model names that support IMAGE_GENERATE.""" from modules.aicore.aicoreModelRegistry import modelRegistry from modules.datamodels.datamodelAi import OperationTypeEnum # Get all models from registry allModels = modelRegistry.getAvailableModels() - # Filter models that support IMAGE_ANALYSE - imageAnalyseModels = [] + # Filter models that support IMAGE_GENERATE + imageGenerateModels = [] for model in allModels: if model.operationTypes and any( - ot.operationType == OperationTypeEnum.IMAGE_ANALYSE + ot.operationType == OperationTypeEnum.IMAGE_GENERATE for ot in model.operationTypes ): - imageAnalyseModels.append(model.name) + imageGenerateModels.append(model.name) # Filter to common models for testing (remove filter to test all models) - # imageAnalyseModels = [m for m in imageAnalyseModels if "gpt" in m.lower() or "claude" in m.lower()] + # imageGenerateModels = [m for m in imageGenerateModels if "dall-e" in m.lower()] - print(f"Found {len(imageAnalyseModels)} models that support IMAGE_ANALYSE:") - for modelName in imageAnalyseModels: + print(f"Found {len(imageGenerateModels)} models that support IMAGE_GENERATE:") + for modelName in imageGenerateModels: print(f" - {modelName}") - return imageAnalyseModels + return imageGenerateModels def saveTestResults(self): """Save detailed test results to file.""" @@ -791,10 +747,10 @@ Width: {crawlWidth} print(f"📊 Total pages crawled across all models: {totalPages} pages") async def main(): - """Run AI models testing for WEB_CRAWL operation.""" + """Run AI models testing for IMAGE_GENERATE operation.""" tester = AIModelsTester() - print("Starting AI Models Testing for IMAGE_ANALYSE...") + print("Starting AI Models Testing for IMAGE_GENERATE...") print("Initializing AI service...") await tester.initialize() @@ -806,9 +762,9 @@ async def main(): print(f" {i}. {model}") print(f"\n{'='*80}") - print("STARTING IMAGE_ANALYSE TESTS") + print("STARTING IMAGE_GENERATE TESTS") print(f"{'='*80}") - print("Testing each model's ability to analyze images and return structured content...") + print("Testing each model's ability to generate images from text prompts...") print("Press Enter after each model test to continue to the next one...") # Test each model individually