import logging from typing import Dict, Any, List, Union, Optional from modules.connectors.connectorAiOpenai import AiOpenai, ContextLengthExceededException from modules.connectors.connectorAiAnthropic import AiAnthropic from modules.services.serviceDocument.documentExtraction import DocumentExtraction from modules.interfaces.interfaceChatModel import ChatDocument logger = logging.getLogger(__name__) # AI Model Registry with Performance Data AI_MODELS = { "openai_gpt4o": { "connector": "openai", "max_tokens": 128000, "cost_per_1k_tokens": 0.03, # Input "cost_per_1k_tokens_output": 0.06, # Output "speed_rating": 8, # 1-10 "quality_rating": 9, # 1-10 "supports_images": True, "supports_documents": True, "context_length": 128000, "model_name": "gpt-4o" }, "openai_gpt35": { "connector": "openai", "max_tokens": 16000, "cost_per_1k_tokens": 0.0015, "cost_per_1k_tokens_output": 0.002, "speed_rating": 9, "quality_rating": 7, "supports_images": False, "supports_documents": True, "context_length": 16000, "model_name": "gpt-3.5-turbo" }, "anthropic_claude": { "connector": "anthropic", "max_tokens": 200000, "cost_per_1k_tokens": 0.015, "cost_per_1k_tokens_output": 0.075, "speed_rating": 7, "quality_rating": 10, "supports_images": True, "supports_documents": True, "context_length": 200000, "model_name": "claude-3-sonnet-20240229" } } class AiCalls: """Interface for AI service interactions with centralized call method""" def __init__(self): self.openaiService = AiOpenai() self.anthropicService = AiAnthropic() self.document_extractor = DocumentExtraction() async def callAi( self, prompt: str, documents: List[ChatDocument] = None, operation_type: str = "general", priority: str = "balanced", # "speed", "quality", "cost", "balanced" compress_prompt: bool = True, compress_documents: bool = True, process_documents_individually: bool = False, max_cost: float = None, max_processing_time: int = None ) -> str: """ Zentrale AI Call Methode mit intelligenter Modell-Auswahl und Content-Verarbeitung. Args: prompt: Der Hauptprompt für die AI documents: Liste von Dokumenten zur Verarbeitung operation_type: Art der Operation ("general", "document_analysis", "image_analysis", etc.) priority: Priorität für Modell-Auswahl ("speed", "quality", "cost", "balanced") compress_prompt: Ob der Prompt komprimiert werden soll compress_documents: Ob Dokumente komprimiert werden sollen process_documents_individually: Ob Dokumente einzeln verarbeitet werden sollen max_cost: Maximale Kosten für den Call max_processing_time: Maximale Verarbeitungszeit in Sekunden Returns: AI Response als String """ try: # 1. Dokumente verarbeiten falls vorhanden document_content = "" if documents: document_content = await self._process_documents_for_ai( documents, operation_type, compress_documents, process_documents_individually ) # 2. Bestes Modell basierend auf Priorität und Content auswählen selected_model = self._select_optimal_model( prompt, document_content, priority, operation_type, max_cost, max_processing_time ) # 3. Content für das gewählte Modell optimieren optimized_prompt, optimized_content = await self._optimize_content_for_model( prompt, document_content, selected_model, compress_prompt, compress_documents ) # 4. AI Call mit Failover ausführen return await self._execute_ai_call_with_failover( selected_model, optimized_prompt, optimized_content ) except Exception as e: logger.error(f"Error in centralized AI call: {str(e)}") return f"Error: {str(e)}" def _select_optimal_model( self, prompt: str, document_content: str, priority: str, operation_type: str, max_cost: float = None, max_processing_time: int = None ) -> str: """Wählt das optimale Modell basierend auf Priorität und Content aus""" # Content-Größe berechnen total_content_size = len(prompt.encode('utf-8')) + len(document_content.encode('utf-8')) # Verfügbare Modelle filtern available_models = {} for model_name, model_info in AI_MODELS.items(): # Prüfe ob Modell für Content-Größe geeignet ist if total_content_size > model_info["context_length"] * 0.8: # 80% für Content continue # Prüfe Kosten-Limit if max_cost: estimated_cost = self._estimate_cost(model_info, total_content_size) if estimated_cost > max_cost: continue # Prüfe Operation-Type Kompatibilität if operation_type == "image_analysis" and not model_info["supports_images"]: continue available_models[model_name] = model_info if not available_models: # Fallback zum kleinsten Modell return "openai_gpt35" # Modell basierend auf Priorität auswählen if priority == "speed": return max(available_models.keys(), key=lambda x: available_models[x]["speed_rating"]) elif priority == "quality": return max(available_models.keys(), key=lambda x: available_models[x]["quality_rating"]) elif priority == "cost": return min(available_models.keys(), key=lambda x: available_models[x]["cost_per_1k_tokens"]) else: # balanced # Gewichtete Bewertung: 40% Qualität, 30% Geschwindigkeit, 30% Kosten def balanced_score(model_name): model_info = available_models[model_name] quality_score = model_info["quality_rating"] * 0.4 speed_score = model_info["speed_rating"] * 0.3 cost_score = (10 - (model_info["cost_per_1k_tokens"] * 1000)) * 0.3 # Niedrigere Kosten = höherer Score return quality_score + speed_score + cost_score return max(available_models.keys(), key=balanced_score) def _estimate_cost(self, model_info: Dict, content_size: int) -> float: """Schätzt die Kosten für einen AI Call""" # Grobe Schätzung: 1 Token ≈ 4 Zeichen estimated_tokens = content_size / 4 input_cost = (estimated_tokens / 1000) * model_info["cost_per_1k_tokens"] output_cost = (estimated_tokens / 1000) * model_info["cost_per_1k_tokens_output"] * 0.1 # 10% für Output return input_cost + output_cost async def _process_documents_for_ai( self, documents: List[ChatDocument], operation_type: str, compress_documents: bool, process_individually: bool ) -> str: """Verarbeitet Dokumente für AI Call mit documentExtraction.py""" if not documents: return "" processed_contents = [] for doc in documents: try: # Extrahiere Content mit documentExtraction.py extracted = await self.document_extractor.processFileData( doc.fileData, doc.fileName, doc.mimeType, prompt=f"Extract relevant content for {operation_type}", documentId=doc.id, enableAI=True ) # Kombiniere alle Content-Items doc_content = [] for content_item in extracted.contents: if content_item.data and content_item.data.strip(): doc_content.append(content_item.data) if doc_content: combined_doc_content = "\n\n".join(doc_content) # Komprimiere falls gewünscht if compress_documents and len(combined_doc_content.encode('utf-8')) > 10000: # 10KB Limit combined_doc_content = await self._compress_content( combined_doc_content, 10000, "document" ) processed_contents.append(f"Document: {doc.fileName}\n{combined_doc_content}") except Exception as e: logger.warning(f"Error processing document {doc.fileName}: {str(e)}") processed_contents.append(f"Document: {doc.fileName}\n[Error processing document: {str(e)}]") return "\n\n---\n\n".join(processed_contents) async def _optimize_content_for_model( self, prompt: str, document_content: str, model_name: str, compress_prompt: bool, compress_documents: bool ) -> tuple[str, str]: """Optimiert Content für das gewählte Modell""" model_info = AI_MODELS[model_name] max_content_size = model_info["context_length"] * 0.7 # 70% für Content optimized_prompt = prompt optimized_content = document_content # Prompt komprimieren falls gewünscht if compress_prompt and len(prompt.encode('utf-8')) > 2000: # 2KB Limit für Prompt optimized_prompt = await self._compress_content(prompt, 2000, "prompt") # Dokument-Content komprimieren falls gewünscht if compress_documents and document_content: content_size = len(document_content.encode('utf-8')) if content_size > max_content_size: optimized_content = await self._compress_content( document_content, int(max_content_size), "document" ) return optimized_prompt, optimized_content async def _compress_content(self, content: str, target_size: int, content_type: str) -> str: """Komprimiert Content intelligent basierend auf Typ""" if len(content.encode('utf-8')) <= target_size: return content try: # Verwende AI für intelligente Kompression compression_prompt = f""" Komprimiere den folgenden {content_type} auf maximal {target_size} Zeichen, behalte aber alle wichtigen Informationen bei: {content} Gib nur den komprimierten Inhalt zurück, ohne zusätzliche Erklärungen. """ # Verwende das schnellste verfügbare Modell für Kompression compression_model = "openai_gpt35" model_info = AI_MODELS[compression_model] connector = getattr(self, f"{model_info['connector']}Service") messages = [{"role": "user", "content": compression_prompt}] if model_info["connector"] == "openai": compressed = await connector.callAiBasic(messages) else: response = await connector.callAiBasic(messages) compressed = response["choices"][0]["message"]["content"] return compressed except Exception as e: logger.warning(f"AI compression failed, using truncation: {str(e)}") # Fallback: Einfache Truncation return content[:target_size] + "... [truncated]" async def _execute_ai_call_with_failover( self, model_name: str, prompt: str, document_content: str ) -> str: """Führt AI Call mit automatischem Failover aus""" try: model_info = AI_MODELS[model_name] connector = getattr(self, f"{model_info['connector']}Service") # Messages vorbereiten messages = [] if document_content: messages.append({ "role": "system", "content": f"Context from documents:\n{document_content}" }) messages.append({ "role": "user", "content": prompt }) # AI Call ausführen if model_info["connector"] == "openai": return await connector.callAiBasic(messages) else: # anthropic response = await connector.callAiBasic(messages) return response["choices"][0]["message"]["content"] except ContextLengthExceededException: logger.warning(f"Context length exceeded for {model_name}, trying fallback") # Fallback zu Modell mit größerem Context fallback_model = self._find_fallback_model(model_name) if fallback_model: return await self._execute_ai_call_with_failover(fallback_model, prompt, document_content) else: # Letzter Ausweg: Content weiter komprimieren compressed_prompt = await self._compress_content(prompt, 1000, "prompt") compressed_content = await self._compress_content(document_content, 5000, "document") return await self._execute_ai_call_with_failover("openai_gpt35", compressed_prompt, compressed_content) except Exception as e: logger.warning(f"AI call failed with {model_name}: {e}") # Allgemeiner Fallback return await self._execute_ai_call_with_failover("openai_gpt35", prompt, document_content) def _find_fallback_model(self, current_model: str) -> Optional[str]: """Findet ein Fallback-Modell mit größerem Context""" current_context = AI_MODELS[current_model]["context_length"] # Suche Modell mit größerem Context for model_name, model_info in AI_MODELS.items(): if model_info["context_length"] > current_context: return model_name return None # Legacy methods async def callAiTextBasic(self, prompt: str, context: Optional[str] = None) -> str: """ Basic text processing - now uses centralized AI call method. Args: prompt: The user prompt to process context: Optional system context/prompt Returns: The AI response as text """ # Combine context with prompt if provided full_prompt = prompt if context: full_prompt = f"Context: {context}\n\nUser Request: {prompt}" # Use centralized AI call with speed priority for basic calls return await self.callAi( prompt=full_prompt, priority="speed", compress_prompt=True, compress_documents=False ) async def callAiTextAdvanced(self, prompt: str, context: Optional[str] = None, _is_fallback: bool = False) -> str: """ Advanced text processing - now uses centralized AI call method. Args: prompt: The user prompt to process context: Optional system context/prompt _is_fallback: Internal flag (kept for compatibility) Returns: The AI response as text """ # Combine context with prompt if provided full_prompt = prompt if context: full_prompt = f"Context: {context}\n\nUser Request: {prompt}" # Use centralized AI call with quality priority for advanced calls return await self.callAi( prompt=full_prompt, priority="quality", compress_prompt=False, compress_documents=False ) async def callAiImageBasic(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str: """ Basic image processing - now uses centralized AI call method. Args: prompt: The prompt for image analysis imageData: The image data (file path or bytes) mimeType: Optional MIME type of the image Returns: The AI response as text """ try: # For image processing, use the original connector directly # as the centralized method doesn't handle images yet return await self.openaiService.callAiImage(prompt, imageData, mimeType) except Exception as e: logger.error(f"Error in OpenAI image call: {str(e)}") return f"Error: {str(e)}" async def callAiImageAdvanced(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str: """ Advanced image processing - now uses centralized AI call method. Args: prompt: The prompt for image analysis imageData: The image data (file path or bytes) mimeType: Optional MIME type of the image Returns: The AI response as text """ try: # For image processing, use the original connector directly # as the centralized method doesn't handle images yet return await self.anthropicService.callAiImage(prompt, imageData, mimeType) except Exception as e: logger.error(f"Error in Anthropic image call: {str(e)}") return f"Error: {str(e)}" # Convenience methods for common use cases async def callAiForDocumentAnalysis( self, prompt: str, documents: List[ChatDocument], priority: str = "balanced" ) -> str: """Convenience method for document analysis""" return await self.callAi( prompt=prompt, documents=documents, operation_type="document_analysis", priority=priority, compress_documents=True, process_documents_individually=False ) async def callAiForReportGeneration( self, prompt: str, documents: List[ChatDocument], priority: str = "quality" ) -> str: """Convenience method for report generation""" return await self.callAi( prompt=prompt, documents=documents, operation_type="report_generation", priority=priority, compress_documents=True, process_documents_individually=True ) async def callAiForEmailComposition( self, prompt: str, documents: List[ChatDocument] = None, priority: str = "speed" ) -> str: """Convenience method for email composition""" return await self.callAi( prompt=prompt, documents=documents, operation_type="email_composition", priority=priority, compress_prompt=True, compress_documents=True ) async def callAiForTaskPlanning( self, prompt: str, documents: List[ChatDocument] = None, priority: str = "balanced" ) -> str: """Convenience method for task planning""" return await self.callAi( prompt=prompt, documents=documents, operation_type="task_planning", priority=priority, compress_prompt=False, compress_documents=True )