diff --git a/modules/aicore/aicorePluginAnthropic.py b/modules/aicore/aicorePluginAnthropic.py index c6f21423..5c1e87b5 100644 --- a/modules/aicore/aicorePluginAnthropic.py +++ b/modules/aicore/aicorePluginAnthropic.py @@ -30,6 +30,8 @@ def _supportsCustomTemperature(modelName: str) -> bool: if not modelName: return True name = modelName.lower() + if name.startswith("claude-opus-4-8"): + return False if name.startswith("claude-opus-4-7"): return False if name.startswith("claude-sonnet-4-7"): @@ -78,6 +80,54 @@ class AiAnthropic(BaseConnectorAi): def getModels(self) -> List[AiModel]: # Get all available Anthropic models. return [ + AiModel( + name="claude-opus-4-8", + displayName="Anthropic Claude Opus 4.8", + connectorType="anthropic", + apiUrl="https://api.anthropic.com/v1/messages", + temperature=0.2, + maxTokens=128000, + contextLength=1000000, + costPer1kTokensInput=0.005, # $5/M tokens (Anthropic API, 2026-05) + costPer1kTokensOutput=0.025, # $25/M tokens + speedRating=5, + qualityRating=10, + functionCall=self.callAiBasic, + functionCallStream=self.callAiBasicStream, + priority=PriorityEnum.QUALITY, + processingMode=ProcessingModeEnum.DETAILED, + operationTypes=createOperationTypeRatings( + (OperationTypeEnum.PLAN, 10), + (OperationTypeEnum.DATA_ANALYSE, 9), + (OperationTypeEnum.DATA_GENERATE, 10), + (OperationTypeEnum.DATA_EXTRACT, 9), + (OperationTypeEnum.AGENT, 10), + (OperationTypeEnum.DATA_QUERY, 3), + ), + version="claude-opus-4-8", + calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.025 + ), + AiModel( + name="claude-opus-4-8", + displayName="Anthropic Claude Opus 4.8 Vision", + connectorType="anthropic", + apiUrl="https://api.anthropic.com/v1/messages", + temperature=0.2, + maxTokens=128000, + contextLength=1000000, + costPer1kTokensInput=0.005, + costPer1kTokensOutput=0.025, + speedRating=5, + qualityRating=10, + functionCall=self.callAiImage, + 
priority=PriorityEnum.QUALITY, + processingMode=ProcessingModeEnum.DETAILED, + operationTypes=createOperationTypeRatings( + (OperationTypeEnum.IMAGE_ANALYSE, 10) + ), + version="claude-opus-4-8", + calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.025 + ), AiModel( name="claude-opus-4-7", displayName="Anthropic Claude Opus 4.7", diff --git a/modules/aicore/aicorePluginPrivateLlm.py b/modules/aicore/aicorePluginPrivateLlm.py index 65841916..20705e56 100644 --- a/modules/aicore/aicorePluginPrivateLlm.py +++ b/modules/aicore/aicorePluginPrivateLlm.py @@ -6,14 +6,21 @@ AI Connector for PowerOn Private-LLM Service. Connects to the private-llm service running on-premise with Ollama backend. Provides OCR and Vision capabilities via local AI models. -Models: -- poweron-text-general: Text (qwen2.5); NEUTRALIZATION_TEXT + data/plan ops -- poweron-vision-general: Vision (qwen2.5vl); IMAGE_ANALYSE + NEUTRALIZATION_IMAGE +Models (current — L4 24 GB): +- poweron-text-general: Text (qwen2.5:7b); NEUTRALIZATION_TEXT + data/plan ops +- poweron-vision-general: Vision (qwen2.5vl:7b); IMAGE_ANALYSE + NEUTRALIZATION_IMAGE - poweron-vision-deep: Vision (granite3.2); IMAGE_ANALYSE + NEUTRALIZATION_IMAGE +Models (next-gen — RTX PRO 6000 96 GB, auto-activated when pulled in Ollama): +- poweron-text-reasoning: Reasoning (deepseek-r1:70b); complex logic, math, planning +- poweron-vision-general: Vision (llama4:scout); multimodal, long-context documents +- poweron-embed: Embedding (nomic-embed-text); local RAG embedding + Pricing (CHF per call): - Text models: CHF 0.010 - Vision models: CHF 0.100 +- Reasoning models: CHF 0.050 +- Embedding: CHF 0.000 (flat rate) """ import logging @@ -39,6 +46,8 @@ logger = logging.getLogger(__name__) # Pricing constants (CHF) PRICE_TEXT_PER_CALL = 0.01 # CHF 0.010 per text model call PRICE_VISION_PER_CALL = 0.10 # CHF 0.100 per vision model call +PRICE_REASONING_PER_CALL = 0.05 
# CHF 0.050 per reasoning call (longer runtime) +PRICE_EMBED_PER_CALL = 0.00 # CHF 0.000 flat rate (local embedding) # Private-LLM Service URL (fix, nicht via env konfigurierbar) @@ -306,6 +315,88 @@ class AiPrivateLlm(BaseConnectorAi): ), "ollamaModel": "granite3.2-vision" }, + # --- Next-gen models (auto-activated when available in Ollama) --- + # Reasoning Model (deepseek-r1:70b — chain-of-thought, math, logic) + { + "model": AiModel( + name="poweron-text-reasoning", + displayName="PowerOn Reasoning", + connectorType="privatellm", + apiUrl=f"{self.baseUrl}/api/analyze", + temperature=0.1, + maxTokens=8192, + contextLength=65536, + costPer1kTokensInput=0.0, + costPer1kTokensOutput=0.0, + speedRating=5, + qualityRating=10, + functionCall=self.callAiText, + priority=PriorityEnum.QUALITY, + processingMode=ProcessingModeEnum.DETAILED, + operationTypes=createOperationTypeRatings( + (OperationTypeEnum.PLAN, 10), + (OperationTypeEnum.DATA_ANALYSE, 10), + (OperationTypeEnum.DATA_GENERATE, 9), + (OperationTypeEnum.DATA_EXTRACT, 9), + (OperationTypeEnum.NEUTRALIZATION_TEXT, 10), + (OperationTypeEnum.AGENT, 9), + ), + version="deepseek-r1:70b", + calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_REASONING_PER_CALL + ), + "ollamaModel": "deepseek-r1:70b" + }, + # Vision Multimodal (llama4:scout — native vision, 10M context) + { + "model": AiModel( + name="poweron-vision-multimodal", + displayName="PowerOn Vision Multimodal", + connectorType="privatellm", + apiUrl=f"{self.baseUrl}/api/analyze", + temperature=0.2, + maxTokens=4096, + contextLength=131072, + costPer1kTokensInput=0.0, + costPer1kTokensOutput=0.0, + speedRating=7, + qualityRating=10, + functionCall=self.callAiVision, + priority=PriorityEnum.QUALITY, + processingMode=ProcessingModeEnum.DETAILED, + operationTypes=createOperationTypeRatings( + (OperationTypeEnum.IMAGE_ANALYSE, 10), + (OperationTypeEnum.NEUTRALIZATION_IMAGE, 10), + ), + version="llama4:scout", + calculatepriceCHF=lambda 
processingTime, bytesSent, bytesReceived: PRICE_VISION_PER_CALL + ), + "ollamaModel": "llama4:scout" + }, + # Local Embedding (nomic-embed-text — replaces OpenAI text-embedding-3-small) + { + "model": AiModel( + name="poweron-embed", + displayName="PowerOn Embedding", + connectorType="privatellm", + apiUrl=f"{self.baseUrl}/v1/embeddings", + temperature=0.0, + maxTokens=0, + contextLength=8192, + costPer1kTokensInput=0.0, + costPer1kTokensOutput=0.0, + speedRating=10, + qualityRating=8, + functionCall=self.callAiText, + priority=PriorityEnum.COST, + processingMode=ProcessingModeEnum.BASIC, + operationTypes=createOperationTypeRatings( + (OperationTypeEnum.EMBEDDING, 9), + ), + version="nomic-embed-text", + calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_EMBED_PER_CALL + ), + "ollamaModel": "nomic-embed-text" + }, ] # Filter models by Ollama availability @@ -320,7 +411,7 @@ class AiPrivateLlm(BaseConnectorAi): unavailableModels.append(modelDef["model"].name) if unavailableModels: - logger.warning( + logger.info( f"Private-LLM: {len(unavailableModels)} models not available in Ollama: {', '.join(unavailableModels)}. 
" f"Install with: ollama pull " ) diff --git a/modules/connectors/connectorDbPostgre.py b/modules/connectors/connectorDbPostgre.py index fa4cba44..2126a65c 100644 --- a/modules/connectors/connectorDbPostgre.py +++ b/modules/connectors/connectorDbPostgre.py @@ -868,6 +868,8 @@ class DatabaseConnector: desired_sql = (model_fields.get(col) or "").upper() currentType = existing_column_types.get(col, "") migration = _SAFE_TYPE_CHANGES.get((currentType, desired_sql)) + if not migration and desired_sql.startswith("VECTOR") and currentType == "text": + migration = f'{desired_sql} USING CASE WHEN "{col}" IS NULL OR "{col}" = \'\' THEN NULL ELSE "{col}"::vector END' if migration: castExpr = migration.replace("{col}", col) try: diff --git a/modules/serviceCenter/services/serviceAi/subStructureFilling.py b/modules/serviceCenter/services/serviceAi/subStructureFilling.py index 5682fbb2..dbf289fd 100644 --- a/modules/serviceCenter/services/serviceAi/subStructureFilling.py +++ b/modules/serviceCenter/services/serviceAi/subStructureFilling.py @@ -2662,6 +2662,17 @@ CRITICAL: if tableCount > 0: logger.info(f"Section {sectionId}: JSON block {i+1}: {tableCount} table(s) with {rowCount} total rows") + # Validate table elements: AI sometimes returns content as string instead of {headers, rows} + if contentType == "table": + for idx, elem in enumerate(allElements): + if not isinstance(elem, dict) or elem.get("type") != "table": + continue + content = elem.get("content") + if isinstance(content, str): + normalized = self._normalizeTableContentString(content, sectionId, idx) + elem["content"] = normalized + logger.info(f"Section {sectionId}: Normalized string table content in element {idx}") + # Merge elements based on contentType if contentType == "table" and len(allElements) > 1: # Find all table elements @@ -2682,6 +2693,45 @@ CRITICAL: return allElements + def _normalizeTableContentString(self, text: str, sectionId: str, elemIndex: int) -> Dict[str, Any]: + """Convert a string table 
content (CSV, markdown, pipe-delimited) into {headers, rows}.""" + import csv + import io + + lines = [l for l in text.strip().splitlines() if l.strip()] + if not lines: + return {"headers": [], "rows": []} + + # Detect markdown pipe table (| col1 | col2 |) + if "|" in lines[0]: + rows = [] + for line in lines: + stripped = line.strip().strip("|") + if stripped and not all(c in "-| " for c in stripped): + cells = [c.strip() for c in stripped.split("|")] + rows.append(cells) + if rows: + return {"headers": rows[0], "rows": rows[1:]} + + # Detect CSV/TSV + try: + dialect = csv.Sniffer().sniff(lines[0]) + reader = csv.reader(io.StringIO(text.strip()), dialect) + allRows = list(reader) + if allRows: + return {"headers": allRows[0], "rows": allRows[1:]} + except csv.Error: + pass + + # Tab-separated fallback + if "\t" in lines[0]: + rows = [line.split("\t") for line in lines] + return {"headers": rows[0], "rows": rows[1:]} + + # Last resort: single-column table from lines + logger.warning(f"Section {sectionId}: element {elemIndex}: could not parse table string format, wrapping as single column") + return {"headers": ["Value"], "rows": [[line] for line in lines]} + def _canMergeTables(self, tableElements: List[Dict[str, Any]]) -> bool: """Check if tables can be safely merged (same column counts).""" if len(tableElements) <= 1: diff --git a/modules/serviceCenter/services/serviceChat/mainServiceChat.py b/modules/serviceCenter/services/serviceChat/mainServiceChat.py index 61026de0..22eefeed 100644 --- a/modules/serviceCenter/services/serviceChat/mainServiceChat.py +++ b/modules/serviceCenter/services/serviceChat/mainServiceChat.py @@ -36,6 +36,16 @@ class ChatService: """Workflow from context (stable during workflow execution).""" return self._context.workflow + def _chatDocumentFromDb(self, docId: str) -> Optional[ChatDocument]: + """Lookup a ChatDocument directly in the DB by its primary key.""" + try: + results = self.interfaceDbChat.db.getRecordset(ChatDocument, 
def _chatDocumentFromDb(self, docId: str) -> Optional[ChatDocument]:
    """Fetch the ChatDocument record whose ``id`` equals ``docId``.

    Queries ``self.interfaceDbChat.db.getRecordset`` with an ``id`` filter and
    returns the first record of the result. Returns ``None`` when the result
    is empty or when the lookup raises; a failure is logged at debug level
    only and is not propagated to the caller.
    """
    idFilter = {"id": docId}
    try:
        matches = self.interfaceDbChat.db.getRecordset(ChatDocument, recordFilter=idFilter)
        return matches[0] if matches else None
    except Exception as err:
        logger.debug("_chatDocumentFromDb(%s) failed: %s", docId, err)
        return None