Merge pull request #106 from valueonag/refactor/service-migrations

Refactor/service migrations

This commit is contained in: commit 6154eb2553

199 changed files with 10502 additions and 38615 deletions
@@ -44,7 +44,7 @@ APP_FRONTEND_URL = http://localhost:5176
# AI configuration
Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQnBaSnM4TWFRRmxVQmNQblVIYmc1Y0Q3aW9zZUtDWlNWdGZjbFpncGp2NHN2QjkxMWxibUJnZDBId252MWk5TXN3Yk14ajFIdi1CTkx2ZWx2QzF5OFR6LUx5azQ3dnNLaXJBOHNxc0tlWmtZcTFVelF4eXBSM2JkbHd2eTM0VHNXdHNtVUprZWtPVzctNlJsZHNmM20tU1N6Q1Q2cHFYSi1tNlhZNDNabTVuaEVGWmIydEhadTcyMlBURmw2aUJxOF9GTzR0dTZiNGZfOFlHaVpPZ1A1LXhhOEFtN1J5TEVNNWtMcGpyNkMzSl8xRnZsaTF1WTZrOUZmb0cxVURjSGFLS2dIYTQyZEJtTm90bEYxVWxNNXVPdTVjaVhYbXhxT3JsVDM5VjZMVFZKSE1tZnM9
Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpENmFBWG16STFQUVZxNzZZRzRLYTA4X3lRanF1VkF4cU45OExNMzlsQmdISGFxTUxud1dXODBKcFhMVG9KNjdWVnlTTFFROVc3NDlsdlNHLUJXeG41NDBHaXhHR0VHVWl5UW9RNkVWbmlhakRKVW5pM0R4VHk0LUw0TV9LdkljNHdBLXJua21NQkl2b3l4UkVkMGN1YjBrMmJEeWtMay1jbmxrYWJNbUV0aktCXzU1djR2d2RSQXZORTNwcG92ZUVvVGMtQzQzTTVncEZTRGRtZUFIZWQ0dz09
Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQm82Mzk2Q1MwZ0dNcUVBcUtuRDJIcTZkMXVvYnpjM3JEMzJiT1NKSHljX282ZDIyZTJYc09VSTdVNXAtOWU2UXp5S193NTk5dHJsWlFjRjhWektFOG1DVGY4ZUhHTXMzS0RPN1lNcF9nSlVWbW5BZ1hkZDVTejl6bVZNRFVvX29xamJidWRFMmtjQmkyRUQ2RUh6UTN1aWNPSUJBPT0=
Connector_AiPerplexity_API_SECRET = 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
Connector_AiTavily_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQTdnUHMwd2pIaXNtMmtCTFREd0pyQXRKb1F5eGtHSnkyOGZiUnlBOFc0b3Vzcndrc3ViRm1nMDJIOEZKYWxqdWNkZGh5N0Z4R0JlQmxXSG5pVnJUR2VYckZhMWNMZ1FNeXJ3enJLVlpiblhOZTNleUg3ZzZyUzRZanFSeDlVMkI=
Connector_AiPrivateLlm_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGRHM5eFdUVmVZU1R1cHBwN1RlMUx4T0NlLTJLUFFVX3J2OElDWFpuZmJHVmp4Z3BNNWMwZUVVZUd2TFhRSjVmVkVlcFlVRWtybXh0ZHloZ01ZcnVvX195YjdlWVdEcjZSWFFTTlNBWUlaTlNoLWhqVFBIb0thVlBiaWhjYjFQOFY=
Connector_AiMistral_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGeEQxYUIxOHhia0JlQWpWQ2dWQWZzY3l6SWwyUnJoR1hRQWloX2lxb2lGNkc4UnA4U2tWNjJaYzB1d1hvNG9fWUp1N3V4OW9FMGhaWVhjSlVwWEc1X2loVDBSZDEtdHdfcTA5QkcxQTR4OHc4RkRzclJrU2d1RFZpNDJkRDRURlE=
@@ -44,7 +44,7 @@ APP_FRONTEND_URL = https://nyla-int.poweron-center.net
# AI configuration
Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4MENkQ2xJVmE5WFZKUkh2SHJFby1YVXN3ZmVxRkptS3ZWRmlwdU93ZEJjSjlMV2NGbU5mS3NCdmFfcmFYTEJNZXFIQ3ozTWE4ZC1pemlQNk9wbjU1d3BPS0ZCTTZfOF8yWmVXMWx0TU1DamlJLVFhSTJXclZsY3hMVWlPcXVqQWtMdER4T252NHZUWEhUOTdIN1VGR3ltazEweXFqQ0lvb0hYWmxQQnpxb0JwcFNhRDNGWXdoRTVJWm9FalZpTUF5b1RqZlRaYnVKYkp0NWR5Vko1WWJ0Wmg2VWJzYXZ0Z3Q4UkpsTldDX2dsekhKMmM4YjRoa2RwemMwYVQwM2cyMFlvaU5mOTVTWGlROU8xY2ZVRXlxZzJqWkxURWlGZGI2STZNb0NpdEtWUnM9
Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjT1ZlRWVJdVZMT3ljSFJDcFdxRFBRVkZhS204NnN5RDBlQ0tpenhTM0FFVktuWW9mWHNwRWx2dHB0eDBSZ0JFQnZKWlp6c01pVGREWHd1eGpERnU0Q2xhaks1clQ1ZXVsdnd2ZzhpNXNQS1BhY3FjSkdkVEhHalNaRGR4emhpakZncnpDQUVxOHVXQzVUWmtQc0FsYmFwTF9TSG5FOUFtWk5Ick1NcHFvY2s1T1c2WXlRUFFJZnh6TWhuaVpMYmppcDR0QUx0a0R6RXlwbGRYb1R4dzJkUT09
Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQm82Mzk2UWZJdUFhSW8yc3RKc0tKRXphd0xWMkZOVlFpSGZ4SGhFWnk0cTF5VjlKQVZjdS1QSWdkS0pUSWw4OFU5MjUxdTVQel9aeWVIZTZ5TXRuVmFkZG0zWEdTOGdHMHpsTzI0TGlWYURKU1Q0VVpKTlhxUk5FTmN6SUJScDZ3ZldIaUJZcWpaQVRiSEpyQm9tRTNDWk9KTnZBPT0=
Connector_AiPerplexity_API_SECRET = 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
Connector_AiTavily_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkdkJMTDY0akhXNzZDWHVYSEt1cDZoOWEzSktneHZEV2JndTNmWlNSMV9KbFNIZmQzeVlrNE5qUEIwcUlBSGM1a0hOZ3J6djIyOVhnZzI3M1dIUkdicl9FVXF3RGktMmlEYmhnaHJfWTdGUkktSXVUSGdQMC1vSEV6VE8zR2F1SVk=
Connector_AiPrivateLlm_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGSjZ1NWh0aWc1R3Z4MHNaeS1HamtUbndhcUZFZDlqUDhjSmg5eHFfdlVkU0RsVkJ2UVRaMWs3aWhraG5jSlc0YkxNWHVmR2JoSW5ENFFCdkJBM0VienlKSnhzNnBKbTJOUTFKczRfWlQ3bWpmUkRTT1I1OGNUSTlQdExacGRpeXg=
Connector_AiMistral_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGZTNtZ1E4TWIxSEU1OUlreUpxZkJIR0Vxcm9xRHRUbnBxbTQ1cXlkbnltWkJVdTdMYWZ4c3Fsam42TERWUTVhNzZFMU9xVjdyRGFCYml6bmZsZFd2YmJzemlrSWN6Q3o3X0NXX2xXNUQteTNONHdKYzJ5YVpLLWdhU2JhSTJQZnI=
@@ -44,7 +44,7 @@ APP_FRONTEND_URL = https://nyla.poweron-center.net
# AI configuration
Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3UxLUdwY2hhdGYxRGp4VFJqYXZIcmkxM1ZyOWV2M0Z4MHdFNkVYQ0ROb1d6LUZFUEdvMHhLMEtXYVBCRzM5TlYyY3ROYWtJRk41cDZxd0tYYi00MjVqMTh4QVcyTXl0bmVocEFHbXQwREpwNi1vODdBNmwzazE5bkpNelE2WXpvblIzWlQwbGdEelI2WXFqT1RibXVHcjNWbVhwYzBOM25XTzNmTDAwUjRvYk4yNjIyZHc5c2RSZzREQUFCdUwyb0ZuOXN1dzI2c2FKdXI4NGxEbk92czZWamJXU3ZSbUlLejZjRklRRk4tLV9aVUFZekI2bTU4OHYxNTUybDg3RVo0ZTh6dXNKRW5GNXVackZvcm9laGI0X3R6V3M9
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQm82Mzk2Q1FGRkJEUkI4LXlQbHYzT2RkdVJEcmM4WGdZTWpJTEhoeUF1NW5LUVpJdDBYN3k1WFN4a2FQSWJSQmd0U0xJbzZDTmFFN05FcXl0Z3V1OEpsZjYydV94TXVjVjVXRTRYSWdLMkd5XzZIbFV6emRCZHpuOUpQeThadE5xcDNDVGV1RHJrUEN0c1BBYXctZFNWcFRuVXhRPT0=
Connector_AiPerplexity_API_SECRET = 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
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg=
Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo=
Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGc2tQc2lvMk1YZk01Q1dob1U5cnR0dG03WWE3WkpoOWo0SEpvLU9Rc2lCNDExdy1wZExaN3lpT2FEQkxnaHRmWmZUUUZUUUJmblZreGlpaFpOdnFhbzlEd1RsVVJtX216cmhxTm5BcTN2eUZ2T054cDE5bmlEamJ3NGR6MVpFQnA=
@@ -12,8 +12,8 @@ IMPORTANT: Model Registration Requirements
"""

from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional
from modules.datamodels.datamodelAi import AiModel
from typing import List, Dict, Any, Optional, AsyncGenerator, Union
from modules.datamodels.datamodelAi import AiModel, AiModelCall, AiModelResponse


class BaseConnectorAi(ABC):
@@ -102,3 +102,24 @@ class BaseConnectorAi(ABC):
        """Get only available models."""
        models = self.getCachedModels()
        return [model for model in models if model.isAvailable]

    async def callAiBasicStream(self, modelCall: AiModelCall) -> AsyncGenerator[Union[str, AiModelResponse], None]:
        """Stream AI response. Yields str deltas during generation, then final AiModelResponse.

        Default implementation: falls back to non-streaming callAiBasic.
        Override in connectors that support streaming.
        """
        response = await self.callAiBasic(modelCall)
        if response.content:
            yield response.content
        yield response

    async def callEmbedding(self, modelCall: AiModelCall) -> AiModelResponse:
        """Generate embeddings for input texts. Override in connectors that support embeddings.

        Reads texts from modelCall.embeddingInput.
        Returns AiModelResponse with metadata["embeddings"] containing the vectors.
        """
        raise NotImplementedError(
            f"{self.__class__.__name__} does not support embeddings"
        )
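For orientation, the streaming contract above is consumed by iterating the generator and treating the final non-string item as the completed response. A minimal usage sketch, not part of the diff; connector construction and the AiModelCall fields are assumed:

import asyncio

async def printStreamedAnswer(connector: "BaseConnectorAi", modelCall: "AiModelCall") -> None:
    # Hypothetical consumer: text deltas are printed as they arrive,
    # the trailing AiModelResponse carries the aggregated result and metadata.
    finalResponse = None
    async for chunk in connector.callAiBasicStream(modelCall):
        if isinstance(chunk, str):
            print(chunk, end="", flush=True)
        else:
            finalResponse = chunk
    if finalResponse is not None:
        print(f"\n[model={finalResponse.modelId}, success={finalResponse.success}]")

# asyncio.run(printStreamedAnswer(connector, modelCall))  # assumes an instantiated connector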
@@ -1,9 +1,10 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import json
import logging
import httpx
import os
from typing import Dict, Any, List
from typing import Dict, Any, List, AsyncGenerator, Union
from fastapi import HTTPException
from modules.shared.configuration import APP_CONFIG
from .aicoreBase import BaseConnectorAi
@@ -61,13 +62,15 @@ class AiAnthropic(BaseConnectorAi):
            speedRating=6, # Slower due to high-quality processing
            qualityRating=10, # Best quality available
            functionCall=self.callAiBasic,
            functionCallStream=self.callAiBasicStream,
            priority=PriorityEnum.QUALITY,
            processingMode=ProcessingModeEnum.DETAILED,
            operationTypes=createOperationTypeRatings(
                (OperationTypeEnum.PLAN, 9),
                (OperationTypeEnum.DATA_ANALYSE, 9),
                (OperationTypeEnum.DATA_GENERATE, 9),
                (OperationTypeEnum.DATA_EXTRACT, 8)
                (OperationTypeEnum.DATA_EXTRACT, 8),
                (OperationTypeEnum.AGENT, 9),
            ),
            version="claude-sonnet-4-5-20250929",
            calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.003 + (bytesReceived / 4 / 1000) * 0.015
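The calculatepriceCHF lambdas registered above approximate token counts as bytes / 4 and then apply per-1k-token rates. A small hedged sketch of that arithmetic; the 0.003/0.015 defaults mirror the Sonnet registration above, everything else is illustrative:

def estimatePriceCHF(bytesSent: int, bytesReceived: int,
                     inputPricePer1k: float = 0.003,
                     outputPricePer1k: float = 0.015) -> float:
    # ~4 bytes per token is the heuristic used by the registered lambdas.
    inputTokens = bytesSent / 4
    outputTokens = bytesReceived / 4
    return (inputTokens / 1000) * inputPricePer1k + (outputTokens / 1000) * outputPricePer1k

# Example: an 8 kB prompt with a 2 kB completion
# print(round(estimatePriceCHF(8000, 2000), 4))  # ~0.0135 CHF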
@@ -85,13 +88,15 @@ class AiAnthropic(BaseConnectorAi):
            speedRating=9, # Very fast, lightweight model
            qualityRating=8, # Good quality, cost-efficient
            functionCall=self.callAiBasic,
            functionCallStream=self.callAiBasicStream,
            priority=PriorityEnum.SPEED,
            processingMode=ProcessingModeEnum.BASIC,
            operationTypes=createOperationTypeRatings(
                (OperationTypeEnum.PLAN, 8),
                (OperationTypeEnum.DATA_ANALYSE, 8),
                (OperationTypeEnum.DATA_GENERATE, 8),
                (OperationTypeEnum.DATA_EXTRACT, 7)
                (OperationTypeEnum.DATA_EXTRACT, 7),
                (OperationTypeEnum.AGENT, 7),
            ),
            version="claude-haiku-4-5-20251001",
            calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.001 + (bytesReceived / 4 / 1000) * 0.005
@@ -109,13 +114,15 @@ class AiAnthropic(BaseConnectorAi):
            speedRating=5, # Moderate latency, most capable
            qualityRating=10, # Top-tier intelligence
            functionCall=self.callAiBasic,
            functionCallStream=self.callAiBasicStream,
            priority=PriorityEnum.QUALITY,
            processingMode=ProcessingModeEnum.DETAILED,
            operationTypes=createOperationTypeRatings(
                (OperationTypeEnum.PLAN, 10),
                (OperationTypeEnum.DATA_ANALYSE, 10),
                (OperationTypeEnum.DATA_ANALYSE, 8),
                (OperationTypeEnum.DATA_GENERATE, 10),
                (OperationTypeEnum.DATA_EXTRACT, 9)
                (OperationTypeEnum.DATA_EXTRACT, 9),
                (OperationTypeEnum.AGENT, 10),
            ),
            version="claude-opus-4-6",
            calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.025
@@ -158,8 +165,6 @@ class AiAnthropic(BaseConnectorAi):
|
|||
HTTPException: For errors in API communication
|
||||
"""
|
||||
try:
|
||||
# Extract parameters from modelCall
|
||||
messages = modelCall.messages
|
||||
model = modelCall.model
|
||||
options = modelCall.options
|
||||
temperature = getattr(options, "temperature", None)
|
||||
|
|
@@ -167,44 +172,8 @@ class AiAnthropic(BaseConnectorAi):
|
|||
temperature = model.temperature
|
||||
maxTokens = model.maxTokens
|
||||
|
||||
# Transform OpenAI-style messages to Anthropic format:
|
||||
# - Move any 'system' role content to top-level 'system'
|
||||
# - Keep only 'user'/'assistant' messages in the list
|
||||
system_contents: List[str] = []
|
||||
converted_messages: List[Dict[str, Any]] = []
|
||||
for m in messages:
|
||||
role = m.get("role")
|
||||
content = m.get("content", "")
|
||||
if role == "system":
|
||||
# Collect system content; Anthropic expects top-level 'system'
|
||||
if isinstance(content, list):
|
||||
# Join text parts if provided as blocks
|
||||
joined = "\n\n".join(
|
||||
[
|
||||
(part.get("text") if isinstance(part, dict) else str(part))
|
||||
for part in content
|
||||
]
|
||||
)
|
||||
system_contents.append(joined)
|
||||
else:
|
||||
system_contents.append(str(content))
|
||||
continue
|
||||
# For Anthropic, content can be a string; pass through strings, collapse blocks
|
||||
if isinstance(content, list):
|
||||
# Collapse to text if blocks are provided
|
||||
collapsed = "\n\n".join(
|
||||
[
|
||||
(part.get("text") if isinstance(part, dict) else str(part))
|
||||
for part in content
|
||||
]
|
||||
)
|
||||
converted_messages.append({"role": role, "content": collapsed})
|
||||
else:
|
||||
converted_messages.append({"role": role, "content": content})
|
||||
converted_messages, system_prompt = _convertMessagesForAnthropic(modelCall.messages)
|
||||
|
||||
system_prompt = "\n\n".join([s for s in system_contents if s]) if system_contents else None
|
||||
|
||||
# Create Anthropic API payload
|
||||
payload: Dict[str, Any] = {
|
||||
"model": model.name,
|
||||
"messages": converted_messages,
|
||||
|
|
@@ -218,6 +187,13 @@ class AiAnthropic(BaseConnectorAi):
|
|||
if system_prompt:
|
||||
payload["system"] = system_prompt
|
||||
|
||||
if modelCall.tools:
|
||||
payload["tools"] = _convertToolsToAnthropicFormat(modelCall.tools)
|
||||
if modelCall.toolChoice:
|
||||
payload["tool_choice"] = modelCall.toolChoice
|
||||
else:
|
||||
payload["tool_choice"] = {"type": "auto"}
|
||||
|
||||
response = await self.httpClient.post(
|
||||
model.apiUrl,
|
||||
json=payload
|
||||
|
|
@@ -244,29 +220,39 @@ class AiAnthropic(BaseConnectorAi):
|
|||
# Parse response
|
||||
anthropicResponse = response.json()
|
||||
|
||||
# Extract content from response
|
||||
# Extract content and tool_use blocks from response
|
||||
content = ""
|
||||
toolCalls = []
|
||||
if "content" in anthropicResponse:
|
||||
if isinstance(anthropicResponse["content"], list):
|
||||
# Content is a list of parts (in newer API versions)
|
||||
for part in anthropicResponse["content"]:
|
||||
if part.get("type") == "text":
|
||||
content += part.get("text", "")
|
||||
elif part.get("type") == "tool_use":
|
||||
toolCalls.append({
|
||||
"id": part.get("id", ""),
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": part.get("name", ""),
|
||||
"arguments": json.dumps(part.get("input", {})) if isinstance(part.get("input"), dict) else str(part.get("input", "{}"))
|
||||
}
|
||||
})
|
||||
else:
|
||||
# Direct content as string (in older API versions)
|
||||
content = anthropicResponse["content"]
|
||||
|
||||
# Debug logging for empty responses
|
||||
if not content or content.strip() == "":
|
||||
if not content and not toolCalls:
|
||||
logger.warning(f"Anthropic API returned empty content. Full response: {anthropicResponse}")
|
||||
content = "[Anthropic API returned empty response]"
|
||||
|
||||
# Return standardized response
|
||||
metadata = {"response_id": anthropicResponse.get("id", "")}
|
||||
if toolCalls:
|
||||
metadata["toolCalls"] = toolCalls
|
||||
|
||||
return AiModelResponse(
|
||||
content=content,
|
||||
success=True,
|
||||
modelId=model.name,
|
||||
metadata={"response_id": anthropicResponse.get("id", "")}
|
||||
metadata=metadata
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
|
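When the Anthropic response contains tool_use blocks, the parsing above converts them into OpenAI-style tool-call dicts under metadata["toolCalls"]. A hedged illustration of the resulting shape; the values are made up:

# Illustrative only: the structure produced by the tool_use handling above.
exampleToolCall = {
    "id": "toolu_01A",              # Anthropic tool_use id, passed through
    "type": "function",
    "function": {
        "name": "searchDocuments",  # hypothetical tool name
        "arguments": '{"query": "quarterly revenue", "limit": 5}',  # JSON-encoded input dict
    },
}
# metadata = {"response_id": "...", "toolCalls": [exampleToolCall]}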
@@ -279,6 +265,101 @@ class AiAnthropic(BaseConnectorAi):
|
|||
logger.error(error_detail, exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=error_detail)
|
||||
|
||||
async def callAiBasicStream(self, modelCall: AiModelCall) -> AsyncGenerator[Union[str, AiModelResponse], None]:
|
||||
"""Stream Anthropic response. Yields str deltas, then final AiModelResponse."""
|
||||
try:
|
||||
model = modelCall.model
|
||||
options = modelCall.options
|
||||
temperature = getattr(options, "temperature", None)
|
||||
if temperature is None:
|
||||
temperature = model.temperature
|
||||
|
||||
converted, system_prompt = _convertMessagesForAnthropic(modelCall.messages)
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"model": model.name,
|
||||
"messages": converted,
|
||||
"temperature": temperature,
|
||||
"max_tokens": model.maxTokens,
|
||||
"stream": True,
|
||||
}
|
||||
if system_prompt:
|
||||
payload["system"] = system_prompt
|
||||
if modelCall.tools:
|
||||
payload["tools"] = _convertToolsToAnthropicFormat(modelCall.tools)
|
||||
payload["tool_choice"] = modelCall.toolChoice or {"type": "auto"}
|
||||
|
||||
fullContent = ""
|
||||
toolUseBlocks: Dict[int, Dict[str, Any]] = {}
|
||||
currentToolIdx = -1
|
||||
|
||||
async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
|
||||
if response.status_code != 200:
|
||||
body = await response.aread()
|
||||
raise HTTPException(status_code=500, detail=f"Anthropic stream error: {response.status_code} - {body.decode()}")
|
||||
|
||||
async for line in response.aiter_lines():
|
||||
if not line.startswith("data: "):
|
||||
continue
|
||||
try:
|
||||
event = json.loads(line[6:])
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
eventType = event.get("type", "")
|
||||
|
||||
if eventType == "content_block_start":
|
||||
block = event.get("content_block", {})
|
||||
idx = event.get("index", 0)
|
||||
if block.get("type") == "tool_use":
|
||||
currentToolIdx = idx
|
||||
toolUseBlocks[idx] = {
|
||||
"id": block.get("id", ""),
|
||||
"name": block.get("name", ""),
|
||||
"arguments": "",
|
||||
}
|
||||
|
||||
elif eventType == "content_block_delta":
|
||||
delta = event.get("delta", {})
|
||||
if delta.get("type") == "text_delta":
|
||||
text = delta.get("text", "")
|
||||
fullContent += text
|
||||
yield text
|
||||
elif delta.get("type") == "input_json_delta":
|
||||
idx = event.get("index", currentToolIdx)
|
||||
if idx in toolUseBlocks:
|
||||
toolUseBlocks[idx]["arguments"] += delta.get("partial_json", "")
|
||||
|
||||
elif eventType == "message_stop":
|
||||
break
|
||||
|
||||
metadata: Dict[str, Any] = {}
|
||||
if toolUseBlocks:
|
||||
metadata["toolCalls"] = [
|
||||
{
|
||||
"id": tb["id"],
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": tb["name"],
|
||||
"arguments": tb["arguments"],
|
||||
},
|
||||
}
|
||||
for tb in toolUseBlocks.values()
|
||||
]
|
||||
|
||||
yield AiModelResponse(
|
||||
content=fullContent,
|
||||
success=True,
|
||||
modelId=model.name,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error streaming Anthropic API: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=f"Error streaming Anthropic API: {e}")
|
||||
|
||||
async def callAiImage(self, modelCall: AiModelCall) -> AiModelResponse:
|
||||
"""
|
||||
Analyzes an image using Anthropic's vision capabilities, following the standardized pattern.
|
||||
|
|
@@ -331,6 +412,20 @@ class AiAnthropic(BaseConnectorAi):
|
|||
mimeType = parts[0].replace("data:", "")
|
||||
base64Data = parts[1]
|
||||
|
||||
import base64 as _b64
|
||||
try:
|
||||
rawHead = _b64.b64decode(base64Data[:32])
|
||||
if rawHead[:3] == b"\xff\xd8\xff":
|
||||
mimeType = "image/jpeg"
|
||||
elif rawHead[:8] == b"\x89PNG\r\n\x1a\n":
|
||||
mimeType = "image/png"
|
||||
elif rawHead[:4] == b"GIF8":
|
||||
mimeType = "image/gif"
|
||||
elif rawHead[:4] == b"RIFF" and rawHead[8:12] == b"WEBP":
|
||||
mimeType = "image/webp"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Convert to Anthropic's vision format
|
||||
anthropicMessages = [{
|
||||
"role": "user",
|
||||
|
|
@@ -425,3 +520,100 @@ class AiAnthropic(BaseConnectorAi):
|
|||
success=False,
|
||||
error=f"Error during image analysis: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
def _convertMessagesForAnthropic(messages: List[Dict[str, Any]]):
|
||||
"""Convert OpenAI-style messages to Anthropic format. Returns (messages, system_prompt)."""
|
||||
system_contents: List[str] = []
|
||||
converted_messages: List[Dict[str, Any]] = []
|
||||
pendingToolResults: List[Dict[str, Any]] = []
|
||||
|
||||
def _flush():
|
||||
if not pendingToolResults:
|
||||
return
|
||||
converted_messages.append({"role": "user", "content": list(pendingToolResults)})
|
||||
pendingToolResults.clear()
|
||||
|
||||
def _collapse(content):
|
||||
if isinstance(content, list):
|
||||
return "\n\n".join(
|
||||
(part.get("text") if isinstance(part, dict) else str(part))
|
||||
for part in content
|
||||
)
|
||||
return str(content) if content else ""
|
||||
|
||||
for m in messages:
|
||||
role = m.get("role")
|
||||
content = m.get("content", "")
|
||||
|
||||
if role == "system":
|
||||
system_contents.append(_collapse(content))
|
||||
continue
|
||||
if role == "tool":
|
||||
pendingToolResults.append({
|
||||
"type": "tool_result",
|
||||
"tool_use_id": m.get("tool_call_id", ""),
|
||||
"content": str(content) if content else "",
|
||||
})
|
||||
continue
|
||||
|
||||
_flush()
|
||||
|
||||
if role == "assistant" and m.get("tool_calls"):
|
||||
contentBlocks = []
|
||||
textPart = _collapse(content)
|
||||
if textPart:
|
||||
contentBlocks.append({"type": "text", "text": textPart})
|
||||
for tc in m["tool_calls"]:
|
||||
fn = tc.get("function", {})
|
||||
inputData = fn.get("arguments", "{}")
|
||||
if isinstance(inputData, str):
|
||||
try:
|
||||
inputData = json.loads(inputData)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
inputData = {}
|
||||
contentBlocks.append({
|
||||
"type": "tool_use",
|
||||
"id": tc.get("id", ""),
|
||||
"name": fn.get("name", ""),
|
||||
"input": inputData,
|
||||
})
|
||||
converted_messages.append({"role": "assistant", "content": contentBlocks})
|
||||
continue
|
||||
|
||||
converted_messages.append({"role": role, "content": _collapse(content)})
|
||||
|
||||
_flush()
|
||||
|
||||
merged: List[Dict[str, Any]] = []
|
||||
for msg in converted_messages:
|
||||
if merged and merged[-1]["role"] == msg["role"]:
|
||||
prev = merged[-1]
|
||||
pc, nc = prev["content"], msg["content"]
|
||||
if isinstance(pc, str) and isinstance(nc, str):
|
||||
prev["content"] = pc + "\n\n" + nc
|
||||
elif isinstance(pc, list) and isinstance(nc, list):
|
||||
prev["content"] = pc + nc
|
||||
elif isinstance(pc, str) and isinstance(nc, list):
|
||||
prev["content"] = [{"type": "text", "text": pc}] + nc
|
||||
elif isinstance(pc, list) and isinstance(nc, str):
|
||||
prev["content"] = pc + [{"type": "text", "text": nc}]
|
||||
else:
|
||||
merged.append(msg)
|
||||
|
||||
system_prompt = "\n\n".join([s for s in system_contents if s]) if system_contents else None
|
||||
return merged, system_prompt
|
||||
|
||||
|
||||
def _convertToolsToAnthropicFormat(openaiTools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Convert OpenAI-style tool definitions to Anthropic format."""
|
||||
anthropicTools = []
|
||||
for tool in openaiTools:
|
||||
if tool.get("type") == "function":
|
||||
fn = tool["function"]
|
||||
anthropicTools.append({
|
||||
"name": fn["name"],
|
||||
"description": fn.get("description", ""),
|
||||
"input_schema": fn.get("parameters", {"type": "object", "properties": {}})
|
||||
})
|
||||
return anthropicTools
|
||||
|
|
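To make the helper's behaviour concrete, here is a hedged example of _convertToolsToAnthropicFormat applied to one OpenAI-style tool definition; the tool itself is hypothetical:

openaiTools = [{
    "type": "function",
    "function": {
        "name": "getWeather",  # hypothetical tool
        "description": "Return the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

# _convertToolsToAnthropicFormat(openaiTools) would yield:
# [{"name": "getWeather",
#   "description": "Return the current weather for a city.",
#   "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}]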
@@ -1,8 +1,9 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
import logging
|
||||
import json as _json
|
||||
import httpx
|
||||
from typing import List
|
||||
from typing import List, Dict, Any, AsyncGenerator, Union
|
||||
from fastapi import HTTPException
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
from .aicoreBase import BaseConnectorAi
|
||||
|
|
@@ -66,13 +67,15 @@ class AiMistral(BaseConnectorAi):
|
|||
speedRating=8, # Good speed for complex tasks
|
||||
qualityRating=9, # High quality
|
||||
functionCall=self.callAiBasic,
|
||||
functionCallStream=self.callAiBasicStream,
|
||||
priority=PriorityEnum.BALANCED,
|
||||
processingMode=ProcessingModeEnum.ADVANCED,
|
||||
operationTypes=createOperationTypeRatings(
|
||||
(OperationTypeEnum.PLAN, 9),
|
||||
(OperationTypeEnum.DATA_ANALYSE, 9),
|
||||
(OperationTypeEnum.DATA_GENERATE, 9),
|
||||
(OperationTypeEnum.DATA_EXTRACT, 8)
|
||||
(OperationTypeEnum.DATA_EXTRACT, 8),
|
||||
(OperationTypeEnum.AGENT, 8),
|
||||
),
|
||||
version="mistral-large-latest",
|
||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0005 + (bytesReceived / 4 / 1000) * 0.0015
|
||||
|
|
@@ -90,17 +93,40 @@ class AiMistral(BaseConnectorAi):
|
|||
speedRating=9, # Very fast, lightweight model
|
||||
qualityRating=7, # Good quality, cost-efficient
|
||||
functionCall=self.callAiBasic,
|
||||
functionCallStream=self.callAiBasicStream,
|
||||
priority=PriorityEnum.SPEED,
|
||||
processingMode=ProcessingModeEnum.BASIC,
|
||||
operationTypes=createOperationTypeRatings(
|
||||
(OperationTypeEnum.PLAN, 7),
|
||||
(OperationTypeEnum.DATA_ANALYSE, 7),
|
||||
(OperationTypeEnum.DATA_GENERATE, 8),
|
||||
(OperationTypeEnum.DATA_EXTRACT, 7)
|
||||
(OperationTypeEnum.DATA_EXTRACT, 7),
|
||||
(OperationTypeEnum.AGENT, 6),
|
||||
),
|
||||
version="mistral-small-latest",
|
||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00006 + (bytesReceived / 4 / 1000) * 0.00018
|
||||
),
|
||||
AiModel(
|
||||
name="mistral-embed",
|
||||
displayName="Mistral Embed",
|
||||
connectorType="mistral",
|
||||
apiUrl="https://api.mistral.ai/v1/embeddings",
|
||||
temperature=0.0,
|
||||
maxTokens=0,
|
||||
contextLength=8192,
|
||||
costPer1kTokensInput=0.0001, # $0.10/M tokens
|
||||
costPer1kTokensOutput=0.0,
|
||||
speedRating=10,
|
||||
qualityRating=7,
|
||||
functionCall=self.callEmbedding,
|
||||
priority=PriorityEnum.COST,
|
||||
processingMode=ProcessingModeEnum.BASIC,
|
||||
operationTypes=createOperationTypeRatings(
|
||||
(OperationTypeEnum.EMBEDDING, 8)
|
||||
),
|
||||
version="mistral-embed",
|
||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0001
|
||||
),
|
||||
AiModel(
|
||||
name="mistral-large-latest",
|
||||
displayName="Mistral Large 3 Vision",
|
||||
|
|
@@ -216,6 +242,104 @@ class AiMistral(BaseConnectorAi):
|
|||
logger.error(f"Error calling Mistral API: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Error calling Mistral API: {str(e)}")
|
||||
|
||||
async def callAiBasicStream(self, modelCall: AiModelCall) -> AsyncGenerator[Union[str, AiModelResponse], None]:
|
||||
"""Stream Mistral response. Yields str deltas, then final AiModelResponse."""
|
||||
try:
|
||||
model = modelCall.model
|
||||
options = modelCall.options
|
||||
temperature = getattr(options, "temperature", None)
|
||||
if temperature is None:
|
||||
temperature = model.temperature
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"model": model.name,
|
||||
"messages": modelCall.messages,
|
||||
"temperature": temperature,
|
||||
"max_tokens": model.maxTokens,
|
||||
"stream": True,
|
||||
}
|
||||
|
||||
fullContent = ""
|
||||
|
||||
async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
|
||||
if response.status_code != 200:
|
||||
body = await response.aread()
|
||||
raise HTTPException(status_code=500, detail=f"Mistral stream error: {response.status_code} - {body.decode()}")
|
||||
|
||||
async for line in response.aiter_lines():
|
||||
if not line.startswith("data: "):
|
||||
continue
|
||||
data = line[6:]
|
||||
if data.strip() == "[DONE]":
|
||||
break
|
||||
try:
|
||||
chunk = _json.loads(data)
|
||||
except _json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
delta = chunk.get("choices", [{}])[0].get("delta", {})
|
||||
if "content" in delta and delta["content"]:
|
||||
fullContent += delta["content"]
|
||||
yield delta["content"]
|
||||
|
||||
yield AiModelResponse(
|
||||
content=fullContent,
|
||||
success=True,
|
||||
modelId=model.name,
|
||||
metadata={},
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error streaming Mistral API: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Error streaming Mistral API: {e}")
|
||||
|
||||
async def callEmbedding(self, modelCall: AiModelCall) -> AiModelResponse:
|
||||
"""Generate embeddings via the Mistral Embeddings API.
|
||||
|
||||
Reads texts from modelCall.embeddingInput.
|
||||
Returns vectors in metadata["embeddings"].
|
||||
"""
|
||||
try:
|
||||
model = modelCall.model
|
||||
texts = modelCall.embeddingInput or []
|
||||
if not texts:
|
||||
return AiModelResponse(
|
||||
content="", success=False, error="No embeddingInput provided"
|
||||
)
|
||||
|
||||
payload = {"model": model.name, "input": texts}
|
||||
response = await self.httpClient.post(model.apiUrl, json=payload)
|
||||
|
||||
if response.status_code != 200:
|
||||
errorMessage = f"Mistral Embedding API error: {response.status_code} - {response.text}"
|
||||
logger.error(errorMessage)
|
||||
if response.status_code == 429:
|
||||
raise RateLimitExceededException(f"Rate limit exceeded for {model.name}")
|
||||
raise HTTPException(status_code=500, detail=errorMessage)
|
||||
|
||||
responseJson = response.json()
|
||||
embeddings = [item["embedding"] for item in responseJson["data"]]
|
||||
usage = responseJson.get("usage", {})
|
||||
|
||||
return AiModelResponse(
|
||||
content="",
|
||||
success=True,
|
||||
modelId=model.name,
|
||||
tokensUsed={
|
||||
"input": usage.get("prompt_tokens", 0),
|
||||
"output": 0,
|
||||
"total": usage.get("total_tokens", 0),
|
||||
},
|
||||
metadata={"embeddings": embeddings},
|
||||
)
|
||||
except RateLimitExceededException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error calling Mistral Embedding API: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Error calling Mistral Embedding API: {str(e)}")
|
||||
|
||||
async def callAiImage(self, modelCall: AiModelCall) -> AiModelResponse:
|
||||
"""
|
||||
Analyzes an image with the Mistral Vision API using the standardized pattern.
|
||||
|
|
|
|||
|
|
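The embedding path introduced for Mistral above (and mirrored for OpenAI below) reads the input texts from modelCall.embeddingInput and returns the vectors in metadata["embeddings"]. A minimal consumption sketch, assuming an already-built AiModelCall for the mistral-embed model:

async def embedTexts(connector: "BaseConnectorAi", modelCall: "AiModelCall") -> list:
    # Hypothetical helper: returns one vector per input text, or raises on failure.
    response = await connector.callEmbedding(modelCall)
    if not response.success:
        raise RuntimeError(response.error or "embedding call failed")
    # Vectors are carried in metadata["embeddings"], in input order.
    return response.metadata["embeddings"]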
@@ -1,8 +1,9 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
import logging
|
||||
import json as _json
|
||||
import httpx
|
||||
from typing import List
|
||||
from typing import List, Dict, Any, AsyncGenerator, Union
|
||||
from fastapi import HTTPException
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
from .aicoreBase import BaseConnectorAi
|
||||
|
|
@@ -67,13 +68,15 @@ class AiOpenai(BaseConnectorAi):
|
|||
speedRating=8, # Good speed for complex tasks
|
||||
qualityRating=10, # High quality
|
||||
functionCall=self.callAiBasic,
|
||||
functionCallStream=self.callAiBasicStream,
|
||||
priority=PriorityEnum.BALANCED,
|
||||
processingMode=ProcessingModeEnum.ADVANCED,
|
||||
operationTypes=createOperationTypeRatings(
|
||||
(OperationTypeEnum.PLAN, 9),
|
||||
(OperationTypeEnum.DATA_ANALYSE, 10),
|
||||
(OperationTypeEnum.DATA_GENERATE, 10),
|
||||
(OperationTypeEnum.DATA_EXTRACT, 7)
|
||||
(OperationTypeEnum.DATA_EXTRACT, 7),
|
||||
(OperationTypeEnum.AGENT, 9),
|
||||
),
|
||||
version="gpt-4o",
|
||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0025 + (bytesReceived / 4 / 1000) * 0.01
|
||||
|
|
@@ -92,13 +95,15 @@ class AiOpenai(BaseConnectorAi):
|
|||
speedRating=9, # Very fast
|
||||
qualityRating=8, # Good quality, replaces gpt-3.5-turbo
|
||||
functionCall=self.callAiBasic,
|
||||
functionCallStream=self.callAiBasicStream,
|
||||
priority=PriorityEnum.SPEED,
|
||||
processingMode=ProcessingModeEnum.BASIC,
|
||||
operationTypes=createOperationTypeRatings(
|
||||
(OperationTypeEnum.PLAN, 8),
|
||||
(OperationTypeEnum.DATA_ANALYSE, 8),
|
||||
(OperationTypeEnum.DATA_GENERATE, 9),
|
||||
(OperationTypeEnum.DATA_EXTRACT, 7)
|
||||
(OperationTypeEnum.DATA_EXTRACT, 7),
|
||||
(OperationTypeEnum.AGENT, 8),
|
||||
),
|
||||
version="gpt-4o-mini",
|
||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00015 + (bytesReceived / 4 / 1000) * 0.0006
|
||||
|
|
@@ -125,6 +130,48 @@ class AiOpenai(BaseConnectorAi):
|
|||
version="gpt-4o",
|
||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0025 + (bytesReceived / 4 / 1000) * 0.01
|
||||
),
|
||||
AiModel(
|
||||
name="text-embedding-3-small",
|
||||
displayName="OpenAI Embedding Small",
|
||||
connectorType="openai",
|
||||
apiUrl="https://api.openai.com/v1/embeddings",
|
||||
temperature=0.0,
|
||||
maxTokens=0,
|
||||
contextLength=8191,
|
||||
costPer1kTokensInput=0.00002, # $0.02/M tokens
|
||||
costPer1kTokensOutput=0.0,
|
||||
speedRating=10,
|
||||
qualityRating=8,
|
||||
functionCall=self.callEmbedding,
|
||||
priority=PriorityEnum.COST,
|
||||
processingMode=ProcessingModeEnum.BASIC,
|
||||
operationTypes=createOperationTypeRatings(
|
||||
(OperationTypeEnum.EMBEDDING, 10)
|
||||
),
|
||||
version="text-embedding-3-small",
|
||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00002
|
||||
),
|
||||
AiModel(
|
||||
name="text-embedding-3-large",
|
||||
displayName="OpenAI Embedding Large",
|
||||
connectorType="openai",
|
||||
apiUrl="https://api.openai.com/v1/embeddings",
|
||||
temperature=0.0,
|
||||
maxTokens=0,
|
||||
contextLength=8191,
|
||||
costPer1kTokensInput=0.00013, # $0.13/M tokens
|
||||
costPer1kTokensOutput=0.0,
|
||||
speedRating=9,
|
||||
qualityRating=10,
|
||||
functionCall=self.callEmbedding,
|
||||
priority=PriorityEnum.QUALITY,
|
||||
processingMode=ProcessingModeEnum.ADVANCED,
|
||||
operationTypes=createOperationTypeRatings(
|
||||
(OperationTypeEnum.EMBEDDING, 10)
|
||||
),
|
||||
version="text-embedding-3-large",
|
||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00013
|
||||
),
|
||||
AiModel(
|
||||
name="dall-e-3",
|
||||
displayName="OpenAI DALL-E 3",
|
||||
|
|
@@ -179,6 +226,10 @@ class AiOpenai(BaseConnectorAi):
|
|||
"max_tokens": maxTokens
|
||||
}
|
||||
|
||||
if modelCall.tools:
|
||||
payload["tools"] = modelCall.tools
|
||||
payload["tool_choice"] = modelCall.toolChoice or "auto"
|
||||
|
||||
response = await self.httpClient.post(
|
||||
model.apiUrl,
|
||||
json=payload
|
||||
|
|
@@ -218,22 +269,150 @@ class AiOpenai(BaseConnectorAi):
|
|||
raise HTTPException(status_code=500, detail=error_message)
|
||||
|
||||
responseJson = response.json()
|
||||
content = responseJson["choices"][0]["message"]["content"]
|
||||
choiceMessage = responseJson["choices"][0]["message"]
|
||||
content = choiceMessage.get("content") or ""
|
||||
|
||||
metadata = {"response_id": responseJson.get("id", "")}
|
||||
if choiceMessage.get("tool_calls"):
|
||||
metadata["toolCalls"] = choiceMessage["tool_calls"]
|
||||
|
||||
return AiModelResponse(
|
||||
content=content,
|
||||
success=True,
|
||||
modelId=model.name,
|
||||
metadata={"response_id": responseJson.get("id", "")}
|
||||
metadata=metadata
|
||||
)
|
||||
|
||||
except ContextLengthExceededException:
|
||||
# Re-raise context length exceptions without wrapping
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error calling OpenAI API: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Error calling OpenAI API: {str(e)}")
|
||||
|
||||
async def callAiBasicStream(self, modelCall: AiModelCall) -> AsyncGenerator[Union[str, AiModelResponse], None]:
|
||||
"""Stream OpenAI response. Yields str deltas, then final AiModelResponse."""
|
||||
try:
|
||||
messages = modelCall.messages
|
||||
model = modelCall.model
|
||||
options = modelCall.options
|
||||
temperature = getattr(options, "temperature", None)
|
||||
if temperature is None:
|
||||
temperature = model.temperature
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"model": model.name,
|
||||
"messages": messages,
|
||||
"temperature": temperature,
|
||||
"max_tokens": model.maxTokens,
|
||||
"stream": True,
|
||||
}
|
||||
if modelCall.tools:
|
||||
payload["tools"] = modelCall.tools
|
||||
payload["tool_choice"] = modelCall.toolChoice or "auto"
|
||||
|
||||
fullContent = ""
|
||||
toolCallsAccum: Dict[int, Dict[str, Any]] = {}
|
||||
|
||||
async with self.httpClient.stream("POST", model.apiUrl, json=payload) as response:
|
||||
if response.status_code != 200:
|
||||
body = await response.aread()
|
||||
raise HTTPException(status_code=500, detail=f"OpenAI stream error: {response.status_code} - {body.decode()}")
|
||||
|
||||
async for line in response.aiter_lines():
|
||||
if not line.startswith("data: "):
|
||||
continue
|
||||
data = line[6:]
|
||||
if data.strip() == "[DONE]":
|
||||
break
|
||||
try:
|
||||
chunk = _json.loads(data)
|
||||
except _json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
delta = chunk.get("choices", [{}])[0].get("delta", {})
|
||||
|
||||
if "content" in delta and delta["content"]:
|
||||
fullContent += delta["content"]
|
||||
yield delta["content"]
|
||||
|
||||
for tcDelta in delta.get("tool_calls", []):
|
||||
idx = tcDelta.get("index", 0)
|
||||
if idx not in toolCallsAccum:
|
||||
toolCallsAccum[idx] = {
|
||||
"id": tcDelta.get("id", ""),
|
||||
"type": "function",
|
||||
"function": {"name": "", "arguments": ""},
|
||||
}
|
||||
if tcDelta.get("id"):
|
||||
toolCallsAccum[idx]["id"] = tcDelta["id"]
|
||||
fn = tcDelta.get("function", {})
|
||||
if fn.get("name"):
|
||||
toolCallsAccum[idx]["function"]["name"] = fn["name"]
|
||||
if fn.get("arguments"):
|
||||
toolCallsAccum[idx]["function"]["arguments"] += fn["arguments"]
|
||||
|
||||
metadata: Dict[str, Any] = {}
|
||||
if toolCallsAccum:
|
||||
metadata["toolCalls"] = [toolCallsAccum[i] for i in sorted(toolCallsAccum)]
|
||||
|
||||
yield AiModelResponse(
|
||||
content=fullContent,
|
||||
success=True,
|
||||
modelId=model.name,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error streaming OpenAI API: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Error streaming OpenAI API: {e}")
|
||||
|
||||
async def callEmbedding(self, modelCall: AiModelCall) -> AiModelResponse:
|
||||
"""Generate embeddings via the OpenAI Embeddings API.
|
||||
|
||||
Reads texts from modelCall.embeddingInput.
|
||||
Returns vectors in metadata["embeddings"].
|
||||
"""
|
||||
try:
|
||||
model = modelCall.model
|
||||
texts = modelCall.embeddingInput or []
|
||||
if not texts:
|
||||
return AiModelResponse(
|
||||
content="", success=False, error="No embeddingInput provided"
|
||||
)
|
||||
|
||||
payload = {"model": model.name, "input": texts}
|
||||
response = await self.httpClient.post(model.apiUrl, json=payload)
|
||||
|
||||
if response.status_code != 200:
|
||||
errorMessage = f"OpenAI Embedding API error: {response.status_code} - {response.text}"
|
||||
logger.error(errorMessage)
|
||||
if response.status_code == 429:
|
||||
raise RateLimitExceededException(f"Rate limit exceeded for {model.name}")
|
||||
raise HTTPException(status_code=500, detail=errorMessage)
|
||||
|
||||
responseJson = response.json()
|
||||
embeddings = [item["embedding"] for item in responseJson["data"]]
|
||||
usage = responseJson.get("usage", {})
|
||||
|
||||
return AiModelResponse(
|
||||
content="",
|
||||
success=True,
|
||||
modelId=model.name,
|
||||
tokensUsed={
|
||||
"input": usage.get("prompt_tokens", 0),
|
||||
"output": 0,
|
||||
"total": usage.get("total_tokens", 0),
|
||||
},
|
||||
metadata={"embeddings": embeddings},
|
||||
)
|
||||
except (RateLimitExceededException, ContextLengthExceededException):
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error calling OpenAI Embedding API: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Error calling OpenAI Embedding API: {str(e)}")
|
||||
|
||||
async def callAiImage(self, modelCall: AiModelCall) -> AiModelResponse:
|
||||
"""
|
||||
Analyzes an image with the OpenAI Vision API using the standardized pattern.
|
||||
|
|
|
|||
|
|
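All three streaming implementations (Anthropic, Mistral, OpenAI) share the same server-sent-events plumbing: read lines, keep only those prefixed with "data: ", stop at "[DONE]" where the provider uses it, and skip undecodable chunks. A stripped-down sketch of that loop, independent of any specific connector; the URL, payload and headers are placeholders:

import json
import httpx

async def iterSseDeltas(url: str, payload: dict, headers: dict):
    # Generic SSE reader following the same pattern as the connector streams above.
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", url, json=payload, headers=headers) as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue
                data = line[6:]
                if data.strip() == "[DONE]":
                    break
                try:
                    yield json.loads(data)
                except json.JSONDecodeError:
                    continue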
@@ -288,7 +288,16 @@ class AiTavily(BaseConnectorAi):
        if maxResults < minResults or maxResults > maxAllowedResults:
            raise ValueError(f"maxResults must be between {minResults} and {maxAllowedResults}")

        # Perform actual API call
        # Tavily enforces a 400-character query limit
        TAVILY_MAX_QUERY_LENGTH = 400
        if len(query) > TAVILY_MAX_QUERY_LENGTH:
            truncated = query[:TAVILY_MAX_QUERY_LENGTH]
            lastSpace = truncated.rfind(' ')
            if lastSpace > TAVILY_MAX_QUERY_LENGTH // 2:
                truncated = truncated[:lastSpace]
            logger.warning(f"Tavily query truncated from {len(query)} to {len(truncated)} chars")
            query = truncated

        # Build kwargs only for provided options to avoid API rejections
        kwargs: dict = {"query": query, "max_results": maxResults}
        if searchDepth is not None:
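The truncation above cuts over-long queries at Tavily's 400-character limit and, where possible, backs up to the last space so words are not split. A small worked illustration of that logic in isolation:

TAVILY_MAX_QUERY_LENGTH = 400

def truncateQuery(query: str, maxLength: int = TAVILY_MAX_QUERY_LENGTH) -> str:
    # Same word-boundary rule as above: only back up to the last space
    # if it still leaves more than half of the allowed length.
    if len(query) <= maxLength:
        return query
    truncated = query[:maxLength]
    lastSpace = truncated.rfind(" ")
    if lastSpace > maxLength // 2:
        truncated = truncated[:lastSpace]
    return truncated

# A 450-character query comes back with at most 400 characters, cut at a space when possible.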
@@ -41,6 +41,11 @@ class SystemTable(BaseModel):
    )


def _isVectorType(sqlType: str) -> bool:
    """Check if a SQL type string represents a pgvector column."""
    return sqlType.upper().startswith("VECTOR")


def _isJsonbType(fieldType) -> bool:
    """Check if a type should be stored as JSONB in PostgreSQL."""
    # Direct dict or list
@@ -70,20 +75,26 @@ def _isJsonbType(fieldType) -> bool:


def _get_model_fields(model_class) -> Dict[str, str]:
    """Get all fields from Pydantic model and map to SQL types."""
    # Pydantic v2
    """Get all fields from Pydantic model and map to SQL types.

    Supports explicit db_type override via json_schema_extra={"db_type": "vector(1536)"}.
    This enables pgvector columns without special-casing field names.
    """
    model_fields = model_class.model_fields

    fields = {}
    for field_name, field_info in model_fields.items():
        # Pydantic v2
        field_type = field_info.annotation

        # Explicit db_type override (e.g. vector columns)
        extra = field_info.json_schema_extra
        if extra and isinstance(extra, dict) and "db_type" in extra:
            fields[field_name] = extra["db_type"]
            continue

        # Check for JSONB fields (Dict, List, or complex types)
        # Purely type-based detection - no hardcoded field names
        if _isJsonbType(field_type):
            fields[field_name] = "JSONB"
        # Simple type mapping
        elif field_type in (str, type(None)) or (
            get_origin(field_type) is Union and type(None) in get_args(field_type)
        ):
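Given the db_type override described above, a model can request a pgvector column purely through its field metadata. A hedged example model; the names and the 1536 dimension are illustrative, 1536 being the dimension the docstring itself uses as an example:

from typing import List, Optional
from pydantic import BaseModel, Field

class DocumentChunk(BaseModel):
    # Hypothetical model: _get_model_fields() would map 'embedding' to "vector(1536)"
    # via the json_schema_extra override; 'payload' would typically land on JSONB
    # through the type-based detection, and 'text' on TEXT.
    id: str
    text: str
    payload: Optional[dict] = None
    embedding: Optional[List[float]] = Field(
        default=None, json_schema_extra={"db_type": "vector(1536)"}
    )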
@@ -95,11 +106,45 @@ def _get_model_fields(model_class) -> Dict[str, str]:
        elif field_type == bool:
            fields[field_name] = "BOOLEAN"
        else:
            fields[field_name] = "TEXT" # Default to TEXT
            fields[field_name] = "TEXT"

    return fields


def _parseRecordFields(record: Dict[str, Any], fields: Dict[str, str], context: str = "") -> None:
    """Parse record fields in-place: numeric typing, vector parsing, JSONB deserialization."""
    import json as _json

    for fieldName, fieldType in fields.items():
        if fieldName not in record:
            continue
        value = record[fieldName]

        if fieldType in ("DOUBLE PRECISION", "INTEGER") and value is not None:
            try:
                record[fieldName] = float(value) if fieldType == "DOUBLE PRECISION" else int(value)
            except (ValueError, TypeError):
                logger.warning(f"Could not convert {fieldName} to {fieldType} ({context}): {value}")

        elif _isVectorType(fieldType) and value is not None:
            if isinstance(value, str):
                try:
                    record[fieldName] = [float(v) for v in value.strip("[]").split(",")]
                except (ValueError, TypeError):
                    logger.warning(f"Could not parse vector field {fieldName} ({context})")
            elif isinstance(value, list):
                pass # already a list

        elif fieldType == "JSONB" and value is not None:
            try:
                if isinstance(value, str):
                    record[fieldName] = _json.loads(value)
                elif not isinstance(value, (dict, list)):
                    record[fieldName] = _json.loads(str(value))
            except (_json.JSONDecodeError, TypeError, ValueError):
                logger.warning(f"Could not parse JSONB field {fieldName}, keeping as string ({context})")


# Cache connectors by (host, database, port) to avoid duplicate inits for same database.
# Thread safety: _connector_cache_lock protects cache access. userId is request-scoped via
# contextvars to avoid races when concurrent requests share the same connector.
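To show what _parseRecordFields does to a raw psycopg2 row, here is a small hedged demo with a hand-built record; the field names and values are made up:

# Illustrative input/output for _parseRecordFields, based on the rules above.
fields = {"score": "DOUBLE PRECISION", "embedding": "VECTOR(3)", "payload": "JSONB"}
record = {
    "score": "0.87",                # numeric value returned as a string by the driver
    "embedding": "[0.1,0.2,0.3]",   # pgvector comes back as a bracketed string
    "payload": '{"lang": "de"}',    # JSONB serialized as a JSON string
}
# _parseRecordFields(record, fields, "demo") would mutate record in place to:
# {"score": 0.87, "embedding": [0.1, 0.2, 0.3], "payload": {"lang": "de"}}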
@@ -187,6 +232,9 @@ class DatabaseConnector:
|
|||
# Thread safety
|
||||
self._lock = threading.Lock()
|
||||
|
||||
# pgvector extension state
|
||||
self._vectorExtensionEnabled = False
|
||||
|
||||
# Initialize system table
|
||||
self._systemTableName = "_system"
|
||||
self._initializeSystemTable()
|
||||
|
|
@@ -500,10 +548,32 @@ class DatabaseConnector:
|
|||
self.connection.rollback()
|
||||
return False
|
||||
|
||||
def _ensureVectorExtension(self) -> bool:
|
||||
"""Enable pgvector extension if not already enabled. Called lazily on first vector table."""
|
||||
if self._vectorExtensionEnabled:
|
||||
return True
|
||||
try:
|
||||
self._ensure_connection()
|
||||
with self.connection.cursor() as cursor:
|
||||
cursor.execute("CREATE EXTENSION IF NOT EXISTS vector")
|
||||
self.connection.commit()
|
||||
self._vectorExtensionEnabled = True
|
||||
logger.info("pgvector extension enabled")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to enable pgvector extension: {e}")
|
||||
if hasattr(self, "connection") and self.connection:
|
||||
self.connection.rollback()
|
||||
return False
|
||||
|
||||
def _create_table_from_model(self, cursor, table: str, model_class: type) -> None:
|
||||
"""Create table with columns matching Pydantic model fields."""
|
||||
fields = _get_model_fields(model_class)
|
||||
|
||||
# Enable pgvector if any field uses vector type
|
||||
if any(_isVectorType(sqlType) for sqlType in fields.values()):
|
||||
self._ensureVectorExtension()
|
||||
|
||||
# Build column definitions with quoted identifiers to preserve exact case
|
||||
columns = ['"id" VARCHAR(255) PRIMARY KEY']
|
||||
for field_name, sql_type in fields.items():
|
||||
|
|
@@ -576,28 +646,25 @@ class DatabaseConnector:
|
|||
elif hasattr(value, "value"):
|
||||
value = value.value
|
||||
|
||||
# Handle vector fields (pgvector) - convert List[float] to string
|
||||
elif col in fields and _isVectorType(fields[col]) and value is not None:
|
||||
if isinstance(value, list):
|
||||
value = f"[{','.join(str(v) for v in value)}]"
|
||||
|
||||
# Handle JSONB fields - ensure proper JSON format for PostgreSQL
|
||||
elif col in fields and fields[col] == "JSONB" and value is not None:
|
||||
import json
|
||||
|
||||
if isinstance(value, (dict, list)):
|
||||
# Convert Python objects to JSON string for PostgreSQL JSONB
|
||||
value = json.dumps(value)
|
||||
elif isinstance(value, str):
|
||||
# Validate that it's valid JSON, if not, try to parse and re-serialize
|
||||
try:
|
||||
# Test if it's already valid JSON
|
||||
json.loads(value)
|
||||
# If successful, keep as is
|
||||
pass
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
# If not valid JSON, convert to JSON string
|
||||
value = json.dumps(value)
|
||||
elif hasattr(value, 'model_dump'):
|
||||
# Handle Pydantic models
|
||||
value = json.dumps(value.model_dump())
|
||||
else:
|
||||
# Convert other types to JSON
|
||||
value = json.dumps(value)
|
||||
|
||||
values.append(value)
|
||||
|
|
@@ -635,46 +702,7 @@ class DatabaseConnector:
|
|||
record = dict(row)
|
||||
fields = _get_model_fields(model_class)
|
||||
|
||||
# Ensure numeric fields are properly typed and parse JSONB fields
|
||||
for field_name, field_type in fields.items():
|
||||
# Ensure numeric fields (float/int) are properly typed
|
||||
# psycopg2 may return them as strings in some environments (e.g., Azure PostgreSQL)
|
||||
if field_type in ("DOUBLE PRECISION", "INTEGER") and field_name in record:
|
||||
value = record[field_name]
|
||||
if value is not None:
|
||||
try:
|
||||
if field_type == "DOUBLE PRECISION":
|
||||
record[field_name] = float(value)
|
||||
elif field_type == "INTEGER":
|
||||
record[field_name] = int(value)
|
||||
except (ValueError, TypeError):
|
||||
# If conversion fails, log warning but keep original value
|
||||
logger.warning(
|
||||
f"Could not convert {field_name} to {field_type} for record {recordId}: {value}"
|
||||
)
|
||||
elif (
|
||||
field_type == "JSONB"
|
||||
and field_name in record
|
||||
and record[field_name] is not None
|
||||
):
|
||||
import json
|
||||
|
||||
try:
|
||||
if isinstance(record[field_name], str):
|
||||
# Parse JSON string back to Python object
|
||||
record[field_name] = json.loads(record[field_name])
|
||||
elif isinstance(record[field_name], (dict, list)):
|
||||
# Already a Python object, keep as is
|
||||
pass
|
||||
else:
|
||||
# Try to parse as JSON
|
||||
record[field_name] = json.loads(str(record[field_name]))
|
||||
except (json.JSONDecodeError, TypeError, ValueError):
|
||||
# If parsing fails, keep as string
|
||||
logger.warning(
|
||||
f"Could not parse JSONB field {field_name}, keeping as string: {record[field_name]}"
|
||||
)
|
||||
pass
|
||||
_parseRecordFields(record, fields, f"record {recordId}")
|
||||
|
||||
return record
|
||||
except Exception as e:
|
||||
|
|
@@ -737,55 +765,24 @@ class DatabaseConnector:
|
|||
cursor.execute(f'SELECT * FROM "{table}" ORDER BY "id"')
|
||||
records = [dict(row) for row in cursor.fetchall()]
|
||||
|
||||
# Handle JSONB fields for all records
|
||||
fields = _get_model_fields(model_class)
|
||||
model_fields = model_class.model_fields # Get Pydantic model fields
|
||||
modelFields = model_class.model_fields
|
||||
for record in records:
|
||||
for field_name, field_type in fields.items():
|
||||
if field_type == "JSONB" and field_name in record:
|
||||
if record[field_name] is None:
|
||||
# Generic type-based default: List types -> [], Dict types -> {}
|
||||
# Interfaces handle domain-specific defaults
|
||||
field_info = model_fields.get(field_name)
|
||||
if field_info:
|
||||
field_annotation = field_info.annotation
|
||||
# Check if it's a List type
|
||||
if (field_annotation == list or
|
||||
(hasattr(field_annotation, "__origin__") and
|
||||
field_annotation.__origin__ is list)):
|
||||
record[field_name] = []
|
||||
# Check if it's a Dict type
|
||||
elif (field_annotation == dict or
|
||||
(hasattr(field_annotation, "__origin__") and
|
||||
field_annotation.__origin__ is dict)):
|
||||
record[field_name] = {}
|
||||
else:
|
||||
record[field_name] = None
|
||||
else:
|
||||
record[field_name] = None
|
||||
else:
|
||||
import json
|
||||
|
||||
try:
|
||||
if isinstance(record[field_name], str):
|
||||
# Parse JSON string back to Python object
|
||||
record[field_name] = json.loads(
|
||||
record[field_name]
|
||||
)
|
||||
elif isinstance(record[field_name], (dict, list)):
|
||||
# Already a Python object, keep as is
|
||||
pass
|
||||
else:
|
||||
# Try to parse as JSON
|
||||
record[field_name] = json.loads(
|
||||
str(record[field_name])
|
||||
)
|
||||
except (json.JSONDecodeError, TypeError, ValueError):
|
||||
# If parsing fails, keep as string
|
||||
logger.warning(
|
||||
f"Could not parse JSONB field {field_name}, keeping as string: {record[field_name]}"
|
||||
)
|
||||
pass
|
||||
_parseRecordFields(record, fields, f"table {table}")
|
||||
# Set type-aware defaults for NULL JSONB fields
|
||||
for fieldName, fieldType in fields.items():
|
||||
if fieldType == "JSONB" and fieldName in record and record[fieldName] is None:
|
||||
fieldInfo = modelFields.get(fieldName)
|
||||
if fieldInfo:
|
||||
fieldAnnotation = fieldInfo.annotation
|
||||
if (fieldAnnotation == list or
|
||||
(hasattr(fieldAnnotation, "__origin__") and
|
||||
fieldAnnotation.__origin__ is list)):
|
||||
record[fieldName] = []
|
||||
elif (fieldAnnotation == dict or
|
||||
(hasattr(fieldAnnotation, "__origin__") and
|
||||
fieldAnnotation.__origin__ is dict)):
|
||||
record[fieldName] = {}
|
||||
|
||||
return records
|
||||
except Exception as e:
|
||||
|
|
@@ -936,70 +933,23 @@ class DatabaseConnector:
|
|||
cursor.execute(query, where_values)
|
||||
records = [dict(row) for row in cursor.fetchall()]
|
||||
|
||||
# Handle JSONB fields and ensure numeric types are correct
|
||||
fields = _get_model_fields(model_class)
|
||||
model_fields = model_class.model_fields # Get Pydantic model fields
|
||||
modelFields = model_class.model_fields
|
||||
for record in records:
|
||||
for field_name, field_type in fields.items():
|
||||
# Ensure numeric fields (float/int) are properly typed
|
||||
# psycopg2 may return them as strings in some environments (e.g., Azure PostgreSQL)
|
||||
if field_type in ("DOUBLE PRECISION", "INTEGER") and field_name in record:
|
||||
value = record[field_name]
|
||||
if value is not None:
|
||||
try:
|
||||
if field_type == "DOUBLE PRECISION":
|
||||
record[field_name] = float(value)
|
||||
elif field_type == "INTEGER":
|
||||
record[field_name] = int(value)
|
||||
except (ValueError, TypeError):
|
||||
# If conversion fails, log warning but keep original value
|
||||
logger.warning(
|
||||
f"Could not convert {field_name} to {field_type} for record {record.get('id', 'unknown')}: {value}"
|
||||
)
|
||||
elif field_type == "JSONB" and field_name in record:
|
||||
if record[field_name] is None:
|
||||
# Generic type-based default: List types -> [], Dict types -> {}
|
||||
# Interfaces handle domain-specific defaults
|
||||
field_info = model_fields.get(field_name)
|
||||
if field_info:
|
||||
field_annotation = field_info.annotation
|
||||
# Check if it's a List type
|
||||
if (field_annotation == list or
|
||||
(hasattr(field_annotation, "__origin__") and
|
||||
field_annotation.__origin__ is list)):
|
||||
record[field_name] = []
|
||||
# Check if it's a Dict type
|
||||
elif (field_annotation == dict or
|
||||
(hasattr(field_annotation, "__origin__") and
|
||||
field_annotation.__origin__ is dict)):
|
||||
record[field_name] = {}
|
||||
else:
|
||||
record[field_name] = None
|
||||
else:
|
||||
record[field_name] = None
|
||||
else:
|
||||
import json
|
||||
|
||||
try:
|
||||
if isinstance(record[field_name], str):
|
||||
# Parse JSON string back to Python object
|
||||
record[field_name] = json.loads(
|
||||
record[field_name]
|
||||
)
|
||||
elif isinstance(record[field_name], (dict, list)):
|
||||
# Already a Python object, keep as is
|
||||
pass
|
||||
else:
|
||||
# Try to parse as JSON
|
||||
record[field_name] = json.loads(
|
||||
str(record[field_name])
|
||||
)
|
||||
except (json.JSONDecodeError, TypeError, ValueError):
|
||||
# If parsing fails, keep as string
|
||||
logger.warning(
|
||||
f"Could not parse JSONB field {field_name}, keeping as string: {record[field_name]}"
|
||||
)
|
||||
pass
|
||||
_parseRecordFields(record, fields, f"table {table}")
|
||||
for fieldName, fieldType in fields.items():
|
||||
if fieldType == "JSONB" and fieldName in record and record[fieldName] is None:
|
||||
fieldInfo = modelFields.get(fieldName)
|
||||
if fieldInfo:
|
||||
fieldAnnotation = fieldInfo.annotation
|
||||
if (fieldAnnotation == list or
|
||||
(hasattr(fieldAnnotation, "__origin__") and
|
||||
fieldAnnotation.__origin__ is list)):
|
||||
record[fieldName] = []
|
||||
elif (fieldAnnotation == dict or
|
||||
(hasattr(fieldAnnotation, "__origin__") and
|
||||
fieldAnnotation.__origin__ is dict)):
|
||||
record[fieldName] = {}
|
||||
|
||||
# If fieldFilter is available, reduce the fields
|
||||
if fieldFilter and isinstance(fieldFilter, list):
|
||||
|
|
@@ -1080,7 +1030,10 @@ class DatabaseConnector:
|
|||
existingRecord.update(record)
|
||||
|
||||
# Save updated record
|
||||
self._saveRecord(model_class, recordId, existingRecord)
|
||||
saved = self._saveRecord(model_class, recordId, existingRecord)
|
||||
if not saved:
|
||||
table = model_class.__name__
|
||||
raise ValueError(f"Failed to save record {recordId} to table {table}")
|
||||
return existingRecord
|
||||
|
||||
def recordDelete(self, model_class: type, recordId: str) -> bool:
|
||||
|
|
@@ -1127,6 +1080,85 @@ class DatabaseConnector:
|
|||
initialId = systemData.get(table)
|
||||
return initialId
|
||||
|
||||
def semanticSearch(
|
||||
self,
|
||||
modelClass: type,
|
||||
vectorColumn: str,
|
||||
queryVector: List[float],
|
||||
limit: int = 10,
|
||||
recordFilter: Dict[str, Any] = None,
|
||||
minScore: float = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Semantic search using pgvector cosine distance.
|
||||
|
||||
Args:
|
||||
modelClass: Pydantic model class for the table.
|
||||
vectorColumn: Name of the vector column to search.
|
||||
queryVector: Query vector as List[float].
|
||||
limit: Maximum number of results.
|
||||
recordFilter: Additional WHERE filters (field: value).
|
||||
minScore: Minimum cosine similarity (0.0 - 1.0).
|
||||
|
||||
Returns:
|
||||
List of records with an added '_score' field (cosine similarity),
|
||||
sorted by similarity descending.
|
||||
"""
|
||||
table = modelClass.__name__
|
||||
|
||||
try:
|
||||
if not self._ensureTableExists(modelClass):
|
||||
return []
|
||||
|
||||
vectorStr = f"[{','.join(str(v) for v in queryVector)}]"
|
||||
|
||||
whereConditions = []
|
||||
whereValues = []
|
||||
|
||||
if recordFilter:
|
||||
for field, value in recordFilter.items():
|
||||
if value is None:
|
||||
whereConditions.append(f'"{field}" IS NULL')
|
||||
elif isinstance(value, (list, tuple)):
|
||||
if not value:
|
||||
whereConditions.append("1 = 0")
|
||||
else:
|
||||
whereConditions.append(f'"{field}" = ANY(%s)')
|
||||
whereValues.append(list(value))
|
||||
else:
|
||||
whereConditions.append(f'"{field}" = %s')
|
||||
whereValues.append(value)
|
||||
|
||||
if minScore is not None:
|
||||
whereConditions.append(
|
||||
f'1 - ("{vectorColumn}" <=> %s::vector) >= %s'
|
||||
)
|
||||
whereValues.extend([vectorStr, minScore])
|
||||
|
||||
whereClause = ""
|
||||
if whereConditions:
|
||||
whereClause = " WHERE " + " AND ".join(whereConditions)
|
||||
|
||||
query = (
|
||||
f'SELECT *, 1 - ("{vectorColumn}" <=> %s::vector) AS "_score" '
|
||||
f'FROM "{table}"{whereClause} '
|
||||
f'ORDER BY "{vectorColumn}" <=> %s::vector '
|
||||
f'LIMIT %s'
|
||||
)
|
||||
params = [vectorStr] + whereValues + [vectorStr, limit]
|
||||
|
||||
with self.connection.cursor() as cursor:
|
||||
cursor.execute(query, params)
|
||||
records = [dict(row) for row in cursor.fetchall()]
|
||||
|
||||
fields = _get_model_fields(modelClass)
|
||||
for record in records:
|
||||
_parseRecordFields(record, fields, f"semanticSearch {table}")
|
||||
|
||||
return records
|
||||
except Exception as e:
|
||||
logger.error(f"Error in semantic search on {table}: {e}")
|
||||
return []
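For reference, a hedged usage sketch of semanticSearch; the model name, vector column and connector wiring below are assumptions for illustration, not part of this commit.

# Hypothetical usage sketch (model, column name and connector instance are assumptions).
from pydantic import BaseModel, Field

class KnowledgeChunk(BaseModel):
    id: str
    text: str = ""
    # Stored as a pgvector column in the table backing this model
    embedding: list = Field(default_factory=list)

def searchChunks(connector, queryVector):
    # connector: a DatabaseConnector instance; queryVector: output of an embedding model
    return connector.semanticSearch(
        modelClass=KnowledgeChunk,
        vectorColumn="embedding",
        queryVector=queryVector,
        limit=5,
        recordFilter={"mandateId": "m-123"},  # becomes "mandateId" = %s in the WHERE clause
        minScore=0.75,                        # minimum cosine similarity
    )

Each hit carries the added "_score" field, so callers can sort or threshold further without re-querying.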
|
||||
|
||||
def close(self):
|
||||
"""Close the database connection."""
|
||||
if (
|
||||
|
|
@@ -1141,5 +1173,4 @@ class DatabaseConnector:
|
|||
try:
|
||||
self.close()
|
||||
except Exception:
|
||||
# Ignore errors during cleanup
|
||||
pass
|
||||
|
|
|
|||
modules/connectors/connectorProviderBase.py (new file, 54 lines)
|
|
@@ -0,0 +1,54 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Abstract base classes for the Provider-Connector architecture (1:n).
|
||||
|
||||
One ProviderConnector per vendor (e.g. MsftConnector, GoogleConnector).
|
||||
Each ProviderConnector exposes n ServiceAdapters (e.g. SharepointAdapter, OutlookAdapter).
|
||||
All ServiceAdapters share the same access token from the UserConnection.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
class ServiceAdapter(ABC):
|
||||
"""Standardized operations for a single service of a provider."""
|
||||
|
||||
@abstractmethod
|
||||
async def browse(self, path: str, filter: Optional[str] = None) -> list:
|
||||
"""List items (files/folders) at the given path."""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
async def download(self, path: str) -> bytes:
|
||||
"""Download a file and return its content bytes."""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
|
||||
"""Upload a file to the given path. Returns metadata of the created entry."""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
async def search(self, query: str, path: Optional[str] = None) -> list:
|
||||
"""Search for items matching the query."""
|
||||
...
|
||||
|
||||
|
||||
class ProviderConnector(ABC):
|
||||
"""One connector per provider. Manages a UserConnection + token.
|
||||
Provides access to n services of the provider."""
|
||||
|
||||
def __init__(self, connection, accessToken: str):
|
||||
self.connection = connection
|
||||
self.accessToken = accessToken
|
||||
|
||||
@abstractmethod
|
||||
def getAvailableServices(self) -> List[str]:
|
||||
"""Which services does this provider offer?"""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def getServiceAdapter(self, service: str) -> ServiceAdapter:
|
||||
"""Return the ServiceAdapter for a specific service."""
|
||||
...
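A hedged illustration of how the 1:n contract is consumed generically: iterate the services a provider reports and use the shared ServiceAdapter operations. The helper name is illustrative only.

# Hedged illustration: summarizing all services of a resolved provider.
async def summarizeProvider(provider: ProviderConnector) -> dict:
    summary = {}
    for service in provider.getAvailableServices():
        adapter = provider.getServiceAdapter(service)
        entries = await adapter.browse("/")  # root listing per service
        summary[service] = len(entries)
    return summary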
|
||||
modules/connectors/connectorResolver.py (new file, 94 lines)
|
|
@@ -0,0 +1,94 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""ConnectorResolver -- resolves a connectionId to the correct ProviderConnector and ServiceAdapter.
|
||||
|
||||
Registry maps authority values to ProviderConnector classes.
|
||||
The resolver loads the UserConnection, obtains a fresh token via SecurityService,
|
||||
and instantiates the appropriate connector.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, Type, Optional
|
||||
|
||||
from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ConnectorResolver:
|
||||
"""Resolves connectionId → ProviderConnector (with fresh token) → ServiceAdapter."""
|
||||
|
||||
_providerRegistry: Dict[str, Type[ProviderConnector]] = {}
|
||||
|
||||
def __init__(self, securityService, dbInterface):
|
||||
"""
|
||||
Args:
|
||||
securityService: SecurityService instance (for getFreshToken)
|
||||
dbInterface: DB interface with getUserConnection(connectionId)
|
||||
"""
|
||||
self._security = securityService
|
||||
self._db = dbInterface
|
||||
self._ensureRegistered()
|
||||
|
||||
def _ensureRegistered(self):
|
||||
"""Lazy-register known providers on first instantiation."""
|
||||
if ConnectorResolver._providerRegistry:
|
||||
return
|
||||
try:
|
||||
from modules.connectors.providerMsft.connectorMsft import MsftConnector
|
||||
ConnectorResolver._providerRegistry["msft"] = MsftConnector
|
||||
except ImportError:
|
||||
logger.warning("MsftConnector not available")
|
||||
|
||||
try:
|
||||
from modules.connectors.providerGoogle.connectorGoogle import GoogleConnector
|
||||
ConnectorResolver._providerRegistry["google"] = GoogleConnector
|
||||
except ImportError:
|
||||
logger.debug("GoogleConnector not available (stub)")
|
||||
|
||||
try:
|
||||
from modules.connectors.providerFtp.connectorFtp import FtpConnector
|
||||
ConnectorResolver._providerRegistry["local:ftp"] = FtpConnector
|
||||
except ImportError:
|
||||
logger.debug("FtpConnector not available (stub)")
|
||||
|
||||
async def resolve(self, connectionId: str) -> ProviderConnector:
|
||||
"""Resolve connectionId to a ProviderConnector with a fresh access token."""
|
||||
connection = await self._loadConnection(connectionId)
|
||||
if not connection:
|
||||
raise ValueError(f"UserConnection not found: {connectionId}")
|
||||
|
||||
authority = getattr(connection, "authority", None)
|
||||
if not authority:
|
||||
raise ValueError(f"Connection {connectionId} has no authority")
|
||||
|
||||
authorityStr = authority.value if hasattr(authority, "value") else str(authority)
|
||||
providerClass = self._providerRegistry.get(authorityStr)
|
||||
if not providerClass:
|
||||
raise ValueError(f"No ProviderConnector registered for authority: {authorityStr}")
|
||||
|
||||
token = self._security.getFreshToken(connectionId)
|
||||
if not token or not token.tokenAccess:
|
||||
raise ValueError(f"No valid token for connection {connectionId}")
|
||||
|
||||
return providerClass(connection, token.tokenAccess)
|
||||
|
||||
async def resolveService(self, connectionId: str, service: str) -> ServiceAdapter:
|
||||
"""Resolve connectionId + service name to a concrete ServiceAdapter."""
|
||||
provider = await self.resolve(connectionId)
|
||||
available = provider.getAvailableServices()
|
||||
if service not in available:
|
||||
raise ValueError(f"Service '{service}' not available. Options: {available}")
|
||||
return provider.getServiceAdapter(service)
|
||||
|
||||
async def _loadConnection(self, connectionId: str) -> Optional[Any]:
|
||||
"""Load UserConnection from DB."""
|
||||
try:
|
||||
if hasattr(self._db, "getUserConnection"):
|
||||
return self._db.getUserConnection(connectionId)
|
||||
if hasattr(self._db, "loadRecord"):
|
||||
from modules.datamodels.datamodelUam import UserConnection
|
||||
return self._db.loadRecord(UserConnection, connectionId)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load connection {connectionId}: {e}")
|
||||
return None
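End to end, a caller resolves a connectionId to a concrete adapter and then uses the standardized operations. A hedged call-site sketch; the connectionId and the surrounding service instances are assumptions.

# Hypothetical call site (IDs and injected services are assumptions).
async def listSharepointRoot(securityService, dbInterface, connectionId: str):
    resolver = ConnectorResolver(securityService, dbInterface)
    # "sharepoint" must be one of provider.getAvailableServices()
    adapter = await resolver.resolveService(connectionId, "sharepoint")
    entries = await adapter.browse("/")  # root -> site discovery
    return [(e.name, e.path, e.isFolder) for e in entries]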
|
||||
|
|
@@ -1,4 +1,3 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
|
||||
"""FTP/SFTP Provider Connector stub."""
|
||||
modules/connectors/providerFtp/connectorFtp.py (new file, 48 lines)
|
|
@@ -0,0 +1,48 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""FTP/SFTP ProviderConnector stub.
|
||||
|
||||
Implements the ProviderConnector interface for FTP/SFTP file access.
|
||||
Full implementation follows when FTP integration is prioritized.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
|
||||
from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter
|
||||
from modules.datamodels.datamodelDataSource import ExternalEntry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FtpFilesAdapter(ServiceAdapter):
|
||||
"""FTP files ServiceAdapter (stub)."""
|
||||
|
||||
def __init__(self, accessToken: str):
|
||||
self._accessToken = accessToken
|
||||
|
||||
async def browse(self, path: str, filter: Optional[str] = None) -> List[ExternalEntry]:
|
||||
logger.info(f"FTP browse stub: {path}")
|
||||
return []
|
||||
|
||||
async def download(self, path: str) -> bytes:
|
||||
logger.info(f"FTP download stub: {path}")
|
||||
return b""
|
||||
|
||||
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
|
||||
return {"error": "FTP upload not yet implemented"}
|
||||
|
||||
async def search(self, query: str, path: Optional[str] = None) -> List[ExternalEntry]:
|
||||
return []
|
||||
|
||||
|
||||
class FtpConnector(ProviderConnector):
|
||||
"""FTP ProviderConnector -- 1 connection -> files."""
|
||||
|
||||
def getAvailableServices(self) -> List[str]:
|
||||
return ["files"]
|
||||
|
||||
def getServiceAdapter(self, service: str) -> ServiceAdapter:
|
||||
if service != "files":
|
||||
raise ValueError(f"FTP only supports 'files' service, got '{service}'")
|
||||
return FtpFilesAdapter(self.accessToken)
|
||||
modules/connectors/providerGoogle/__init__.py (new file, 3 lines)
|
|
@@ -0,0 +1,3 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Google Provider Connector -- 1 Connection : n Services (Drive, Gmail)."""
|
||||
modules/connectors/providerGoogle/connectorGoogle.py (new file, 232 lines)
|
|
@@ -0,0 +1,232 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Google ProviderConnector -- Drive and Gmail via Google OAuth."""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import aiohttp
|
||||
|
||||
from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter
|
||||
from modules.datamodels.datamodelDataSource import ExternalEntry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DRIVE_BASE = "https://www.googleapis.com/drive/v3"
|
||||
_GMAIL_BASE = "https://gmail.googleapis.com/gmail/v1"
|
||||
|
||||
|
||||
async def _googleGet(token: str, url: str) -> Dict[str, Any]:
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
timeout = aiohttp.ClientTimeout(total=20)
|
||||
try:
|
||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||
async with session.get(url, headers=headers) as resp:
|
||||
if resp.status in (200, 201):
|
||||
return await resp.json()
|
||||
errorText = await resp.text()
|
||||
logger.warning(f"Google API {resp.status}: {errorText[:300]}")
|
||||
return {"error": f"{resp.status}: {errorText[:200]}"}
|
||||
except Exception as e:
|
||||
return {"error": str(e)}
|
||||
|
||||
|
||||
class DriveAdapter(ServiceAdapter):
|
||||
"""Google Drive ServiceAdapter -- browse files and folders."""
|
||||
|
||||
def __init__(self, accessToken: str):
|
||||
self._token = accessToken
|
||||
|
||||
async def browse(self, path: str, filter: Optional[str] = None) -> List[ExternalEntry]:
|
||||
folderId = (path or "").strip("/") or "root"
|
||||
query = f"'{folderId}' in parents and trashed=false"
|
||||
fields = "files(id,name,mimeType,size,modifiedTime,parents)"
|
||||
url = f"{_DRIVE_BASE}/files?q={query}&fields={fields}&pageSize=100&orderBy=folder,name"
|
||||
|
||||
result = await _googleGet(self._token, url)
|
||||
if "error" in result:
|
||||
logger.warning(f"Google Drive browse failed: {result['error']}")
|
||||
return []
|
||||
|
||||
entries = []
|
||||
for f in result.get("files", []):
|
||||
isFolder = f.get("mimeType") == "application/vnd.google-apps.folder"
|
||||
entries.append(ExternalEntry(
|
||||
name=f.get("name", ""),
|
||||
path=f"/{f.get('id', '')}",
|
||||
isFolder=isFolder,
|
||||
size=int(f.get("size", 0)) if f.get("size") else None,
|
||||
mimeType=f.get("mimeType") if not isFolder else None,
|
||||
metadata={"id": f.get("id"), "modifiedTime": f.get("modifiedTime")},
|
||||
))
|
||||
return entries
|
||||
|
||||
_EXPORT_MIME_MAP = {
|
||||
"application/vnd.google-apps.document": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"application/vnd.google-apps.spreadsheet": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
"application/vnd.google-apps.presentation": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"application/vnd.google-apps.drawing": "application/pdf",
|
||||
}
|
||||
|
||||
async def download(self, path: str) -> bytes:
|
||||
fileId = (path or "").strip("/")
|
||||
if not fileId:
|
||||
return b""
|
||||
headers = {"Authorization": f"Bearer {self._token}"}
|
||||
timeout = aiohttp.ClientTimeout(total=60)
|
||||
try:
|
||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||
# Try direct download first
|
||||
url = f"{_DRIVE_BASE}/files/{fileId}?alt=media"
|
||||
async with session.get(url, headers=headers) as resp:
|
||||
if resp.status == 200:
|
||||
return await resp.read()
|
||||
logger.debug(f"Google Drive direct download returned {resp.status} for {fileId}")
|
||||
|
||||
# If 403/404, check if it's a native Google file that needs export
|
||||
metaUrl = f"{_DRIVE_BASE}/files/{fileId}?fields=mimeType,name"
|
||||
async with session.get(metaUrl, headers=headers) as metaResp:
|
||||
if metaResp.status != 200:
|
||||
logger.warning(f"Google Drive metadata fetch failed ({metaResp.status}) for {fileId}")
|
||||
return b""
|
||||
meta = await metaResp.json()
|
||||
fileMime = meta.get("mimeType", "")
|
||||
fileName = meta.get("name", fileId)
|
||||
|
||||
exportMime = self._EXPORT_MIME_MAP.get(fileMime)
|
||||
if not exportMime:
|
||||
logger.warning(f"Google Drive: unsupported mimeType '{fileMime}' for file '{fileName}' ({fileId})")
|
||||
return b""
|
||||
|
||||
exportUrl = f"{_DRIVE_BASE}/files/{fileId}/export?mimeType={exportMime}"
|
||||
logger.info(f"Google Drive: exporting '{fileName}' as {exportMime}")
|
||||
async with session.get(exportUrl, headers=headers) as exportResp:
|
||||
if exportResp.status == 200:
|
||||
return await exportResp.read()
|
||||
logger.warning(f"Google Drive export failed ({exportResp.status}) for '{fileName}'")
|
||||
except Exception as e:
|
||||
logger.error(f"Google Drive download failed for {fileId}: {e}")
|
||||
return b""
|
||||
|
||||
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
|
||||
return {"error": "Google Drive upload not yet implemented"}
|
||||
|
||||
async def search(self, query: str, path: Optional[str] = None) -> List[ExternalEntry]:
|
||||
safeQuery = query.replace("'", "\\'")
|
||||
folderId = (path or "").strip("/")
|
||||
qParts = [f"name contains '{safeQuery}'", "trashed=false"]
|
||||
if folderId:
|
||||
qParts.append(f"'{folderId}' in parents")
|
||||
qStr = " and ".join(qParts)
|
||||
url = f"{_DRIVE_BASE}/files?q={qStr}&fields=files(id,name,mimeType,size)&pageSize=25"
|
||||
logger.debug(f"Google Drive search: q={qStr}")
|
||||
result = await _googleGet(self._token, url)
|
||||
if "error" in result:
|
||||
return []
|
||||
return [
|
||||
ExternalEntry(
|
||||
name=f.get("name", ""),
|
||||
path=f"/{f.get('id', '')}",
|
||||
isFolder=f.get("mimeType") == "application/vnd.google-apps.folder",
|
||||
size=int(f.get("size", 0)) if f.get("size") else None,
|
||||
)
|
||||
for f in result.get("files", [])
|
||||
]
|
||||
|
||||
|
||||
class GmailAdapter(ServiceAdapter):
|
||||
"""Gmail ServiceAdapter -- browse labels and messages."""
|
||||
|
||||
def __init__(self, accessToken: str):
|
||||
self._token = accessToken
|
||||
|
||||
async def browse(self, path: str, filter: Optional[str] = None) -> list:
|
||||
cleanPath = (path or "").strip("/")
|
||||
|
||||
if not cleanPath:
|
||||
url = f"{_GMAIL_BASE}/users/me/labels"
|
||||
result = await _googleGet(self._token, url)
|
||||
if "error" in result:
|
||||
logger.warning(f"Gmail labels failed: {result['error']}")
|
||||
return []
|
||||
_SYSTEM_LABELS = {"INBOX", "SENT", "DRAFT", "TRASH", "SPAM", "STARRED", "IMPORTANT"}
|
||||
labels = []
|
||||
for lbl in result.get("labels", []):
|
||||
labelId = lbl.get("id", "")
|
||||
labelName = lbl.get("name", labelId)
|
||||
if lbl.get("type") == "system" and labelId not in _SYSTEM_LABELS:
|
||||
continue
|
||||
labels.append(ExternalEntry(
|
||||
name=labelName,
|
||||
path=f"/{labelId}",
|
||||
isFolder=True,
|
||||
metadata={"id": labelId, "type": lbl.get("type", "")},
|
||||
))
|
||||
labels.sort(key=lambda e: (0 if e.metadata.get("type") == "system" else 1, e.name))
|
||||
return labels
|
||||
|
||||
url = f"{_GMAIL_BASE}/users/me/messages?labelIds={cleanPath}&maxResults=25"
|
||||
result = await _googleGet(self._token, url)
|
||||
if "error" in result:
|
||||
return []
|
||||
|
||||
entries = []
|
||||
for msg in result.get("messages", [])[:25]:
|
||||
msgId = msg.get("id", "")
|
||||
detailUrl = f"{_GMAIL_BASE}/users/me/messages/{msgId}?format=metadata&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=Date"
|
||||
detail = await _googleGet(self._token, detailUrl)
|
||||
if "error" in detail:
|
||||
entries.append(ExternalEntry(name=f"Message {msgId}", path=f"/{cleanPath}/{msgId}", isFolder=False))
|
||||
continue
|
||||
headers = {h.get("name", ""): h.get("value", "") for h in detail.get("payload", {}).get("headers", [])}
|
||||
entries.append(ExternalEntry(
|
||||
name=headers.get("Subject", "(no subject)"),
|
||||
path=f"/{cleanPath}/{msgId}",
|
||||
isFolder=False,
|
||||
metadata={
|
||||
"id": msgId,
|
||||
"from": headers.get("From", ""),
|
||||
"date": headers.get("Date", ""),
|
||||
"snippet": detail.get("snippet", ""),
|
||||
},
|
||||
))
|
||||
return entries
|
||||
|
||||
async def download(self, path: str) -> bytes:
|
||||
return b""
|
||||
|
||||
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
|
||||
return {"error": "Gmail upload not applicable"}
|
||||
|
||||
async def search(self, query: str, path: Optional[str] = None) -> list:
|
||||
url = f"{_GMAIL_BASE}/users/me/messages?q={query}&maxResults=10"
|
||||
result = await _googleGet(self._token, url)
|
||||
if "error" in result:
|
||||
return []
|
||||
return [
|
||||
ExternalEntry(
|
||||
name=f"Message {m.get('id', '')}",
|
||||
path=f"/{m.get('id', '')}",
|
||||
isFolder=False,
|
||||
metadata={"id": m.get("id")},
|
||||
)
|
||||
for m in result.get("messages", [])
|
||||
]
|
||||
|
||||
|
||||
class GoogleConnector(ProviderConnector):
|
||||
"""Google ProviderConnector -- 1 connection -> Drive + Gmail."""
|
||||
|
||||
_SERVICE_MAP = {
|
||||
"drive": DriveAdapter,
|
||||
"gmail": GmailAdapter,
|
||||
}
|
||||
|
||||
def getAvailableServices(self) -> List[str]:
|
||||
return list(self._SERVICE_MAP.keys())
|
||||
|
||||
def getServiceAdapter(self, service: str) -> ServiceAdapter:
|
||||
adapterClass = self._SERVICE_MAP.get(service)
|
||||
if not adapterClass:
|
||||
raise ValueError(f"Unknown Google service: {service}. Available: {list(self._SERVICE_MAP.keys())}")
|
||||
return adapterClass(self.accessToken)
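A hedged usage sketch for the Google connector; the connection object, token and folder id below are placeholders.

# Hypothetical usage (connection, token and folder id are placeholders).
async def listDriveFolder(connection, accessToken: str, folderId: str = "root"):
    connector = GoogleConnector(connection, accessToken)
    drive = connector.getServiceAdapter("drive")
    # Root listing; native Google formats are exported on download (see _EXPORT_MIME_MAP above)
    return await drive.browse(f"/{folderId}")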
|
||||
modules/connectors/providerMsft/__init__.py (new file, 3 lines)
|
|
@@ -0,0 +1,3 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Microsoft Provider Connector -- 1 Connection : n Services (SharePoint, Outlook, Teams, OneDrive)."""
|
||||
modules/connectors/providerMsft/connectorMsft.py (new file, 459 lines)
|
|
@@ -0,0 +1,459 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Microsoft ProviderConnector -- one MSFT connection serves SharePoint, Outlook, Teams, OneDrive.
|
||||
|
||||
All ServiceAdapters share the same OAuth access token obtained from the
|
||||
UserConnection (authority=msft).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import aiohttp
|
||||
import asyncio
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter
|
||||
from modules.datamodels.datamodelDataSource import ExternalEntry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_GRAPH_BASE = "https://graph.microsoft.com/v1.0"
|
||||
|
||||
|
||||
class _GraphApiMixin:
|
||||
"""Shared Graph API call logic for all MSFT service adapters."""
|
||||
|
||||
def __init__(self, accessToken: str):
|
||||
self._accessToken = accessToken
|
||||
|
||||
async def _graphGet(self, endpoint: str) -> Dict[str, Any]:
|
||||
return await _makeGraphCall(self._accessToken, endpoint, "GET")
|
||||
|
||||
async def _graphPost(self, endpoint: str, data: Any = None) -> Dict[str, Any]:
|
||||
return await _makeGraphCall(self._accessToken, endpoint, "POST", data)
|
||||
|
||||
async def _graphPut(self, endpoint: str, data: bytes = None) -> Dict[str, Any]:
|
||||
return await _makeGraphCall(self._accessToken, endpoint, "PUT", data)
|
||||
|
||||
async def _graphDelete(self, endpoint: str) -> Dict[str, Any]:
|
||||
return await _makeGraphCall(self._accessToken, endpoint, "DELETE")
|
||||
|
||||
async def _graphDownload(self, endpoint: str) -> Optional[bytes]:
|
||||
"""Download binary content from Graph API."""
|
||||
headers = {"Authorization": f"Bearer {self._accessToken}"}
|
||||
timeout = aiohttp.ClientTimeout(total=60)
|
||||
url = f"{_GRAPH_BASE}/{endpoint.lstrip('/')}"
|
||||
try:
|
||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||
async with session.get(url, headers=headers) as resp:
|
||||
if resp.status == 200:
|
||||
return await resp.read()
|
||||
logger.error(f"Download failed {resp.status}: {await resp.text()}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Graph download error: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def _makeGraphCall(
|
||||
token: str, endpoint: str, method: str = "GET", data: Any = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Execute a single Microsoft Graph API call."""
|
||||
url = f"{_GRAPH_BASE}/{endpoint.lstrip('/')}"
|
||||
contentType = "application/json"
|
||||
if method == "PUT" and isinstance(data, bytes):
|
||||
contentType = "application/octet-stream"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Content-Type": contentType,
|
||||
}
|
||||
timeout = aiohttp.ClientTimeout(total=30)
|
||||
try:
|
||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||
kwargs: Dict[str, Any] = {"headers": headers}
|
||||
if data is not None:
|
||||
kwargs["data"] = data
|
||||
|
||||
if method == "GET":
|
||||
async with session.get(url, **kwargs) as resp:
|
||||
return await _handleResponse(resp)
|
||||
elif method == "POST":
|
||||
async with session.post(url, **kwargs) as resp:
|
||||
return await _handleResponse(resp)
|
||||
elif method == "PUT":
|
||||
async with session.put(url, **kwargs) as resp:
|
||||
return await _handleResponse(resp)
|
||||
elif method == "DELETE":
|
||||
async with session.delete(url, **kwargs) as resp:
|
||||
if resp.status in (200, 204):
|
||||
return {}
|
||||
return await _handleResponse(resp)
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
return {"error": f"Graph API timeout: {endpoint}"}
|
||||
except Exception as e:
|
||||
return {"error": f"Graph API error: {e}"}
|
||||
|
||||
return {"error": f"Unsupported method: {method}"}
|
||||
|
||||
|
||||
async def _handleResponse(resp: aiohttp.ClientResponse) -> Dict[str, Any]:
|
||||
if resp.status in (200, 201):
|
||||
return await resp.json()
|
||||
errorText = await resp.text()
|
||||
logger.error(f"Graph API {resp.status}: {errorText}")
|
||||
return {"error": f"{resp.status}: {errorText}"}
|
||||
|
||||
|
||||
def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry:
|
||||
isFolder = "folder" in item
|
||||
return ExternalEntry(
|
||||
name=item.get("name", ""),
|
||||
path=f"{basePath}/{item.get('name', '')}" if basePath else item.get("name", ""),
|
||||
isFolder=isFolder,
|
||||
size=item.get("size"),
|
||||
mimeType=item.get("file", {}).get("mimeType") if not isFolder else None,
|
||||
lastModified=None,
|
||||
metadata={
|
||||
"id": item.get("id"),
|
||||
"webUrl": item.get("webUrl"),
|
||||
"childCount": item.get("folder", {}).get("childCount") if isFolder else None,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SharePoint Adapter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
|
||||
"""ServiceAdapter for SharePoint (files, sites) via Microsoft Graph."""
|
||||
|
||||
async def browse(self, path: str, filter: Optional[str] = None) -> List[ExternalEntry]:
|
||||
"""List items in a SharePoint folder.
|
||||
|
||||
Path format: /sites/<SiteName>/<FolderPath>
|
||||
Root "/" lists available sites via discovery.
|
||||
"""
|
||||
if not path or path == "/":
|
||||
return await self._discoverSites()
|
||||
|
||||
siteId, folderPath = _parseSharepointPath(path)
|
||||
if not siteId:
|
||||
return await self._discoverSites()
|
||||
|
||||
if not folderPath or folderPath == "/":
|
||||
endpoint = f"sites/{siteId}/drive/root/children"
|
||||
else:
|
||||
cleanPath = folderPath.lstrip("/")
|
||||
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children"
|
||||
|
||||
result = await self._graphGet(endpoint)
|
||||
if "error" in result:
|
||||
logger.warning(f"SharePoint browse failed: {result['error']}")
|
||||
return []
|
||||
|
||||
entries = [_graphItemToExternalEntry(item, path) for item in result.get("value", [])]
|
||||
if filter:
|
||||
entries = [e for e in entries if _matchFilter(e, filter)]
|
||||
return entries
|
||||
|
||||
async def _discoverSites(self) -> List[ExternalEntry]:
|
||||
"""Discover accessible SharePoint sites."""
|
||||
result = await self._graphGet("sites?search=*&$top=50")
|
||||
if "error" in result:
|
||||
logger.warning(f"SharePoint site discovery failed: {result['error']}")
|
||||
return []
|
||||
return [
|
||||
ExternalEntry(
|
||||
name=s.get("displayName") or s.get("name", ""),
|
||||
path=f"/sites/{s.get('id', '')}",
|
||||
isFolder=True,
|
||||
metadata={
|
||||
"id": s.get("id"),
|
||||
"webUrl": s.get("webUrl"),
|
||||
"description": s.get("description", ""),
|
||||
},
|
||||
)
|
||||
for s in result.get("value", [])
|
||||
if s.get("displayName")
|
||||
]
|
||||
|
||||
async def download(self, path: str) -> bytes:
|
||||
siteId, filePath = _parseSharepointPath(path)
|
||||
if not siteId or not filePath:
|
||||
return b""
|
||||
cleanPath = filePath.strip("/")
|
||||
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/content"
|
||||
data = await self._graphDownload(endpoint)
|
||||
return data or b""
|
||||
|
||||
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
|
||||
siteId, folderPath = _parseSharepointPath(path)
|
||||
if not siteId:
|
||||
return {"error": "Invalid SharePoint path"}
|
||||
cleanFolder = (folderPath or "").strip("/")
|
||||
uploadPath = f"{cleanFolder}/{fileName}" if cleanFolder else fileName
|
||||
endpoint = f"sites/{siteId}/drive/root:/{uploadPath}:/content"
|
||||
result = await self._graphPut(endpoint, data)
|
||||
return result
|
||||
|
||||
async def search(self, query: str, path: Optional[str] = None) -> List[ExternalEntry]:
|
||||
siteId, _ = _parseSharepointPath(path or "")
|
||||
if not siteId:
|
||||
return []
|
||||
safeQuery = query.replace("'", "''")
|
||||
endpoint = f"sites/{siteId}/drive/root/search(q='{safeQuery}')"
|
||||
result = await self._graphGet(endpoint)
|
||||
if "error" in result:
|
||||
return []
|
||||
return [_graphItemToExternalEntry(item) for item in result.get("value", [])]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Outlook Adapter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class OutlookAdapter(_GraphApiMixin, ServiceAdapter):
|
||||
"""ServiceAdapter for Outlook (mail, calendar) via Microsoft Graph."""
|
||||
|
||||
async def browse(self, path: str, filter: Optional[str] = None) -> List[ExternalEntry]:
|
||||
"""List mail folders or messages.
|
||||
|
||||
path = "" or "/" → list mail folders
|
||||
path = "/Inbox" → list messages in Inbox
|
||||
"""
|
||||
if not path or path == "/":
|
||||
result = await self._graphGet("me/mailFolders")
|
||||
if "error" in result:
|
||||
return []
|
||||
return [
|
||||
ExternalEntry(
|
||||
name=f.get("displayName", ""),
|
||||
path=f"/{f.get('id', '')}",
|
||||
isFolder=True,
|
||||
metadata={"id": f.get("id"), "totalItemCount": f.get("totalItemCount")},
|
||||
)
|
||||
for f in result.get("value", [])
|
||||
]
|
||||
|
||||
folderId = path.strip("/")
|
||||
endpoint = f"me/mailFolders/{folderId}/messages?$top=25&$orderby=receivedDateTime desc"
|
||||
result = await self._graphGet(endpoint)
|
||||
if "error" in result:
|
||||
return []
|
||||
return [
|
||||
ExternalEntry(
|
||||
name=m.get("subject", "(no subject)"),
|
||||
path=f"{path}/{m.get('id', '')}",
|
||||
isFolder=False,
|
||||
metadata={
|
||||
"id": m.get("id"),
|
||||
"from": m.get("from", {}).get("emailAddress", {}).get("address"),
|
||||
"receivedDateTime": m.get("receivedDateTime"),
|
||||
"hasAttachments": m.get("hasAttachments", False),
|
||||
},
|
||||
)
|
||||
for m in result.get("value", [])
|
||||
]
|
||||
|
||||
async def download(self, path: str) -> bytes:
|
||||
"""Download a mail message as JSON bytes."""
|
||||
import json
|
||||
messageId = path.strip("/").split("/")[-1]
|
||||
result = await self._graphGet(f"me/messages/{messageId}")
|
||||
if "error" in result:
|
||||
return b""
|
||||
return json.dumps(result, ensure_ascii=False).encode("utf-8")
|
||||
|
||||
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
|
||||
"""Not applicable for Outlook in the file sense."""
|
||||
return {"error": "Upload not supported for Outlook"}
|
||||
|
||||
async def search(self, query: str, path: Optional[str] = None) -> List[ExternalEntry]:
|
||||
safeQuery = query.replace("'", "''")
|
||||
endpoint = f"me/messages?$search=\"{safeQuery}\"&$top=25"
|
||||
result = await self._graphGet(endpoint)
|
||||
if "error" in result:
|
||||
return []
|
||||
return [
|
||||
ExternalEntry(
|
||||
name=m.get("subject", "(no subject)"),
|
||||
path=f"/search/{m.get('id', '')}",
|
||||
isFolder=False,
|
||||
metadata={
|
||||
"id": m.get("id"),
|
||||
"from": m.get("from", {}).get("emailAddress", {}).get("address"),
|
||||
"receivedDateTime": m.get("receivedDateTime"),
|
||||
},
|
||||
)
|
||||
for m in result.get("value", [])
|
||||
]
|
||||
|
||||
async def sendMail(
|
||||
self, to: List[str], subject: str, body: str,
|
||||
cc: Optional[List[str]] = None, attachments: Optional[List[Dict]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Send an email via Microsoft Graph."""
|
||||
import json
|
||||
message: Dict[str, Any] = {
|
||||
"subject": subject,
|
||||
"body": {"contentType": "Text", "content": body},
|
||||
"toRecipients": [{"emailAddress": {"address": addr}} for addr in to],
|
||||
}
|
||||
if cc:
|
||||
message["ccRecipients"] = [{"emailAddress": {"address": addr}} for addr in cc]
|
||||
|
||||
payload = json.dumps({"message": message, "saveToSentItems": True}).encode("utf-8")
|
||||
result = await self._graphPost("me/sendMail", payload)
|
||||
if "error" in result:
|
||||
return result
|
||||
return {"success": True}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Teams Adapter (Stub)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TeamsAdapter(_GraphApiMixin, ServiceAdapter):
|
||||
"""ServiceAdapter for Microsoft Teams -- browse joined teams and channels."""
|
||||
|
||||
async def browse(self, path: str, filter: Optional[str] = None) -> list:
|
||||
cleanPath = (path or "").strip("/")
|
||||
|
||||
if not cleanPath:
|
||||
result = await self._graphGet("me/joinedTeams")
|
||||
if "error" in result:
|
||||
logger.warning(f"Teams browse failed: {result['error']}")
|
||||
return []
|
||||
return [
|
||||
ExternalEntry(
|
||||
name=t.get("displayName", ""),
|
||||
path=f"/{t.get('id', '')}",
|
||||
isFolder=True,
|
||||
metadata={"id": t.get("id"), "description": t.get("description", "")},
|
||||
)
|
||||
for t in result.get("value", [])
|
||||
]
|
||||
|
||||
parts = cleanPath.split("/", 1)
|
||||
teamId = parts[0]
|
||||
if len(parts) == 1:
|
||||
result = await self._graphGet(f"teams/{teamId}/channels")
|
||||
if "error" in result:
|
||||
return []
|
||||
return [
|
||||
ExternalEntry(
|
||||
name=ch.get("displayName", ""),
|
||||
path=f"/{teamId}/{ch.get('id', '')}",
|
||||
isFolder=True,
|
||||
metadata={"id": ch.get("id"), "membershipType": ch.get("membershipType", "")},
|
||||
)
|
||||
for ch in result.get("value", [])
|
||||
]
|
||||
|
||||
return []
|
||||
|
||||
async def download(self, path: str) -> bytes:
|
||||
return b""
|
||||
|
||||
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
|
||||
return {"error": "Teams upload not implemented"}
|
||||
|
||||
async def search(self, query: str, path: Optional[str] = None) -> list:
|
||||
return []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OneDrive Adapter (Stub -- similar to SharePoint but personal drive)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class OneDriveAdapter(_GraphApiMixin, ServiceAdapter):
|
||||
"""ServiceAdapter stub for OneDrive (personal drive)."""
|
||||
|
||||
async def browse(self, path: str, filter: Optional[str] = None) -> List[ExternalEntry]:
|
||||
cleanPath = (path or "").strip("/")
|
||||
if not cleanPath:
|
||||
endpoint = "me/drive/root/children"
|
||||
else:
|
||||
endpoint = f"me/drive/root:/{cleanPath}:/children"
|
||||
|
||||
result = await self._graphGet(endpoint)
|
||||
if "error" in result:
|
||||
return []
|
||||
entries = [_graphItemToExternalEntry(item, path) for item in result.get("value", [])]
|
||||
if filter:
|
||||
entries = [e for e in entries if _matchFilter(e, filter)]
|
||||
return entries
|
||||
|
||||
async def download(self, path: str) -> bytes:
|
||||
cleanPath = (path or "").strip("/")
|
||||
if not cleanPath:
|
||||
return b""
|
||||
data = await self._graphDownload(f"me/drive/root:/{cleanPath}:/content")
|
||||
return data or b""
|
||||
|
||||
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
|
||||
cleanPath = (path or "").strip("/")
|
||||
uploadPath = f"{cleanPath}/{fileName}" if cleanPath else fileName
|
||||
endpoint = f"me/drive/root:/{uploadPath}:/content"
|
||||
return await self._graphPut(endpoint, data)
|
||||
|
||||
async def search(self, query: str, path: Optional[str] = None) -> List[ExternalEntry]:
|
||||
safeQuery = query.replace("'", "''")
|
||||
endpoint = f"me/drive/root/search(q='{safeQuery}')"
|
||||
result = await self._graphGet(endpoint)
|
||||
if "error" in result:
|
||||
return []
|
||||
return [_graphItemToExternalEntry(item) for item in result.get("value", [])]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MsftConnector (1:n)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class MsftConnector(ProviderConnector):
|
||||
"""Microsoft ProviderConnector -- 1 connection → n services."""
|
||||
|
||||
_SERVICE_MAP = {
|
||||
"sharepoint": SharepointAdapter,
|
||||
"outlook": OutlookAdapter,
|
||||
"teams": TeamsAdapter,
|
||||
"onedrive": OneDriveAdapter,
|
||||
}
|
||||
|
||||
def getAvailableServices(self) -> List[str]:
|
||||
return list(self._SERVICE_MAP.keys())
|
||||
|
||||
def getServiceAdapter(self, service: str) -> ServiceAdapter:
|
||||
adapterClass = self._SERVICE_MAP.get(service)
|
||||
if not adapterClass:
|
||||
raise ValueError(f"Unknown MSFT service: {service}. Available: {list(self._SERVICE_MAP.keys())}")
|
||||
return adapterClass(self.accessToken)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _parseSharepointPath(path: str) -> tuple:
|
||||
"""Parse a SharePoint path into (siteId, innerPath).
|
||||
|
||||
Expected format: /sites/<siteId>/<innerPath>
|
||||
Also accepts bare siteId if no /sites/ prefix.
|
||||
"""
|
||||
if not path:
|
||||
return ("", "")
|
||||
clean = path.strip("/")
|
||||
if clean.startswith("sites/"):
|
||||
parts = clean.split("/", 2)
|
||||
siteId = parts[1] if len(parts) > 1 else ""
|
||||
innerPath = parts[2] if len(parts) > 2 else ""
|
||||
return (siteId, innerPath)
|
||||
parts = clean.split("/", 1)
|
||||
return (parts[0], parts[1] if len(parts) > 1 else "")
|
||||
|
||||
|
||||
def _matchFilter(entry: ExternalEntry, pattern: str) -> bool:
|
||||
"""Simple glob-like filter (supports * wildcard)."""
|
||||
import fnmatch
|
||||
return fnmatch.fnmatch(entry.name.lower(), pattern.lower())
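Small illustrative checks for the helpers above; the site id and file names are made up.

# Illustrative examples only (site id and file names are invented).
siteId, innerPath = _parseSharepointPath("/sites/contoso,abc-123/Shared Documents/Reports")
assert siteId == "contoso,abc-123"
assert innerPath == "Shared Documents/Reports"
assert _parseSharepointPath("/") == ("", "")

entry = ExternalEntry(name="Q3-report.pdf", path="/Reports/Q3-report.pdf")
assert _matchFilter(entry, "*.pdf") is True
assert _matchFilter(entry, "*.docx") is False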
|
||||
|
|
@@ -26,6 +26,12 @@ class OperationTypeEnum(str, Enum):
|
|||
WEB_SEARCH_DATA = "webSearch" # Returns list of URLs only
|
||||
WEB_CRAWL = "webCrawl" # Web crawl for a given URL
|
||||
|
||||
# Agent Operations
|
||||
AGENT = "agent" # Agent loop: reasoning + tool use
|
||||
|
||||
# Embedding Operations
|
||||
EMBEDDING = "embedding" # Text → vector conversion for semantic search
|
||||
|
||||
# Speech Operations (dedicated pipeline, bypasses standard model selection)
|
||||
SPEECH_TEAMS = "speechTeams" # Teams Meeting AI analysis: decide if/how to respond
|
||||
|
||||
|
|
@@ -102,6 +108,7 @@ class AiModel(BaseModel):
|
|||
|
||||
# Function reference (not serialized)
|
||||
functionCall: Optional[Callable] = Field(default=None, exclude=True, description="Function to call for this model")
|
||||
functionCallStream: Optional[Callable] = Field(default=None, exclude=True, description="Streaming function: yields str deltas, then final AiModelResponse")
|
||||
calculatepriceCHF: Optional[Callable] = Field(default=None, exclude=True, description="Function to calculate price in USD")
|
||||
|
||||
# Selection criteria - capabilities with ratings
|
||||
|
|
@@ -155,10 +162,12 @@ class AiCallOptions(BaseModel):
|
|||
class AiCallRequest(BaseModel):
|
||||
"""Centralized AI call request payload for interface use."""
|
||||
|
||||
prompt: str = Field(description="The user prompt")
|
||||
prompt: str = Field(default="", description="The user prompt")
|
||||
context: Optional[str] = Field(default=None, description="Optional external context (e.g., extracted docs)")
|
||||
options: AiCallOptions = Field(default_factory=AiCallOptions)
|
||||
contentParts: Optional[List['ContentPart']] = None # NEW: Content parts for model-aware chunking
|
||||
contentParts: Optional[List['ContentPart']] = None # Content parts for model-aware chunking
|
||||
messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="OpenAI-style messages for multi-turn agent conversations")
|
||||
tools: Optional[List[Dict[str, Any]]] = Field(default=None, description="Tool definitions for native function calling")
|
||||
|
||||
|
||||
class AiCallResponse(BaseModel):
|
||||
|
|
@@ -172,14 +181,19 @@ class AiCallResponse(BaseModel):
|
|||
bytesSent: int = Field(default=0, description="Input data size in bytes")
|
||||
bytesReceived: int = Field(default=0, description="Output data size in bytes")
|
||||
errorCount: int = Field(default=0, description="0 for success, 1+ for errors")
|
||||
toolCalls: Optional[List[Dict[str, Any]]] = Field(default=None, description="Tool calls from native function calling")
|
||||
metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional response metadata (e.g. embeddings vectors)")
|
||||
|
||||
|
||||
class AiModelCall(BaseModel):
|
||||
"""Standardized input for AI model calls."""
|
||||
|
||||
messages: List[Dict[str, Any]] = Field(description="Messages in OpenAI format (role, content)")
|
||||
messages: List[Dict[str, Any]] = Field(default_factory=list, description="Messages in OpenAI format (role, content)")
|
||||
model: Optional[AiModel] = Field(default=None, description="The AI model being called")
|
||||
options: AiCallOptions = Field(default_factory=AiCallOptions, description="Additional model-specific options")
|
||||
tools: Optional[List[Dict[str, Any]]] = Field(default=None, description="Tool definitions for native function calling")
|
||||
toolChoice: Optional[Any] = Field(default=None, description="Tool choice: 'auto', 'none', or specific tool")
|
||||
embeddingInput: Optional[List[str]] = Field(default=None, description="Input texts for embedding models (used instead of messages)")
|
||||
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
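With the new defaults, an agent-style request can be built from messages and tool definitions alone. A hedged construction example; the tool schema is illustrative and not part of this commit.

# Hedged construction example; the tool definition below is illustrative only.
request = AiCallRequest(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is on my calendar tomorrow?"},
    ],
    tools=[{
        "type": "function",
        "function": {
            "name": "readCalendar",
            "description": "Read calendar entries for a date",
            "parameters": {
                "type": "object",
                "properties": {"date": {"type": "string"}},
                "required": ["date"],
            },
        },
    }],
)
# prompt now defaults to "", so message-driven agent calls need no separate prompt.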
|
||||
|
||||
|
|
|
|||
|
|
@@ -124,6 +124,12 @@ class BillingTransaction(BaseModel):
|
|||
aicoreModel: Optional[str] = Field(None, description="AICore model name (e.g., claude-4-sonnet, gpt-4o)")
|
||||
createdByUserId: Optional[str] = Field(None, description="User who created/caused this transaction")
|
||||
|
||||
# AI call metadata (for per-call analytics)
|
||||
processingTime: Optional[float] = Field(None, description="Processing time in seconds")
|
||||
bytesSent: Optional[int] = Field(None, description="Bytes sent to AI model")
|
||||
bytesReceived: Optional[int] = Field(None, description="Bytes received from AI model")
|
||||
errorCount: Optional[int] = Field(None, description="Number of errors in this call")
|
||||
|
||||
|
||||
registerModelLabels(
|
||||
"BillingTransaction",
|
||||
|
|
|
|||
|
|
@@ -1,6 +1,6 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Chat models: ChatWorkflow, ChatMessage, ChatLog, ChatStat, ChatDocument."""
|
||||
"""Chat models: ChatWorkflow, ChatMessage, ChatLog, ChatDocument."""
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
from enum import Enum
|
||||
|
|
@@ -10,44 +10,6 @@ from modules.shared.timeUtils import getUtcTimestamp
|
|||
import uuid
|
||||
|
||||
|
||||
class ChatStat(BaseModel):
|
||||
"""Statistics for chat operations. User-owned, no mandate context."""
|
||||
model_config = {"populate_by_name": True, "extra": "allow"} # Allow DB system fields
|
||||
|
||||
id: str = Field(
|
||||
default_factory=lambda: str(uuid.uuid4()), description="Primary key"
|
||||
)
|
||||
workflowId: Optional[str] = Field(
|
||||
None, description="Foreign key to workflow (for workflow stats)"
|
||||
)
|
||||
processingTime: Optional[float] = Field(
|
||||
None, description="Processing time in seconds"
|
||||
)
|
||||
bytesSent: Optional[int] = Field(None, description="Number of bytes sent")
|
||||
bytesReceived: Optional[int] = Field(None, description="Number of bytes received")
|
||||
errorCount: Optional[int] = Field(None, description="Number of errors encountered")
|
||||
process: Optional[str] = Field(None, description="The process that delivers the stats data (e.g. 'action.outlook.readMails', 'ai.process.document.name')")
|
||||
engine: Optional[str] = Field(None, description="The engine used (e.g. 'ai.anthropic.35', 'ai.tavily.basic', 'renderer.docx')")
|
||||
priceCHF: Optional[float] = Field(None, description="Calculated price in USD for the operation")
|
||||
|
||||
|
||||
registerModelLabels(
|
||||
"ChatStat",
|
||||
{"en": "Chat Statistics", "fr": "Statistiques de chat"},
|
||||
{
|
||||
"id": {"en": "ID", "fr": "ID"},
|
||||
"workflowId": {"en": "Workflow ID", "fr": "ID du workflow"},
|
||||
"processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
|
||||
"bytesSent": {"en": "Bytes Sent", "fr": "Octets envoyés"},
|
||||
"bytesReceived": {"en": "Bytes Received", "fr": "Octets reçus"},
|
||||
"errorCount": {"en": "Error Count", "fr": "Nombre d'erreurs"},
|
||||
"process": {"en": "Process", "fr": "Processus"},
|
||||
"engine": {"en": "Engine", "fr": "Moteur"},
|
||||
"priceCHF": {"en": "Price CHF", "fr": "Prix CHF"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class ChatLog(BaseModel):
|
||||
"""Log entries for chat workflows. User-owned, no mandate context."""
|
||||
id: str = Field(
|
||||
|
|
@@ -322,7 +284,6 @@ class ChatWorkflow(BaseModel):
|
|||
startedAt: float = Field(default_factory=getUtcTimestamp, description="When the workflow started (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False})
|
||||
logs: List[ChatLog] = Field(default_factory=list, description="Workflow logs", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
|
||||
messages: List[ChatMessage] = Field(default_factory=list, description="Messages in the workflow", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
|
||||
stats: List[ChatStat] = Field(default_factory=list, description="Workflow statistics list", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
|
||||
tasks: list = Field(default_factory=list, description="List of tasks in the workflow", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
|
||||
workflowMode: WorkflowModeEnum = Field(default=WorkflowModeEnum.WORKFLOW_DYNAMIC, description="Workflow mode selector", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [
|
||||
{
|
||||
|
|
|
|||
modules/datamodels/datamodelContent.py (new file, 58 lines)
|
|
@@ -0,0 +1,58 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Content Object data models for the container and content extraction pipeline.
|
||||
|
||||
Physical layer: Container hierarchy (ZIP, Folder, File)
|
||||
Logical layer: Scalar content objects (text, image, videostream, audiostream, other)
|
||||
|
||||
The entire extraction pipeline up to ContentObjects runs without AI.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
import uuid
|
||||
|
||||
|
||||
class ContainerLimitError(Exception):
|
||||
"""Raised when container extraction exceeds safety limits (size, depth, file count)."""
|
||||
pass
|
||||
|
||||
|
||||
class ContentContextRef(BaseModel):
|
||||
"""Reference to the origin context within a container/file."""
|
||||
containerPath: str = Field(description="e.g. 'archiv.zip/folder-a/report.pdf'")
|
||||
location: str = Field(default="", description="e.g. 'page:5/region:bottomLeft'")
|
||||
label: Optional[str] = Field(default=None, description="e.g. 'Abbildung 3: Uebersicht'")
|
||||
pageIndex: Optional[int] = Field(default=None, description="Page number (PDF, DOCX)")
|
||||
sectionId: Optional[str] = Field(default=None, description="Section/Heading ID")
|
||||
sheetName: Optional[str] = Field(default=None, description="Sheet name (XLSX)")
|
||||
slideIndex: Optional[int] = Field(default=None, description="Slide number (PPTX)")
|
||||
|
||||
|
||||
class ContentObject(BaseModel):
|
||||
"""Scalar content object extracted from a file. No AI involved."""
|
||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
||||
fileId: str = Field(description="FK to the physical file")
|
||||
contentType: str = Field(description="text, image, videostream, audiostream, other")
|
||||
data: str = Field(default="", description="Content data (text, base64, URL)")
|
||||
contextRef: ContentContextRef = Field(default_factory=ContentContextRef)
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
sequence: int = Field(default=0, description="Order within the context")
|
||||
|
||||
|
||||
class ContentObjectSummary(BaseModel):
|
||||
"""Compact description of a content object for the FileContentIndex."""
|
||||
id: str = Field(description="Content object ID")
|
||||
contentType: str = Field(description="text, image, videostream, audiostream, other")
|
||||
contextRef: ContentContextRef = Field(default_factory=ContentContextRef)
|
||||
charCount: Optional[int] = Field(default=None, description="Only for text")
|
||||
dimensions: Optional[str] = Field(default=None, description="Only for image/video (e.g. '1920x1080')")
|
||||
duration: Optional[float] = Field(default=None, description="Only for audio/video (seconds)")
|
||||
|
||||
|
||||
class FileEntry(BaseModel):
|
||||
"""A file extracted from a container (ZIP, TAR, Folder)."""
|
||||
path: str = Field(description="Relative path within the container")
|
||||
data: bytes = Field(description="File content bytes")
|
||||
mimeType: str = Field(description="Detected MIME type")
|
||||
size: int = Field(description="File size in bytes")
|
||||
modules/datamodels/datamodelDataSource.py (new file, 58 lines)
|
|
@@ -0,0 +1,58 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""DataSource and ExternalEntry models for external data integration.
|
||||
|
||||
DataSource links a UserConnection to an external path (SharePoint folder,
|
||||
Google Drive folder, FTP directory, etc.) for agent-accessible data containers.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from modules.shared.attributeUtils import registerModelLabels
|
||||
from modules.shared.timeUtils import getUtcTimestamp
|
||||
import uuid
|
||||
|
||||
|
||||
class DataSource(BaseModel):
|
||||
"""Configured external data source linked to a UserConnection."""
|
||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
|
||||
connectionId: str = Field(description="FK to UserConnection")
|
||||
sourceType: str = Field(description="sharepointFolder, googleDriveFolder, outlookFolder, ftpFolder")
|
||||
path: str = Field(description="External path (e.g. '/sites/MySite/Documents/Reports')")
|
||||
label: str = Field(description="User-visible label")
|
||||
featureInstanceId: Optional[str] = Field(default=None, description="Scoped to feature instance")
|
||||
mandateId: Optional[str] = Field(default=None, description="Mandate scope")
|
||||
userId: str = Field(default="", description="Owner user ID")
|
||||
autoSync: bool = Field(default=False, description="Automatically sync on schedule")
|
||||
lastSynced: Optional[float] = Field(default=None, description="Last sync timestamp")
|
||||
createdAt: float = Field(default_factory=getUtcTimestamp, description="Creation timestamp")
|
||||
|
||||
|
||||
registerModelLabels(
|
||||
"DataSource",
|
||||
{"en": "Data Source", "de": "Datenquelle", "fr": "Source de données"},
|
||||
{
|
||||
"id": {"en": "ID", "de": "ID", "fr": "ID"},
|
||||
"connectionId": {"en": "Connection ID", "de": "Verbindungs-ID", "fr": "ID de connexion"},
|
||||
"sourceType": {"en": "Source Type", "de": "Quellentyp", "fr": "Type de source"},
|
||||
"path": {"en": "Path", "de": "Pfad", "fr": "Chemin"},
|
||||
"label": {"en": "Label", "de": "Bezeichnung", "fr": "Libellé"},
|
||||
"featureInstanceId": {"en": "Feature Instance", "de": "Feature-Instanz", "fr": "Instance de fonctionnalité"},
|
||||
"mandateId": {"en": "Mandate ID", "de": "Mandanten-ID", "fr": "ID du mandat"},
|
||||
"userId": {"en": "User ID", "de": "Benutzer-ID", "fr": "ID utilisateur"},
|
||||
"autoSync": {"en": "Auto Sync", "de": "Auto-Sync", "fr": "Synchro auto"},
|
||||
"lastSynced": {"en": "Last Synced", "de": "Letzter Sync", "fr": "Dernier sync"},
|
||||
"createdAt": {"en": "Created At", "de": "Erstellt am", "fr": "Créé le"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class ExternalEntry(BaseModel):
|
||||
"""An item (file or folder) from an external data source."""
|
||||
name: str = Field(description="Item name")
|
||||
path: str = Field(description="Full path within the source")
|
||||
isFolder: bool = Field(default=False, description="True if directory/folder")
|
||||
size: Optional[int] = Field(default=None, description="File size in bytes")
|
||||
mimeType: Optional[str] = Field(default=None, description="MIME type (files only)")
|
||||
lastModified: Optional[float] = Field(default=None, description="Last modification timestamp")
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict, description="Provider-specific metadata")
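A hedged example of configuring a source; the IDs and path below are placeholders.

# Placeholder IDs and path; illustrative only.
source = DataSource(
    connectionId="conn-123",
    sourceType="sharepointFolder",
    path="/sites/MySite/Documents/Reports",
    label="Quarterly reports",
    userId="user-42",
    autoSync=True,
)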
|
||||
|
|
@@ -73,7 +73,7 @@ class ExtractionOptions(BaseModel):
|
|||
"""Options for document extraction and processing with clear data structures."""
|
||||
|
||||
# Core extraction parameters
|
||||
prompt: str = Field(description="Extraction prompt for AI processing")
|
||||
prompt: str = Field(default="", description="Extraction prompt for AI processing")
|
||||
processDocumentsIndividually: bool = Field(default=True, description="Process each document separately")
|
||||
|
||||
# Image processing parameters
|
||||
|
|
@@ -81,7 +81,7 @@ class ExtractionOptions(BaseModel):
|
|||
imageQuality: int = Field(default=85, ge=1, le=100, description="Image quality (1-100)")
|
||||
|
||||
# Merging strategy
|
||||
mergeStrategy: MergeStrategy = Field(description="Strategy for merging extraction results")
|
||||
mergeStrategy: MergeStrategy = Field(default_factory=MergeStrategy, description="Strategy for merging extraction results")
|
||||
|
||||
# Optional chunking parameters (for backward compatibility)
|
||||
chunkAllowed: Optional[bool] = Field(default=None, description="Whether chunking is allowed")
|
||||
|
|
|
|||
modules/datamodels/datamodelFileFolder.py (new file, 32 lines)
|
|
@@ -0,0 +1,32 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""FileFolder: hierarchical folder structure for file organization."""
|
||||
|
||||
from typing import Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from modules.shared.attributeUtils import registerModelLabels
|
||||
from modules.shared.timeUtils import getUtcTimestamp
|
||||
import uuid
|
||||
|
||||
|
||||
class FileFolder(BaseModel):
|
||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
|
||||
name: str = Field(description="Folder name", json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": True})
|
||||
parentId: Optional[str] = Field(default=None, description="Parent folder ID (null = root)", json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": False})
|
||||
mandateId: Optional[str] = Field(default=None, description="Mandate context", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
|
||||
featureInstanceId: Optional[str] = Field(default=None, description="Feature instance context", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
|
||||
createdAt: float = Field(default_factory=getUtcTimestamp, description="Creation timestamp", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False})
|
||||
|
||||
|
||||
registerModelLabels(
|
||||
"FileFolder",
|
||||
{"en": "File Folder", "fr": "Dossier de fichiers"},
|
||||
{
|
||||
"id": {"en": "ID", "fr": "ID"},
|
||||
"name": {"en": "Name", "fr": "Nom"},
|
||||
"parentId": {"en": "Parent Folder", "fr": "Dossier parent"},
|
||||
"mandateId": {"en": "Mandate ID", "fr": "ID du mandat"},
|
||||
"featureInstanceId": {"en": "Feature Instance ID", "fr": "ID de l'instance"},
|
||||
"createdAt": {"en": "Created At", "fr": "Créé le"},
|
||||
},
|
||||
)
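FileFolder models a folder tree purely through parentId links (None means root). An illustrative, self-contained sketch (the folderPath helper is assumed for illustration; it is not part of this diff) of resolving a folder's full path by walking those links:

    from typing import Dict, Optional
    from pydantic import BaseModel, Field
    import uuid

    class Folder(BaseModel):              # simplified stand-in for FileFolder
        id: str = Field(default_factory=lambda: str(uuid.uuid4()))
        name: str
        parentId: Optional[str] = None    # None = root

    def folderPath(folderId: str, byId: Dict[str, Folder]) -> str:
        parts, current = [], folderId
        while current is not None:        # walk up the parentId chain to the root
            folder = byId[current]
            parts.append(folder.name)
            current = folder.parentId
        return "/".join(reversed(parts))

    root = Folder(name="Projects")
    child = Folder(name="2025", parentId=root.id)
    byId = {f.id: f for f in (root, child)}
    print(folderPath(child.id, byId))     # -> Projects/2025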
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
# All rights reserved.
|
||||
"""File-related datamodels: FileItem, FilePreview, FileData."""
|
||||
|
||||
from typing import Dict, Any, Optional, Union
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
from modules.shared.attributeUtils import registerModelLabels
|
||||
from modules.shared.timeUtils import getUtcTimestamp
|
||||
|
|
@ -20,6 +20,10 @@ class FileItem(BaseModel):
|
|||
fileHash: str = Field(description="Hash of the file", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
|
||||
fileSize: int = Field(description="Size of the file in bytes", json_schema_extra={"frontend_type": "integer", "frontend_readonly": True, "frontend_required": False})
|
||||
creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the file was created (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False})
|
||||
tags: Optional[List[str]] = Field(default=None, description="Tags for categorization and search", json_schema_extra={"frontend_type": "tags", "frontend_readonly": False, "frontend_required": False})
|
||||
folderId: Optional[str] = Field(default=None, description="ID of the parent folder", json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": False})
|
||||
description: Optional[str] = Field(default=None, description="User-provided description of the file", json_schema_extra={"frontend_type": "textarea", "frontend_readonly": False, "frontend_required": False})
|
||||
status: Optional[str] = Field(default=None, description="Processing status: pending, extracted, embedding, indexed, failed", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
|
||||
|
||||
registerModelLabels(
|
||||
"FileItem",
|
||||
|
|
@ -33,6 +37,10 @@ registerModelLabels(
|
|||
"fileHash": {"en": "File Hash", "fr": "Hash du fichier"},
|
||||
"fileSize": {"en": "File Size", "fr": "Taille du fichier"},
|
||||
"creationDate": {"en": "Creation Date", "fr": "Date de création"},
|
||||
"tags": {"en": "Tags", "fr": "Tags"},
|
||||
"folderId": {"en": "Folder ID", "fr": "ID du dossier"},
|
||||
"description": {"en": "Description", "fr": "Description"},
|
||||
"status": {"en": "Status", "fr": "Statut"},
|
||||
},
|
||||
)
|
||||
|
||||
|
|
|
|||
modules/datamodels/datamodelKnowledge.py  (new file, 130 lines)
|
|
@ -0,0 +1,130 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Knowledge Store data models: FileContentIndex, ContentChunk, WorkflowMemory.
|
||||
|
||||
These models support the 3-tier RAG architecture:
|
||||
- Shared Layer: mandateId-scoped, isShared=True
|
||||
- Instance Layer: userId + featureInstanceId-scoped
|
||||
- Workflow Layer: workflowId-scoped (WorkflowMemory)
|
||||
|
||||
Vector fields use json_schema_extra={"db_type": "vector(1536)"} for pgvector.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from modules.shared.attributeUtils import registerModelLabels
|
||||
from modules.shared.timeUtils import getUtcTimestamp
|
||||
import uuid
|
||||
|
||||
|
||||
class FileContentIndex(BaseModel):
|
||||
"""Structural index of a file's content objects. Created without AI.
|
||||
Lives in the Instance Layer; optionally promoted to Shared Layer via isShared."""
|
||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key (typically = fileId)")
|
||||
userId: str = Field(description="Owner user ID")
|
||||
featureInstanceId: str = Field(default="", description="Feature instance scope")
|
||||
mandateId: str = Field(default="", description="Mandate scope")
|
||||
isShared: bool = Field(default=False, description="Visible in Shared Layer for all mandate users")
|
||||
fileName: str = Field(description="Original file name")
|
||||
mimeType: str = Field(description="MIME type of the file")
|
||||
containerPath: Optional[str] = Field(default=None, description="Path within a container (e.g. 'archive.zip/folder/report.pdf')")
|
||||
totalObjects: int = Field(default=0, description="Total number of content objects extracted")
|
||||
totalSize: int = Field(default=0, description="Total size of all content objects in bytes")
|
||||
structure: Dict[str, Any] = Field(default_factory=dict, description="Structural overview (pages, sections, hierarchy)")
|
||||
objectSummary: List[Dict[str, Any]] = Field(default_factory=list, description="Compact summary per content object")
|
||||
extractedAt: float = Field(default_factory=getUtcTimestamp, description="Extraction timestamp")
|
||||
status: str = Field(default="pending", description="Processing status: pending, extracted, embedding, indexed, failed")
|
||||
|
||||
|
||||
registerModelLabels(
|
||||
"FileContentIndex",
|
||||
{"en": "File Content Index", "fr": "Index du contenu de fichier"},
|
||||
{
|
||||
"id": {"en": "ID", "fr": "ID"},
|
||||
"userId": {"en": "User ID", "fr": "ID utilisateur"},
|
||||
"featureInstanceId": {"en": "Feature Instance ID", "fr": "ID de l'instance"},
|
||||
"mandateId": {"en": "Mandate ID", "fr": "ID du mandat"},
|
||||
"isShared": {"en": "Shared", "fr": "Partagé"},
|
||||
"fileName": {"en": "File Name", "fr": "Nom de fichier"},
|
||||
"mimeType": {"en": "MIME Type", "fr": "Type MIME"},
|
||||
"containerPath": {"en": "Container Path", "fr": "Chemin du conteneur"},
|
||||
"totalObjects": {"en": "Total Objects", "fr": "Nombre total d'objets"},
|
||||
"totalSize": {"en": "Total Size", "fr": "Taille totale"},
|
||||
"structure": {"en": "Structure", "fr": "Structure"},
|
||||
"objectSummary": {"en": "Object Summary", "fr": "Résumé des objets"},
|
||||
"extractedAt": {"en": "Extracted At", "fr": "Extrait le"},
|
||||
"status": {"en": "Status", "fr": "Statut"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class ContentChunk(BaseModel):
|
||||
"""Persisted content chunk with embedding vector. Reusable across workflows.
|
||||
Scalar content object (or chunk thereof) with pgvector embedding."""
|
||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
|
||||
contentObjectId: str = Field(description="Reference to the content object within FileContentIndex")
|
||||
fileId: str = Field(description="FK to the source file")
|
||||
userId: str = Field(description="Owner user ID")
|
||||
featureInstanceId: str = Field(default="", description="Feature instance scope")
|
||||
contentType: str = Field(description="Content type: text, image, videostream, audiostream, other")
|
||||
data: str = Field(description="Content data (text, base64, URL)")
|
||||
contextRef: Dict[str, Any] = Field(default_factory=dict, description="Context reference (page, position, label)")
|
||||
summary: Optional[str] = Field(default=None, description="AI-generated summary (on demand)")
|
||||
chunkMetadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")
|
||||
embedding: Optional[List[float]] = Field(
|
||||
default=None, description="pgvector embedding (NOT NULL for text chunks)",
|
||||
json_schema_extra={"db_type": "vector(1536)"}
|
||||
)
|
||||
|
||||
|
||||
registerModelLabels(
|
||||
"ContentChunk",
|
||||
{"en": "Content Chunk", "fr": "Fragment de contenu"},
|
||||
{
|
||||
"id": {"en": "ID", "fr": "ID"},
|
||||
"contentObjectId": {"en": "Content Object ID", "fr": "ID de l'objet de contenu"},
|
||||
"fileId": {"en": "File ID", "fr": "ID du fichier"},
|
||||
"userId": {"en": "User ID", "fr": "ID utilisateur"},
|
||||
"featureInstanceId": {"en": "Feature Instance ID", "fr": "ID de l'instance"},
|
||||
"contentType": {"en": "Content Type", "fr": "Type de contenu"},
|
||||
"data": {"en": "Data", "fr": "Données"},
|
||||
"contextRef": {"en": "Context Reference", "fr": "Référence contextuelle"},
|
||||
"summary": {"en": "Summary", "fr": "Résumé"},
|
||||
"chunkMetadata": {"en": "Metadata", "fr": "Métadonnées"},
|
||||
"embedding": {"en": "Embedding", "fr": "Vecteur d'embedding"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class WorkflowMemory(BaseModel):
|
||||
"""Workflow-scoped key-value cache for entities and facts.
|
||||
Extracted during agent rounds, persisted for cross-round and cross-workflow reuse."""
|
||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
|
||||
workflowId: str = Field(description="FK to the workflow")
|
||||
userId: str = Field(description="Owner user ID")
|
||||
featureInstanceId: str = Field(default="", description="Feature instance scope")
|
||||
key: str = Field(description="Key identifier (e.g. 'entity:companyName')")
|
||||
value: str = Field(description="Extracted value")
|
||||
source: str = Field(default="extraction", description="Origin: extraction, tool, conversation, summary")
|
||||
createdAt: float = Field(default_factory=getUtcTimestamp, description="Creation timestamp")
|
||||
embedding: Optional[List[float]] = Field(
|
||||
default=None, description="Optional embedding for semantic lookup",
|
||||
json_schema_extra={"db_type": "vector(1536)"}
|
||||
)
|
||||
|
||||
|
||||
registerModelLabels(
|
||||
"WorkflowMemory",
|
||||
{"en": "Workflow Memory", "fr": "Mémoire de workflow"},
|
||||
{
|
||||
"id": {"en": "ID", "fr": "ID"},
|
||||
"workflowId": {"en": "Workflow ID", "fr": "ID du workflow"},
|
||||
"userId": {"en": "User ID", "fr": "ID utilisateur"},
|
||||
"featureInstanceId": {"en": "Feature Instance ID", "fr": "ID de l'instance"},
|
||||
"key": {"en": "Key", "fr": "Clé"},
|
||||
"value": {"en": "Value", "fr": "Valeur"},
|
||||
"source": {"en": "Source", "fr": "Source"},
|
||||
"createdAt": {"en": "Created At", "fr": "Créé le"},
|
||||
"embedding": {"en": "Embedding", "fr": "Vecteur d'embedding"},
|
||||
},
|
||||
)
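ContentChunk and WorkflowMemory both carry optional 1536-dimension embeddings so retrieval can run as a pgvector similarity query. A hedged, in-memory sketch of the same ranking idea (plain cosine similarity over (text, embedding) pairs; the real lookup would order by the vector column instead):

    import math
    from typing import List, Tuple

    def cosine(a: List[float], b: List[float]) -> float:
        dot = sum(x * y for x, y in zip(a, b))
        norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
        return dot / norm if norm else 0.0

    def topChunks(query: List[float], chunks: List[Tuple[str, List[float]]], k: int = 3) -> List[str]:
        # rank chunk texts by similarity of their embedding to the query embedding
        ranked = sorted(chunks, key=lambda c: cosine(query, c[1]), reverse=True)
        return [text for text, _ in ranked[:k]]

    docs = [("pricing table", [0.9, 0.1]), ("termination clause", [0.1, 0.9])]
    print(topChunks([0.8, 0.2], docs, k=1))   # -> ['pricing table']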
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
# All rights reserved.
|
||||
"""Voice settings datamodel."""
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from modules.shared.attributeUtils import registerModelLabels
|
||||
from modules.shared.timeUtils import getUtcTimestamp
|
||||
|
|
@ -16,6 +17,7 @@ class VoiceSettings(BaseModel):
|
|||
sttLanguage: str = Field(default="de-DE", description="Speech-to-Text language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
|
||||
ttsLanguage: str = Field(default="de-DE", description="Text-to-Speech language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
|
||||
ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
|
||||
ttsVoiceMap: Dict[str, Any] = Field(default_factory=dict, description="Per-language voice mapping, e.g. {'de-DE': {'voiceName': 'de-DE-Wavenet-A'}, 'en-US': {'voiceName': 'en-US-Wavenet-C'}}", json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False})
|
||||
translationEnabled: bool = Field(default=True, description="Whether translation is enabled", json_schema_extra={"frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False})
|
||||
targetLanguage: str = Field(default="en-US", description="Target language for translation", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False})
|
||||
creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False})
|
||||
|
|
@ -33,6 +35,7 @@ registerModelLabels(
|
|||
"sttLanguage": {"en": "STT Language", "fr": "Langue STT"},
|
||||
"ttsLanguage": {"en": "TTS Language", "fr": "Langue TTS"},
|
||||
"ttsVoice": {"en": "TTS Voice", "fr": "Voix TTS"},
|
||||
"ttsVoiceMap": {"en": "TTS Voice Map", "fr": "Carte des voix TTS"},
|
||||
"translationEnabled": {"en": "Translation Enabled", "fr": "Traduction activée"},
|
||||
"targetLanguage": {"en": "Target Language", "fr": "Langue cible"},
|
||||
"creationDate": {"en": "Creation Date", "fr": "Date de création"},
|
||||
|
|
|
|||
|
|
@@ -180,7 +180,7 @@ def getAutomationServices(
     for spec in REQUIRED_SERVICES:
         key = spec["serviceKey"]
         try:
-            svc = getService(key, ctx, legacy_hub=None)
+            svc = getService(key, ctx)
             setattr(hub, key, svc)
         except Exception as e:
             logger.warning(f"Could not resolve service '{key}' for automation: {e}")
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ from modules.datamodels.datamodelChat import ChatWorkflow, ChatMessage, ChatLog
|
|||
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata, normalize_pagination_dict
|
||||
from modules.shared.attributeUtils import getModelAttributeDefinitions
|
||||
from modules.interfaces import interfaceDbChat
|
||||
from modules.interfaces.interfaceDbBilling import getInterface as _getBillingInterface
|
||||
# Configure logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -682,7 +683,9 @@ def get_automation_workflow_chat_data(
|
|||
workflow = chatInterface.getWorkflow(workflowId)
|
||||
if not workflow:
|
||||
raise HTTPException(status_code=404, detail=f"Workflow {workflowId} not found")
|
||||
return chatInterface.getUnifiedChatData(workflowId, afterTimestamp)
|
||||
billingInterface = _getBillingInterface(context.user, context.mandateId)
|
||||
workflowCost = billingInterface.getWorkflowCost(workflowId)
|
||||
return chatInterface.getUnifiedChatData(workflowId, afterTimestamp, workflowCost=workflowCost)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -1291,17 +1291,6 @@ class ChatObjects:
|
|||
logger.error(f"Error updating message {messageId}: {str(e)}", exc_info=True)
|
||||
raise ValueError(f"Error updating message {messageId}: {str(e)}")
|
||||
|
||||
def createStat(self, statData: Dict[str, Any]):
|
||||
"""Create stat record. Compatibility with ChatService; stats may not be persisted in chatbot schema."""
|
||||
from modules.datamodels.datamodelChat import ChatStat
|
||||
stat = ChatStat(**statData)
|
||||
try:
|
||||
created = self.db.recordCreate(ChatStat, statData)
|
||||
return ChatStat(**created)
|
||||
except Exception as e:
|
||||
logger.debug(f"createStat: not persisting (chatbot schema): {e}")
|
||||
return stat
|
||||
|
||||
def deleteMessage(self, conversationId: str, messageId: str) -> bool:
|
||||
"""Deletes a conversation message and related data if user has access."""
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -179,7 +179,7 @@ def getChatbotServices(
|
|||
for spec in REQUIRED_SERVICES:
|
||||
key = spec["serviceKey"]
|
||||
try:
|
||||
svc = getService(key, ctx, legacy_hub=None)
|
||||
svc = getService(key, ctx)
|
||||
setattr(hub, key, svc)
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not resolve service '{key}' for chatbot: {e}")
|
||||
|
|
@ -197,7 +197,7 @@ def getChatStreamingHelper():
|
|||
from modules.serviceCenter.context import ServiceCenterContext
|
||||
# Minimal context - streaming service only needs it for resolver
|
||||
ctx = ServiceCenterContext(user=__get_placeholder_user(), mandate_id=None, feature_instance_id=None)
|
||||
streaming = getService("streaming", ctx, legacy_hub=None)
|
||||
streaming = getService("streaming", ctx)
|
||||
return streaming.getChatStreamingHelper() if streaming else None
|
||||
|
||||
|
||||
|
|
@ -219,7 +219,7 @@ def getEventManager(user, mandateId: Optional[str] = None, featureInstanceId: Op
|
|||
mandate_id=mandateId,
|
||||
feature_instance_id=featureInstanceId,
|
||||
)
|
||||
streaming = getService("streaming", ctx, legacy_hub=None)
|
||||
streaming = getService("streaming", ctx)
|
||||
return streaming.getEventManager()
|
||||
|
||||
|
||||
|
|
@ -306,12 +306,12 @@ def getChatbotServices(
|
|||
Uses interfaceFeatureChatbot (ChatObjects) for interfaceDbChat to avoid
|
||||
duplicate DB init - chatProcess reuses hub.interfaceDbChat.
|
||||
"""
|
||||
from modules.services import PublicService
|
||||
from modules.serviceHub import PublicService
|
||||
from modules.interfaces.interfaceDbApp import getInterface as getAppInterface
|
||||
from modules.features.chatbot.interfaceFeatureChatbot import getInterface as getChatbotInterface
|
||||
from modules.services.serviceChat.mainServiceChat import ChatService
|
||||
from modules.services.serviceAi.mainServiceAi import AiService
|
||||
from modules.services.serviceStreaming.mainServiceStreaming import StreamingService
|
||||
from modules.serviceCenter.services.serviceChat.mainServiceChat import ChatService
|
||||
from modules.serviceCenter.services.serviceAi.mainServiceAi import AiService
|
||||
from modules.serviceCenter.core.serviceStreaming.mainServiceStreaming import StreamingService
|
||||
|
||||
hub = _ChatbotServiceHub()
|
||||
hub.user = user
|
||||
|
|
@ -344,7 +344,7 @@ def getChatbotServices(
|
|||
feature_instance_id=featureInstanceId,
|
||||
workflow=_workflow,
|
||||
)
|
||||
hub.billing = getService("billing", ctx, legacy_hub=None)
|
||||
hub.billing = getService("billing", ctx)
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not resolve billing service for chatbot: {e}")
|
||||
hub.billing = None
|
||||
|
|
|
|||
|
|
@ -135,11 +135,3 @@ class ChatPlaygroundObjects:
|
|||
def createLog(self, log) -> Dict[str, Any]:
|
||||
"""Create a new log entry."""
|
||||
return self._chatInterface.createLog(log)
|
||||
|
||||
def getStats(self, workflowId: str) -> List[Dict[str, Any]]:
|
||||
"""Get stats for a workflow."""
|
||||
return self._chatInterface.getStats(workflowId)
|
||||
|
||||
def createStat(self, stat) -> Dict[str, Any]:
|
||||
"""Create a new stat entry."""
|
||||
return self._chatInterface.createStat(stat)
|
||||
|
|
|
|||
|
|
@ -158,7 +158,7 @@ def getChatplaygroundServices(
|
|||
for spec in REQUIRED_SERVICES:
|
||||
key = spec["serviceKey"]
|
||||
try:
|
||||
svc = getService(key, ctx, legacy_hub=None)
|
||||
svc = getService(key, ctx)
|
||||
setattr(hub, key, svc)
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not resolve service '{key}' for chatplayground: {e}")
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ from modules.auth import limiter, getRequestContext, RequestContext
|
|||
|
||||
# Import interfaces
|
||||
from modules.interfaces import interfaceDbChat
|
||||
from modules.interfaces.interfaceDbBilling import getInterface as _getBillingInterface
|
||||
|
||||
# Import models
|
||||
from modules.datamodels.datamodelChat import (
|
||||
|
|
@ -220,9 +221,11 @@ def get_workflow_chat_data(
|
|||
detail=f"Workflow with ID {workflowId} not found"
|
||||
)
|
||||
|
||||
# Get unified chat data
|
||||
chatData = chatInterface.getUnifiedChatData(workflowId, afterTimestamp)
|
||||
# Get workflow cost from billing transactions (single source of truth)
|
||||
billingInterface = _getBillingInterface(context.user, context.mandateId)
|
||||
workflowCost = billingInterface.getWorkflowCost(workflowId)
|
||||
|
||||
chatData = chatInterface.getUnifiedChatData(workflowId, afterTimestamp, workflowCost=workflowCost)
|
||||
return chatData
|
||||
|
||||
except HTTPException:
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ from modules.auth import limiter, getRequestContext, RequestContext
|
|||
from modules.interfaces import interfaceDbChat, interfaceDbManagement
|
||||
from modules.interfaces.interfaceAiObjects import AiObjects
|
||||
from modules.datamodels.datamodelChat import UserInputRequest
|
||||
from modules.services.serviceStreaming import get_event_manager
|
||||
from modules.serviceCenter.core.serviceStreaming import get_event_manager
|
||||
from modules.features.codeeditor import codeEditorProcessor, fileContextManager
|
||||
from modules.features.codeeditor.datamodelCodeeditor import FileEditProposal, EditStatusEnum
|
||||
|
||||
|
|
|
|||
|
|
@ -1011,7 +1011,7 @@ class CommcoachService:
|
|||
|
||||
async def _callAi(self, systemPrompt: str, userPrompt: str):
|
||||
"""Call the AI service with the given prompts."""
|
||||
from modules.services.serviceAi.mainServiceAi import AiService
|
||||
from modules.serviceCenter.services.serviceAi.mainServiceAi import AiService
|
||||
|
||||
serviceContext = type('Ctx', (), {
|
||||
'user': self.currentUser,
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from urllib.parse import urlparse, unquote
|
|||
|
||||
from modules.datamodels.datamodelUam import User
|
||||
from .datamodelFeatureNeutralizer import DataNeutralizerAttributes, DataNeutraliserConfig
|
||||
from modules.services import getInterface as getServices
|
||||
from modules.serviceHub import getInterface as getServices
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -205,7 +205,7 @@ class NeutralizationPlayground:
|
|||
|
||||
async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
|
||||
"""Process files from SharePoint source path and store neutralized files in target path"""
|
||||
from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService
|
||||
from modules.serviceCenter.services.serviceSharepoint.mainServiceSharepoint import SharepointService
|
||||
processor = SharepointProcessor(self.currentUser, self.services)
|
||||
return await processor.processSharepointFiles(sourcePath, targetPath)
|
||||
|
||||
|
|
|
|||
|
|
@ -262,8 +262,8 @@ class NeutralizationService:
|
|||
fileId: Optional[str]
|
||||
) -> Dict[str, Any]:
|
||||
"""Extract -> neutralize -> adapt -> generate for PDF/DOCX/XLSX/PPTX."""
|
||||
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
||||
from modules.services.serviceExtraction.subPipeline import runExtraction
|
||||
from modules.serviceCenter.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
||||
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
|
||||
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
|
||||
|
||||
# Ensure registries exist
|
||||
|
|
@ -405,10 +405,10 @@ class NeutralizationService:
|
|||
|
||||
def _getRendererForMime(self, mimeType: str):
|
||||
"""Get renderer instance and output mime for the given input MIME type."""
|
||||
from modules.services.serviceGeneration.renderers.rendererPdf import RendererPdf
|
||||
from modules.services.serviceGeneration.renderers.rendererDocx import RendererDocx
|
||||
from modules.services.serviceGeneration.renderers.rendererXlsx import RendererXlsx
|
||||
from modules.services.serviceGeneration.renderers.rendererPptx import RendererPptx
|
||||
from modules.serviceCenter.services.serviceGeneration.renderers.rendererPdf import RendererPdf
|
||||
from modules.serviceCenter.services.serviceGeneration.renderers.rendererDocx import RendererDocx
|
||||
from modules.serviceCenter.services.serviceGeneration.renderers.rendererXlsx import RendererXlsx
|
||||
from modules.serviceCenter.services.serviceGeneration.renderers.rendererPptx import RendererPptx
|
||||
|
||||
mime_map = {
|
||||
"application/pdf": (RendererPdf, "application/pdf"),
|
||||
|
|
|
|||
|
|
@ -284,7 +284,7 @@ from .datamodelFeatureRealEstate import (
|
|||
Land,
|
||||
DokumentTyp,
|
||||
)
|
||||
from modules.services import getInterface as getServices
|
||||
from modules.serviceHub import getInterface as getServices
|
||||
from .interfaceFeatureRealEstate import getInterface as getRealEstateInterface
|
||||
from modules.interfaces.interfaceDbManagement import getInterface as getComponentInterface
|
||||
from modules.connectors.connectorSwissTopoMapServer import SwissTopoMapServerConnector
|
||||
|
|
|
|||
|
|
@ -843,7 +843,7 @@ async def testVoice(
|
|||
):
|
||||
"""Test TTS voice with AI-generated sample text in the correct language."""
|
||||
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
|
||||
from modules.services.serviceAi.mainServiceAi import AiService
|
||||
from modules.serviceCenter.services.serviceAi.mainServiceAi import AiService
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
||||
|
||||
mandateId = _validateInstanceAccess(instanceId, context)
|
||||
|
|
|
|||
|
|
@ -1062,7 +1062,7 @@ class TeamsbotService:
|
|||
|
||||
# Call SPEECH_TEAMS
|
||||
try:
|
||||
from modules.services.serviceAi.mainServiceAi import AiService
|
||||
from modules.serviceCenter.services.serviceAi.mainServiceAi import AiService
|
||||
|
||||
# Create minimal service context for AI billing
|
||||
serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
|
||||
|
|
@ -1684,7 +1684,7 @@ class TeamsbotService:
|
|||
"""Summarize a long user-provided session context to its essential points.
|
||||
This reduces token usage in every subsequent AI call."""
|
||||
try:
|
||||
from modules.services.serviceAi.mainServiceAi import AiService
|
||||
from modules.serviceCenter.services.serviceAi.mainServiceAi import AiService
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
||||
|
||||
serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
|
||||
|
|
@ -1738,7 +1738,7 @@ class TeamsbotService:
|
|||
lines.append(f"[{speaker}]: {text}")
|
||||
textToSummarize = "\n".join(lines)
|
||||
|
||||
from modules.services.serviceAi.mainServiceAi import AiService
|
||||
from modules.serviceCenter.services.serviceAi.mainServiceAi import AiService
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
||||
|
||||
serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
|
||||
|
|
@ -1783,7 +1783,7 @@ class TeamsbotService:
|
|||
for t in transcripts
|
||||
)
|
||||
|
||||
from modules.services.serviceAi.mainServiceAi import AiService
|
||||
from modules.serviceCenter.services.serviceAi.mainServiceAi import AiService
|
||||
|
||||
serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
|
||||
aiService = AiService(serviceCenter=serviceContext)
|
||||
|
|
|
|||
|
|
@ -188,7 +188,7 @@ def get_mime_type_options(
|
|||
"""Get supported MIME types from the document extraction service.
|
||||
Returns: [{ value: "mime/type", label: "Description" }]
|
||||
"""
|
||||
from modules.services.serviceExtraction.subRegistry import ExtractorRegistry
|
||||
from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry
|
||||
|
||||
registry = ExtractorRegistry()
|
||||
formats = registry.getSupportedFormats()
|
||||
|
|
|
|||
modules/features/workspace/__init__.py  (new file, 3 lines)
|
|
@ -0,0 +1,3 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Unified AI Workspace feature -- merges Codeeditor, Chatbot, and Playground."""
|
||||
modules/features/workspace/mainWorkspace.py  (new file, 255 lines)
|
|
@ -0,0 +1,255 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""
|
||||
Workspace Feature Container - Main Module.
|
||||
Handles feature initialization and RBAC catalog registration.
|
||||
Unified AI Workspace combining Codeeditor, Chatbot, and Playground capabilities.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, List, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
FEATURE_CODE = "workspace"
|
||||
FEATURE_LABEL = {"en": "AI Workspace", "de": "AI Workspace", "fr": "AI Workspace"}
|
||||
FEATURE_ICON = "mdi-brain"
|
||||
|
||||
UI_OBJECTS = [
|
||||
{
|
||||
"objectKey": "ui.feature.workspace.dashboard",
|
||||
"label": {"en": "Dashboard", "de": "Dashboard", "fr": "Tableau de bord"},
|
||||
"meta": {"area": "dashboard"}
|
||||
},
|
||||
{
|
||||
"objectKey": "ui.feature.workspace.settings",
|
||||
"label": {"en": "Settings", "de": "Einstellungen", "fr": "Parametres"},
|
||||
"meta": {"area": "settings"}
|
||||
},
|
||||
]
|
||||
|
||||
RESOURCE_OBJECTS = [
|
||||
{
|
||||
"objectKey": "resource.feature.workspace.start",
|
||||
"label": {"en": "Start Agent", "de": "Agent starten", "fr": "Demarrer agent"},
|
||||
"meta": {"endpoint": "/api/workspace/{instanceId}/start/stream", "method": "POST"}
|
||||
},
|
||||
{
|
||||
"objectKey": "resource.feature.workspace.stop",
|
||||
"label": {"en": "Stop Agent", "de": "Agent stoppen", "fr": "Arreter agent"},
|
||||
"meta": {"endpoint": "/api/workspace/{instanceId}/{workflowId}/stop", "method": "POST"}
|
||||
},
|
||||
{
|
||||
"objectKey": "resource.feature.workspace.files",
|
||||
"label": {"en": "Manage Files", "de": "Dateien verwalten", "fr": "Gerer fichiers"},
|
||||
"meta": {"endpoint": "/api/workspace/{instanceId}/files", "method": "GET"}
|
||||
},
|
||||
{
|
||||
"objectKey": "resource.feature.workspace.folders",
|
||||
"label": {"en": "Manage Folders", "de": "Ordner verwalten", "fr": "Gerer dossiers"},
|
||||
"meta": {"endpoint": "/api/workspace/{instanceId}/folders", "method": "GET"}
|
||||
},
|
||||
{
|
||||
"objectKey": "resource.feature.workspace.datasources",
|
||||
"label": {"en": "Data Sources", "de": "Datenquellen", "fr": "Sources de donnees"},
|
||||
"meta": {"endpoint": "/api/workspace/{instanceId}/datasources", "method": "GET"}
|
||||
},
|
||||
{
|
||||
"objectKey": "resource.feature.workspace.voice",
|
||||
"label": {"en": "Voice Input/Output", "de": "Spracheingabe/-ausgabe", "fr": "Entree/sortie vocale"},
|
||||
"meta": {"endpoint": "/api/workspace/{instanceId}/voice/*", "method": "POST"}
|
||||
},
|
||||
]
|
||||
|
||||
TEMPLATE_ROLES = [
|
||||
{
|
||||
"roleLabel": "workspace-viewer",
|
||||
"description": {
|
||||
"en": "Workspace Viewer - View workspace (read-only)",
|
||||
"de": "Workspace Betrachter - Workspace ansehen (nur lesen)",
|
||||
"fr": "Visualiseur Workspace - Consulter le workspace (lecture seule)"
|
||||
},
|
||||
"accessRules": [
|
||||
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
|
||||
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
|
||||
{"context": "DATA", "item": None, "view": True, "read": "m", "create": "n", "update": "n", "delete": "n"},
|
||||
]
|
||||
},
|
||||
{
|
||||
"roleLabel": "workspace-user",
|
||||
"description": {
|
||||
"en": "Workspace User - Use AI workspace and tools",
|
||||
"de": "Workspace Benutzer - AI Workspace und Tools nutzen",
|
||||
"fr": "Utilisateur Workspace - Utiliser l'espace de travail AI et les outils"
|
||||
},
|
||||
"accessRules": [
|
||||
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
|
||||
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
|
||||
{"context": "RESOURCE", "item": "resource.feature.workspace.start", "view": True},
|
||||
{"context": "RESOURCE", "item": "resource.feature.workspace.stop", "view": True},
|
||||
{"context": "RESOURCE", "item": "resource.feature.workspace.files", "view": True},
|
||||
{"context": "RESOURCE", "item": "resource.feature.workspace.folders", "view": True},
|
||||
{"context": "RESOURCE", "item": "resource.feature.workspace.datasources", "view": True},
|
||||
{"context": "RESOURCE", "item": "resource.feature.workspace.voice", "view": True},
|
||||
{"context": "DATA", "item": None, "view": True, "read": "m", "create": "m", "update": "m", "delete": "m"},
|
||||
]
|
||||
},
|
||||
{
|
||||
"roleLabel": "workspace-admin",
|
||||
"description": {
|
||||
"en": "Workspace Admin - Full access to AI workspace",
|
||||
"de": "Workspace Admin - Vollzugriff auf AI Workspace",
|
||||
"fr": "Administrateur Workspace - Acces complet au workspace AI"
|
||||
},
|
||||
"accessRules": [
|
||||
{"context": "UI", "item": None, "view": True},
|
||||
{"context": "RESOURCE", "item": None, "view": True},
|
||||
{"context": "DATA", "item": None, "view": True, "read": "a", "create": "a", "update": "a", "delete": "a"},
|
||||
]
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def getFeatureDefinition() -> Dict[str, Any]:
|
||||
"""Return the feature definition for registration."""
|
||||
return {
|
||||
"code": FEATURE_CODE,
|
||||
"label": FEATURE_LABEL,
|
||||
"icon": FEATURE_ICON,
|
||||
"autoCreateInstance": True,
|
||||
}
|
||||
|
||||
|
||||
def getUiObjects() -> List[Dict[str, Any]]:
|
||||
"""Return UI objects for RBAC catalog registration."""
|
||||
return UI_OBJECTS
|
||||
|
||||
|
||||
def getResourceObjects() -> List[Dict[str, Any]]:
|
||||
"""Return resource objects for RBAC catalog registration."""
|
||||
return RESOURCE_OBJECTS
|
||||
|
||||
|
||||
def getTemplateRoles() -> List[Dict[str, Any]]:
|
||||
"""Return template roles for this feature."""
|
||||
return TEMPLATE_ROLES
|
||||
|
||||
|
||||
def registerFeature(catalogService) -> bool:
|
||||
"""Register this feature's RBAC objects in the catalog."""
|
||||
try:
|
||||
for uiObj in UI_OBJECTS:
|
||||
catalogService.registerUiObject(
|
||||
featureCode=FEATURE_CODE,
|
||||
objectKey=uiObj["objectKey"],
|
||||
label=uiObj["label"],
|
||||
meta=uiObj.get("meta")
|
||||
)
|
||||
|
||||
for resObj in RESOURCE_OBJECTS:
|
||||
catalogService.registerResourceObject(
|
||||
featureCode=FEATURE_CODE,
|
||||
objectKey=resObj["objectKey"],
|
||||
label=resObj["label"],
|
||||
meta=resObj.get("meta")
|
||||
)
|
||||
|
||||
_syncTemplateRolesToDb()
|
||||
|
||||
logger.info(f"Feature '{FEATURE_CODE}' registered {len(UI_OBJECTS)} UI objects and {len(RESOURCE_OBJECTS)} resource objects")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to register feature '{FEATURE_CODE}': {e}")
|
||||
return False
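registerFeature is meant to be safe to run on every startup: catalog objects are re-registered and the role sync below only creates what is missing, keyed by a (context, item) signature per access rule. A simplified sketch of that de-duplication idea (data shapes are stand-ins):

    existingRules = [{"context": "UI", "item": "ui.feature.workspace.dashboard"}]
    templates = [
        {"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},     # already present
        {"context": "RESOURCE", "item": "resource.feature.workspace.start", "view": True},
    ]

    existingSignatures = {(r["context"], r["item"]) for r in existingRules}
    created = []
    for template in templates:
        sig = (template.get("context", "UI"), template.get("item"))
        if sig in existingSignatures:
            continue                      # rule already exists for this role, skip
        created.append(template)
        existingSignatures.add(sig)

    print(len(created))                   # -> 1 (only the RESOURCE rule is new)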
|
||||
|
||||
|
||||
def _syncTemplateRolesToDb() -> int:
|
||||
"""Sync template roles and their AccessRules to the database."""
|
||||
try:
|
||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
from modules.datamodels.datamodelRbac import Role, AccessRule, AccessRuleContext
|
||||
|
||||
rootInterface = getRootInterface()
|
||||
|
||||
existingRoles = rootInterface.getRolesByFeatureCode(FEATURE_CODE)
|
||||
templateRoles = [r for r in existingRoles if r.mandateId is None]
|
||||
existingRoleLabels = {r.roleLabel: str(r.id) for r in templateRoles}
|
||||
|
||||
createdCount = 0
|
||||
for roleTemplate in TEMPLATE_ROLES:
|
||||
roleLabel = roleTemplate["roleLabel"]
|
||||
|
||||
if roleLabel in existingRoleLabels:
|
||||
roleId = existingRoleLabels[roleLabel]
|
||||
_ensureAccessRulesForRole(rootInterface, roleId, roleTemplate.get("accessRules", []))
|
||||
else:
|
||||
newRole = Role(
|
||||
roleLabel=roleLabel,
|
||||
description=roleTemplate.get("description", {}),
|
||||
featureCode=FEATURE_CODE,
|
||||
mandateId=None,
|
||||
featureInstanceId=None,
|
||||
isSystemRole=False
|
||||
)
|
||||
createdRole = rootInterface.db.recordCreate(Role, newRole.model_dump())
|
||||
roleId = createdRole.get("id")
|
||||
_ensureAccessRulesForRole(rootInterface, roleId, roleTemplate.get("accessRules", []))
|
||||
logger.info(f"Created template role '{roleLabel}' with ID {roleId}")
|
||||
createdCount += 1
|
||||
|
||||
if createdCount > 0:
|
||||
logger.info(f"Feature '{FEATURE_CODE}': Created {createdCount} template roles")
|
||||
|
||||
return createdCount
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error syncing template roles for feature '{FEATURE_CODE}': {e}")
|
||||
return 0
|
||||
|
||||
|
||||
def _ensureAccessRulesForRole(rootInterface, roleId: str, ruleTemplates: List[Dict[str, Any]]) -> int:
|
||||
"""Ensure AccessRules exist for a role based on templates."""
|
||||
from modules.datamodels.datamodelRbac import AccessRule, AccessRuleContext
|
||||
|
||||
existingRules = rootInterface.getAccessRulesByRole(roleId)
|
||||
existingSignatures = set()
|
||||
for rule in existingRules:
|
||||
sig = (rule.context.value if rule.context else None, rule.item)
|
||||
existingSignatures.add(sig)
|
||||
|
||||
createdCount = 0
|
||||
for template in ruleTemplates:
|
||||
context = template.get("context", "UI")
|
||||
item = template.get("item")
|
||||
sig = (context, item)
|
||||
|
||||
if sig in existingSignatures:
|
||||
continue
|
||||
|
||||
if context == "UI":
|
||||
contextEnum = AccessRuleContext.UI
|
||||
elif context == "DATA":
|
||||
contextEnum = AccessRuleContext.DATA
|
||||
elif context == "RESOURCE":
|
||||
contextEnum = AccessRuleContext.RESOURCE
|
||||
else:
|
||||
contextEnum = context
|
||||
|
||||
newRule = AccessRule(
|
||||
roleId=roleId,
|
||||
context=contextEnum,
|
||||
item=item,
|
||||
view=template.get("view", False),
|
||||
read=template.get("read"),
|
||||
create=template.get("create"),
|
||||
update=template.get("update"),
|
||||
delete=template.get("delete"),
|
||||
)
|
||||
rootInterface.db.recordCreate(AccessRule, newRule.model_dump())
|
||||
createdCount += 1
|
||||
|
||||
if createdCount > 0:
|
||||
logger.debug(f"Created {createdCount} AccessRules for role {roleId}")
|
||||
|
||||
return createdCount
|
||||
modules/features/workspace/routeFeatureWorkspace.py  (new file, 1037 lines; diff suppressed because it is too large)
|
|
@ -4,7 +4,7 @@ import logging
|
|||
import asyncio
|
||||
import uuid
|
||||
import base64
|
||||
from typing import Dict, Any, List, Union, Tuple, Optional, Callable
|
||||
from typing import Dict, Any, List, Union, Tuple, Optional, Callable, AsyncGenerator
|
||||
from dataclasses import dataclass, field
|
||||
import time
|
||||
|
||||
|
|
@ -84,15 +84,16 @@ class AiObjects:
|
|||
|
||||
# AI for Extraction, Processing, Generation
|
||||
async def callWithTextContext(self, request: AiCallRequest) -> AiCallResponse:
|
||||
"""Call AI model for traditional text/context calls with fallback mechanism."""
|
||||
"""Call AI model for traditional text/context calls with fallback mechanism.
|
||||
|
||||
Supports two modes:
|
||||
- Legacy: prompt + context → constructs messages internally
|
||||
- Agent: request.messages provided → passes through directly
|
||||
"""
|
||||
prompt = request.prompt
|
||||
context = request.context or ""
|
||||
options = request.options
|
||||
|
||||
# Input bytes will be calculated inside _callWithModel
|
||||
|
||||
# Generation parameters are handled inside _callWithModel
|
||||
|
||||
# Get failover models for this operation type
|
||||
availableModels = modelRegistry.getAvailableModels()
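The updated docstring describes two entry modes for callWithTextContext: legacy prompt + context (messages are built internally) and agent mode (request.messages passed through as-is). A hedged sketch of that branching with a plain stand-in request object (the real AiCallRequest has more fields):

    from dataclasses import dataclass
    from typing import Any, Dict, List, Optional

    @dataclass
    class FakeRequest:                     # stand-in, not the repo's AiCallRequest
        prompt: str = ""
        context: str = ""
        messages: Optional[List[Dict[str, Any]]] = None

    def buildMessages(request: FakeRequest) -> List[Dict[str, Any]]:
        if request.messages:               # agent mode: pass pre-built messages through
            return request.messages
        # legacy mode: construct messages from prompt + context internally
        return [
            {"role": "system", "content": request.context},
            {"role": "user", "content": request.prompt},
        ]

    legacy = FakeRequest(prompt="Summarize this.", context="Document text ...")
    agent = FakeRequest(messages=[{"role": "user", "content": "Use the search tool."}])
    print(len(buildMessages(legacy)), len(buildMessages(agent)))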
|
||||
|
||||
|
|
@ -127,10 +128,12 @@ class AiObjects:
|
|||
try:
|
||||
logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
|
||||
|
||||
# Call the model directly - no truncation or compression here
|
||||
response = await self._callWithModel(model, prompt, context, options)
|
||||
if request.messages:
|
||||
response = await self._callWithMessages(model, request.messages, options, request.tools)
|
||||
else:
|
||||
response = await self._callWithModel(model, prompt, context, options)
|
||||
|
||||
logger.info(f"✅ AI call successful with model: {model.name}")
|
||||
logger.info(f"AI call successful with model: {model.name}")
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -142,8 +145,7 @@ class AiObjects:
|
|||
logger.info(f"Trying next failover model...")
|
||||
continue
|
||||
else:
|
||||
# All models failed
|
||||
logger.error(f"💥 All {len(failoverModelList)} models failed for operation {options.operationType}")
|
||||
logger.error(f"All {len(failoverModelList)} models failed for operation {options.operationType}")
|
||||
break
|
||||
|
||||
# All failover attempts failed - return error response
|
||||
|
|
@ -254,6 +256,242 @@ class AiObjects:
|
|||
|
||||
return response
|
||||
|
||||
async def _callWithMessages(self, model: AiModel, messages: List[Dict[str, Any]],
|
||||
options: AiCallOptions = None,
|
||||
tools: List[Dict[str, Any]] = None) -> AiCallResponse:
|
||||
"""Call a model with pre-built messages (agent mode). Supports tools for native function calling."""
|
||||
import json as _json
|
||||
|
||||
inputBytes = sum(len(str(m.get("content", "")).encode("utf-8")) for m in messages)
|
||||
startTime = time.time()
|
||||
|
||||
if not model.functionCall:
|
||||
raise ValueError(f"Model {model.name} has no function call defined")
|
||||
|
||||
modelCall = AiModelCall(
|
||||
messages=messages,
|
||||
model=model,
|
||||
options=options or {},
|
||||
tools=tools
|
||||
)
|
||||
|
||||
modelResponse = await model.functionCall(modelCall)
|
||||
|
||||
if not modelResponse.success:
|
||||
raise ValueError(f"Model call failed: {modelResponse.error}")
|
||||
|
||||
endTime = time.time()
|
||||
processingTime = endTime - startTime
|
||||
content = modelResponse.content
|
||||
outputBytes = len(content.encode("utf-8"))
|
||||
priceCHF = model.calculatepriceCHF(processingTime, inputBytes, outputBytes)
|
||||
|
||||
# Extract tool calls from metadata if present (native function calling)
|
||||
responseToolCalls = None
|
||||
if modelResponse.metadata:
|
||||
responseToolCalls = modelResponse.metadata.get("toolCalls")
|
||||
|
||||
response = AiCallResponse(
|
||||
content=content,
|
||||
modelName=model.name,
|
||||
provider=model.connectorType,
|
||||
priceCHF=priceCHF,
|
||||
processingTime=processingTime,
|
||||
bytesSent=inputBytes,
|
||||
bytesReceived=outputBytes,
|
||||
errorCount=0,
|
||||
toolCalls=responseToolCalls
|
||||
)
|
||||
|
||||
if self.billingCallback:
|
||||
try:
|
||||
self.billingCallback(response)
|
||||
except Exception as e:
|
||||
logger.error(f"BILLING: Failed to record billing for model {model.name}: {e}")
|
||||
|
||||
return response
|
||||
|
||||
async def callWithTextContextStream(
|
||||
self, request: AiCallRequest
|
||||
) -> AsyncGenerator[Union[str, AiCallResponse], None]:
|
||||
"""Streaming variant of callWithTextContext. Yields str deltas, then final AiCallResponse."""
|
||||
options = request.options
|
||||
availableModels = modelRegistry.getAvailableModels()
|
||||
|
||||
allowedProviders = getattr(options, 'allowedProviders', None) if options else None
|
||||
if allowedProviders:
|
||||
filtered = [m for m in availableModels if m.connectorType in allowedProviders]
|
||||
if filtered:
|
||||
availableModels = filtered
|
||||
|
||||
failoverModelList = modelSelector.getFailoverModelList(
|
||||
request.prompt, request.context or "", options, availableModels
|
||||
)
|
||||
if not failoverModelList:
|
||||
yield AiCallResponse(
|
||||
content=f"No suitable models found for operation {options.operationType}",
|
||||
modelName="error", priceCHF=0.0, processingTime=0.0,
|
||||
bytesSent=0, bytesReceived=0, errorCount=1,
|
||||
)
|
||||
return
|
||||
|
||||
lastError = None
|
||||
for attempt, model in enumerate(failoverModelList):
|
||||
try:
|
||||
logger.info(f"Streaming AI call with model: {model.name} (attempt {attempt + 1})")
|
||||
async for chunk in self._callWithMessagesStream(model, request.messages, options, request.tools):
|
||||
yield chunk
|
||||
return
|
||||
except Exception as e:
|
||||
lastError = e
|
||||
logger.warning(f"Streaming AI call failed with {model.name}: {e}")
|
||||
modelSelector.reportFailure(model.name)
|
||||
if attempt < len(failoverModelList) - 1:
|
||||
continue
|
||||
break
|
||||
|
||||
yield AiCallResponse(
|
||||
content=f"All models failed (stream). Last error: {lastError}",
|
||||
modelName="error", priceCHF=0.0, processingTime=0.0,
|
||||
bytesSent=0, bytesReceived=0, errorCount=1,
|
||||
)
|
||||
|
||||
async def _callWithMessagesStream(
|
||||
self, model: AiModel, messages: List[Dict[str, Any]],
|
||||
options: AiCallOptions = None, tools: List[Dict[str, Any]] = None,
|
||||
) -> AsyncGenerator[Union[str, AiCallResponse], None]:
|
||||
"""Stream a model call. Yields str deltas, then final AiCallResponse with billing."""
|
||||
from modules.datamodels.datamodelAi import AiModelCall, AiModelResponse
|
||||
|
||||
inputBytes = sum(len(str(m.get("content", "")).encode("utf-8")) for m in messages)
|
||||
startTime = time.time()
|
||||
|
||||
if not model.functionCallStream:
|
||||
response = await self._callWithMessages(model, messages, options, tools)
|
||||
if response.content:
|
||||
yield response.content
|
||||
yield response
|
||||
return
|
||||
|
||||
modelCall = AiModelCall(
|
||||
messages=messages, model=model,
|
||||
options=options or {}, tools=tools,
|
||||
)
|
||||
|
||||
finalModelResponse = None
|
||||
async for item in model.functionCallStream(modelCall):
|
||||
if isinstance(item, AiModelResponse):
|
||||
finalModelResponse = item
|
||||
else:
|
||||
yield item
|
||||
|
||||
if not finalModelResponse:
|
||||
raise ValueError(f"Stream from {model.name} produced no final AiModelResponse")
|
||||
|
||||
endTime = time.time()
|
||||
processingTime = endTime - startTime
|
||||
content = finalModelResponse.content
|
||||
outputBytes = len(content.encode("utf-8"))
|
||||
priceCHF = model.calculatepriceCHF(processingTime, inputBytes, outputBytes)
|
||||
|
||||
responseToolCalls = None
|
||||
if finalModelResponse.metadata:
|
||||
responseToolCalls = finalModelResponse.metadata.get("toolCalls")
|
||||
|
||||
response = AiCallResponse(
|
||||
content=content,
|
||||
modelName=model.name,
|
||||
provider=model.connectorType,
|
||||
priceCHF=priceCHF,
|
||||
processingTime=processingTime,
|
||||
bytesSent=inputBytes,
|
||||
bytesReceived=outputBytes,
|
||||
errorCount=0,
|
||||
toolCalls=responseToolCalls,
|
||||
)
|
||||
|
||||
if self.billingCallback:
|
||||
try:
|
||||
self.billingCallback(response)
|
||||
except Exception as e:
|
||||
logger.error(f"BILLING: Failed to record stream billing for {model.name}: {e}")
|
||||
|
||||
yield response
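Both streaming paths follow the same contract: the generator yields text deltas first and a single final response object (with billing data) last. A self-contained consumer sketch of that contract (FinalResponse is a stand-in for AiCallResponse):

    import asyncio
    from dataclasses import dataclass

    @dataclass
    class FinalResponse:
        content: str
        priceCHF: float

    async def fakeStream():
        for delta in ("Hel", "lo"):
            yield delta                                   # incremental text chunks
        yield FinalResponse(content="Hello", priceCHF=0.0012)   # terminal item with billing

    async def consume():
        parts, final = [], None
        async for item in fakeStream():
            if isinstance(item, FinalResponse):
                final = item                              # keep the final response
            else:
                parts.append(item)                        # accumulate streamed deltas
        print("".join(parts), final.priceCHF)

    asyncio.run(consume())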
|
||||
|
||||
async def callEmbedding(self, texts: List[str], options: AiCallOptions = None) -> AiCallResponse:
|
||||
"""Generate embeddings for a list of texts using the best available embedding model.
|
||||
|
||||
Uses the standard model selector with OperationTypeEnum.EMBEDDING to pick the model.
|
||||
Failover across providers (OpenAI → Mistral) works identically to chat models.
|
||||
|
||||
Returns:
|
||||
AiCallResponse with metadata["embeddings"] containing the vectors.
|
||||
"""
|
||||
if options is None:
|
||||
options = AiCallOptions(operationType=OperationTypeEnum.EMBEDDING)
|
||||
else:
|
||||
options.operationType = OperationTypeEnum.EMBEDDING
|
||||
|
||||
combinedText = " ".join(texts[:3])[:500]
|
||||
availableModels = modelRegistry.getAvailableModels()
|
||||
failoverModelList = modelSelector.getFailoverModelList(
|
||||
combinedText, "", options, availableModels
|
||||
)
|
||||
|
||||
if not failoverModelList:
|
||||
return AiCallResponse(
|
||||
content="", modelName="error", priceCHF=0.0,
|
||||
processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1
|
||||
)
|
||||
|
||||
lastError = None
|
||||
for attempt, model in enumerate(failoverModelList):
|
||||
try:
|
||||
logger.info(f"Embedding call with {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
|
||||
inputBytes = sum(len(t.encode("utf-8")) for t in texts)
|
||||
startTime = time.time()
|
||||
|
||||
modelCall = AiModelCall(
|
||||
model=model, options=options, embeddingInput=texts
|
||||
)
|
||||
modelResponse = await model.functionCall(modelCall)
|
||||
|
||||
if not modelResponse.success:
|
||||
raise ValueError(f"Embedding call failed: {modelResponse.error}")
|
||||
|
||||
processingTime = time.time() - startTime
|
||||
priceCHF = model.calculatepriceCHF(processingTime, inputBytes, 0)
|
||||
embeddings = (modelResponse.metadata or {}).get("embeddings", [])
|
||||
|
||||
response = AiCallResponse(
|
||||
content="", modelName=model.name, provider=model.connectorType,
|
||||
priceCHF=priceCHF, processingTime=processingTime,
|
||||
bytesSent=inputBytes, bytesReceived=0, errorCount=0,
|
||||
metadata={"embeddings": embeddings}
|
||||
)
|
||||
|
||||
if self.billingCallback:
|
||||
try:
|
||||
self.billingCallback(response)
|
||||
except Exception as e:
|
||||
logger.error(f"BILLING: Failed to record billing for embedding {model.name}: {e}")
|
||||
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
lastError = e
|
||||
logger.warning(f"Embedding call failed with {model.name}: {str(e)}")
|
||||
modelSelector.reportFailure(model.name)
|
||||
if attempt < len(failoverModelList) - 1:
|
||||
continue
|
||||
break
|
||||
|
||||
errorMsg = f"All embedding models failed. Last error: {str(lastError)}"
|
||||
logger.error(errorMsg)
|
||||
return AiCallResponse(
|
||||
content=errorMsg, modelName="error", priceCHF=0.0,
|
||||
processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1
|
||||
)
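callEmbedding returns the vectors in metadata["embeddings"], one per input text, rather than in content. A tiny sketch (response shape assumed) of pairing inputs with their vectors on the caller side:

    texts = ["first chunk", "second chunk"]
    response = {"metadata": {"embeddings": [[0.1, 0.2], [0.3, 0.4]]}}   # stand-in for AiCallResponse

    vectors = response["metadata"]["embeddings"]
    assert len(vectors) == len(texts)          # one vector per input text
    byText = dict(zip(texts, vectors))
    print(byText["first chunk"])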
|
||||
|
||||
# Utility methods
|
||||
async def listAvailableModels(self, connectorType: str = None) -> List[Dict[str, Any]]:
|
||||
|
|
|
|||
|
|
@ -764,7 +764,11 @@ class BillingObjects:
|
|||
featureCode: str = None,
|
||||
aicoreProvider: str = None,
|
||||
aicoreModel: str = None,
|
||||
description: str = "AI Usage"
|
||||
description: str = "AI Usage",
|
||||
processingTime: float = None,
|
||||
bytesSent: int = None,
|
||||
bytesReceived: int = None,
|
||||
errorCount: int = None
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Record usage cost as a billing transaction.
|
||||
|
|
@ -774,20 +778,6 @@ class BillingObjects:
|
|||
- PREPAY_USER: deduct from user's own balance
|
||||
- PREPAY_MANDATE: deduct from mandate pool balance
|
||||
- CREDIT_POSTPAY: deduct from mandate pool balance
|
||||
|
||||
Args:
|
||||
mandateId: Mandate ID
|
||||
userId: User ID
|
||||
priceCHF: Cost in CHF
|
||||
workflowId: Optional workflow ID
|
||||
featureInstanceId: Optional feature instance ID
|
||||
featureCode: Optional feature code
|
||||
aicoreProvider: AICore provider name (e.g., 'anthropic', 'openai')
|
||||
aicoreModel: AICore model name (e.g., 'claude-4-sonnet', 'gpt-4o')
|
||||
description: Transaction description
|
||||
|
||||
Returns:
|
||||
Created transaction dict or None
|
||||
"""
|
||||
if priceCHF <= 0:
|
||||
return None
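Per the docstring kept above, the deduction target depends on the billing model: PREPAY_USER charges the user's own balance, while PREPAY_MANDATE and CREDIT_POSTPAY charge the mandate pool. A hedged sketch of that account selection (function name and account IDs are illustrative):

    def pickBalanceAccount(billingModel: str, userAccountId: str, mandatePoolAccountId: str) -> str:
        # PREPAY_USER deducts from the user's own balance; PREPAY_MANDATE and
        # CREDIT_POSTPAY deduct from the mandate pool balance
        if billingModel == "PREPAY_USER":
            return userAccountId
        return mandatePoolAccountId

    print(pickBalanceAccount("PREPAY_USER", "acct-user-1", "acct-pool-1"))     # -> acct-user-1
    print(pickBalanceAccount("CREDIT_POSTPAY", "acct-user-1", "acct-pool-1"))  # -> acct-pool-1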
|
||||
|
|
@ -816,7 +806,11 @@ class BillingObjects:
|
|||
featureCode=featureCode,
|
||||
aicoreProvider=aicoreProvider,
|
||||
aicoreModel=aicoreModel,
|
||||
createdByUserId=userId
|
||||
createdByUserId=userId,
|
||||
processingTime=processingTime,
|
||||
bytesSent=bytesSent,
|
||||
bytesReceived=bytesReceived,
|
||||
errorCount=errorCount
|
||||
)
|
||||
|
||||
# Determine where to deduct balance
|
||||
|
|
@ -828,6 +822,20 @@ class BillingObjects:
|
|||
poolAccount = self.getOrCreateMandateAccount(mandateId)
|
||||
return self.createTransaction(transaction, balanceAccountId=poolAccount["id"])
|
||||
|
||||
# =========================================================================
|
||||
# Workflow Cost Query
|
||||
# =========================================================================
|
||||
|
||||
def getWorkflowCost(self, workflowId: str) -> float:
|
||||
"""Sum of all transaction amounts for a workflow."""
|
||||
if not workflowId:
|
||||
return 0.0
|
||||
transactions = self.db.getRecordset(
|
||||
BillingTransaction,
|
||||
recordFilter={"workflowId": workflowId}
|
||||
)
|
||||
return sum(t.get("amount", 0.0) for t in transactions)
|
||||
|
||||
# =========================================================================
|
||||
# Billing Model Switch Operations
|
||||
# =========================================================================
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ from modules.datamodels.datamodelUam import AccessLevel
|
|||
|
||||
from modules.datamodels.datamodelChat import (
|
||||
ChatDocument,
|
||||
ChatStat,
|
||||
ChatLog,
|
||||
ChatMessage,
|
||||
ChatWorkflow,
|
||||
|
|
@ -663,10 +662,8 @@ class ChatObjects:
|
|||
|
||||
workflow = workflows[0]
|
||||
try:
|
||||
# Load related data from normalized tables
|
||||
logs = self.getLogs(workflowId)
|
||||
messages = self.getMessages(workflowId)
|
||||
stats = self.getStats(workflowId)
|
||||
|
||||
# Validate workflow data against ChatWorkflow model
|
||||
# Explicit type coercion: DB may store numeric fields as TEXT on some platforms
|
||||
|
|
@ -694,8 +691,7 @@ class ChatObjects:
|
|||
lastActivity=_toFloat(workflow.get("lastActivity")),
|
||||
startedAt=_toFloat(workflow.get("startedAt")),
|
||||
logs=logs,
|
||||
messages=messages,
|
||||
stats=stats
|
||||
messages=messages
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating workflow data: {str(e)}")
|
||||
|
|
@ -731,7 +727,7 @@ class ChatObjects:
|
|||
except Exception as e:
|
||||
logger.warning(f"Could not get Root mandate: {e}")
|
||||
# Note: ChatWorkflow has featureInstanceId for multi-tenancy isolation.
|
||||
# Child tables (ChatMessage, ChatLog, ChatStat, ChatDocument) are user-owned
|
||||
# Child tables (ChatMessage, ChatLog, ChatDocument) are user-owned
|
||||
# and do NOT store featureInstanceId - they inherit isolation from ChatWorkflow.
|
||||
# Ensure featureInstanceId is set from context if not already in workflowData
|
||||
if "featureInstanceId" not in workflowData or not workflowData.get("featureInstanceId"):
|
||||
|
|
@ -760,7 +756,7 @@ class ChatObjects:
|
|||
logs=[],
|
||||
messages=[],
|
||||
stats=[],
|
||||
workflowMode=created["workflowMode"],
|
||||
workflowMode=created.get("workflowMode", "Dynamic"),
|
||||
maxSteps=created.get("maxSteps", 1)
|
||||
)
|
||||
|
||||
|
|
@ -789,23 +785,20 @@ class ChatObjects:
|
|||
# Load fresh data from normalized tables
|
||||
logs = self.getLogs(workflowId)
|
||||
messages = self.getMessages(workflowId)
|
||||
stats = self.getStats(workflowId)
|
||||
|
||||
# Convert to ChatWorkflow model
|
||||
return ChatWorkflow(
|
||||
id=updated["id"],
|
||||
status=updated.get("status", workflow.status),
|
||||
name=updated.get("name", workflow.name),
|
||||
currentRound=updated.get("currentRound", workflow.currentRound),
|
||||
currentTask=updated.get("currentTask", workflow.currentTask),
|
||||
currentAction=updated.get("currentAction", workflow.currentAction),
|
||||
totalTasks=updated.get("totalTasks", workflow.totalTasks),
|
||||
totalActions=updated.get("totalActions", workflow.totalActions),
|
||||
currentRound=updated.get("currentRound") or getattr(workflow, "currentRound", 0) or 0,
|
||||
currentTask=updated.get("currentTask") or getattr(workflow, "currentTask", 0) or 0,
|
||||
currentAction=updated.get("currentAction") or getattr(workflow, "currentAction", 0) or 0,
|
||||
totalTasks=updated.get("totalTasks") or getattr(workflow, "totalTasks", 0) or 0,
|
||||
totalActions=updated.get("totalActions") or getattr(workflow, "totalActions", 0) or 0,
|
||||
lastActivity=updated.get("lastActivity", workflow.lastActivity),
|
||||
startedAt=updated.get("startedAt", workflow.startedAt),
|
||||
logs=logs,
|
||||
messages=messages,
|
||||
stats=stats
|
||||
messages=messages
|
||||
)
|
||||
|
||||
def deleteWorkflow(self, workflowId: str) -> bool:
|
||||
|
|
@ -827,7 +820,6 @@ class ChatObjects:
|
|||
messageId = message.id
|
||||
if messageId:
|
||||
# Delete message documents (but NOT the files!)
|
||||
# Note: ChatStat does NOT have messageId - stats are only at workflow level
|
||||
try:
|
||||
existing_docs = self._getRecordset(ChatDocument, recordFilter={"messageId": messageId})
|
||||
for doc in existing_docs:
|
||||
|
|
@ -839,11 +831,7 @@ class ChatObjects:
|
|||
self.db.recordDelete(ChatMessage, messageId)
|
||||
|
||||
# 2. Delete workflow stats
|
||||
existing_stats = self._getRecordset(ChatStat, recordFilter={"workflowId": workflowId})
|
||||
for stat in existing_stats:
|
||||
self.db.recordDelete(ChatStat, stat["id"])
|
||||
|
||||
# 3. Delete workflow logs
|
||||
# 2. Delete workflow logs
|
||||
existing_logs = self._getRecordset(ChatLog, recordFilter={"workflowId": workflowId})
|
||||
for log in existing_logs:
|
||||
self.db.recordDelete(ChatLog, log["id"])
|
||||
|
|
@ -1270,7 +1258,6 @@ class ChatObjects:
|
|||
self.db.recordDelete(ChatDocument, doc["id"])
|
||||
|
||||
# 2. Finally delete the message itself
|
||||
# Note: ChatStat has no messageId field -- stats are workflow-level, not message-level
|
||||
success = self.db.recordDelete(ChatMessage, messageId)
|
||||
|
||||
return success
|
||||
|
|
@ -1517,74 +1504,10 @@ class ChatObjects:
|
|||
# Return validated ChatLog instance
|
||||
return ChatLog(**createdLog)
|
||||
|
||||
# Stats methods
|
||||
|
||||
def getStats(self, workflowId: str) -> List[ChatStat]:
|
||||
"""Returns list of statistics for a workflow if user has access."""
|
||||
# Check workflow access first (without calling getWorkflow to avoid circular reference)
|
||||
# Use RBAC filtering
|
||||
workflows = self._getRecordset(ChatWorkflow, recordFilter={"id": workflowId})
|
||||
|
||||
if not workflows:
|
||||
return []
|
||||
|
||||
# Get stats for this workflow from normalized table
|
||||
stats = self._getRecordset(ChatStat, recordFilter={"workflowId": workflowId})
|
||||
|
||||
if not stats:
|
||||
return []
|
||||
|
||||
# Return all stats records sorted by creation time.
|
||||
# Use parseTimestamp to tolerate mixed DB types (float/string) on INT.
|
||||
# DB uses _createdAt (camelCase system field).
|
||||
stats.sort(key=lambda x: parseTimestamp(x.get("_createdAt"), default=0))
|
||||
|
||||
# Convert to ChatStat objects, preserving _createdAt via extra="allow"
|
||||
result = []
|
||||
for stat in stats:
|
||||
chat_stat = ChatStat(**stat)
|
||||
# Explicitly preserve _createdAt from raw DB record
|
||||
if "_createdAt" in stat:
|
||||
setattr(chat_stat, '_createdAt', stat["_createdAt"])
|
||||
result.append(chat_stat)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def createStat(self, statData: Dict[str, Any]) -> ChatStat:
|
||||
"""Creates a new stats record and returns it."""
|
||||
try:
|
||||
# Ensure workflowId is present in statData
|
||||
if "workflowId" not in statData:
|
||||
raise ValueError("workflowId is required in statData")
|
||||
|
||||
# Note: Chat data is user-owned, no mandate/featureInstance context stored
|
||||
# mandateId/featureInstanceId removed from ChatStat model
|
||||
|
||||
# Validate the stat data against ChatStat model
|
||||
stat = ChatStat(**statData)
|
||||
|
||||
logger.debug(f"Creating stat for workflow {statData.get('workflowId')}: "
|
||||
f"process={statData.get('process')}, "
|
||||
f"priceCHF={statData.get('priceCHF', 0):.4f}, "
|
||||
f"processingTime={statData.get('processingTime', 0):.2f}s")
|
||||
|
||||
# Create the stat record in the database
|
||||
created = self.db.recordCreate(ChatStat, stat)
|
||||
|
||||
logger.info(f"Created stat {created.get('id')} for workflow {statData.get('workflowId')}")
|
||||
|
||||
# Return the created ChatStat
|
||||
return ChatStat(**created)
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating workflow stat: {str(e)}")
|
||||
raise
|
||||
|
||||
|
||||
def getUnifiedChatData(self, workflowId: str, afterTimestamp: Optional[float] = None) -> Dict[str, Any]:
|
||||
def getUnifiedChatData(self, workflowId: str, afterTimestamp: Optional[float] = None, workflowCost: float = 0.0) -> Dict[str, Any]:
|
||||
"""
|
||||
Returns unified chat data (messages, logs, stats) for a workflow in chronological order.
|
||||
Uses timestamp-based selective data transfer for efficient polling.
|
||||
Returns unified chat data (messages, logs) for a workflow in chronological order,
|
||||
plus workflowCost from billing transactions (single source of truth).
|
||||
"""
|
||||
# Check workflow access first
|
||||
# Use RBAC filtering
|
||||
|
|
@ -1652,29 +1575,10 @@ class ChatObjects:
|
|||
"item": chatLog
|
||||
})
|
||||
|
||||
# Get stats - ChatStat model supports _createdAt via model_config extra="allow"
|
||||
stats = self.getStats(workflowId)
|
||||
for stat in stats:
|
||||
# Apply timestamp filtering in Python
|
||||
# Use _createdAt (system field from DB, preserved via model_config extra="allow")
|
||||
stat_timestamp = getattr(stat, '_createdAt', None) or getUtcTimestamp()
|
||||
if afterTimestamp is not None and stat_timestamp <= afterTimestamp:
|
||||
continue
|
||||
|
||||
# Convert to dict and include _createdAt for frontend
|
||||
stat_dict = stat.model_dump() if hasattr(stat, 'model_dump') else stat.dict()
|
||||
stat_dict['_createdAt'] = stat_timestamp
|
||||
|
||||
items.append({
|
||||
"type": "stat",
|
||||
"createdAt": stat_timestamp,
|
||||
"item": stat_dict
|
||||
})
|
||||
|
||||
# Sort all items by createdAt timestamp for chronological order
|
||||
items.sort(key=lambda x: parseTimestamp(x.get("createdAt"), default=0))
|
||||
|
||||
return {"items": items}
|
||||
return {"items": items, "workflowCost": workflowCost}
|
||||
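# Illustrative sketch of the new return shape (values are made up; the item payloads
# follow the respective ChatMessage/ChatLog models):
#     {
#       "items": [
#         {"type": "message", "createdAt": 1718000000.0, "item": {...}},
#         {"type": "log",     "createdAt": 1718000001.2, "item": {...}},
#       ],
#       "workflowCost": 0.0421,   # CHF, taken from billing transactions
#     }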
|
||||
|
||||
def getInterface(currentUser: Optional[User] = None, mandateId: Optional[str] = None, featureInstanceId: Optional[str] = None) -> 'ChatObjects':
|
||||
|
|
|
|||
234 modules/interfaces/interfaceDbKnowledge.py (new file)
|
|
@ -0,0 +1,234 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""
|
||||
Interface to the Knowledge Store database (poweron_knowledge).
|
||||
Provides CRUD for FileContentIndex, ContentChunk, WorkflowMemory
|
||||
and semantic search via pgvector.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
from modules.connectors.connectorDbPostgre import _get_cached_connector
|
||||
from modules.datamodels.datamodelKnowledge import FileContentIndex, ContentChunk, WorkflowMemory
|
||||
from modules.datamodels.datamodelUam import User
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
from modules.shared.timeUtils import getUtcTimestamp
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_instances: Dict[str, "KnowledgeObjects"] = {}
|
||||
|
||||
|
||||
class KnowledgeObjects:
|
||||
"""Interface to the Knowledge Store database.
|
||||
Manages FileContentIndex, ContentChunk, and WorkflowMemory with semantic search."""
|
||||
|
||||
def __init__(self):
|
||||
self.currentUser: Optional[User] = None
|
||||
self.userId: Optional[str] = None
|
||||
self._initializeDatabase()
|
||||
|
||||
def _initializeDatabase(self):
|
||||
dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data")
|
||||
dbDatabase = "poweron_knowledge"
|
||||
dbUser = APP_CONFIG.get("DB_USER")
|
||||
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
|
||||
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
|
||||
|
||||
self.db = _get_cached_connector(
|
||||
dbHost=dbHost,
|
||||
dbDatabase=dbDatabase,
|
||||
dbUser=dbUser,
|
||||
dbPassword=dbPassword,
|
||||
dbPort=dbPort,
|
||||
userId=self.userId,
|
||||
)
|
||||
logger.info("Knowledge Store database initialized")
|
||||
|
||||
def setUserContext(self, user: User):
|
||||
self.currentUser = user
|
||||
self.userId = user.id if user else None
|
||||
if self.userId:
|
||||
self.db.updateContext(self.userId)
|
||||
|
||||
# =========================================================================
|
||||
# FileContentIndex CRUD
|
||||
# =========================================================================
|
||||
|
||||
def upsertFileContentIndex(self, index: FileContentIndex) -> Dict[str, Any]:
|
||||
"""Create or update a FileContentIndex entry."""
|
||||
data = index.model_dump()
|
||||
existing = self.db._loadRecord(FileContentIndex, index.id)
|
||||
if existing:
|
||||
return self.db.recordModify(FileContentIndex, index.id, data)
|
||||
return self.db.recordCreate(FileContentIndex, data)
|
||||
|
||||
def getFileContentIndex(self, fileId: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get a FileContentIndex by file ID."""
|
||||
return self.db._loadRecord(FileContentIndex, fileId)
|
||||
|
||||
def getFileContentIndexByUser(
|
||||
self, userId: str, featureInstanceId: str = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Get all FileContentIndex entries for a user."""
|
||||
recordFilter = {"userId": userId}
|
||||
if featureInstanceId:
|
||||
recordFilter["featureInstanceId"] = featureInstanceId
|
||||
return self.db.getRecordset(FileContentIndex, recordFilter=recordFilter)
|
||||
|
||||
def updateFileStatus(self, fileId: str, status: str) -> bool:
|
||||
"""Update the processing status of a FileContentIndex."""
|
||||
existing = self.db._loadRecord(FileContentIndex, fileId)
|
||||
if not existing:
|
||||
return False
|
||||
self.db.recordModify(FileContentIndex, fileId, {"status": status})
|
||||
return True
|
||||
|
||||
def deleteFileContentIndex(self, fileId: str) -> bool:
|
||||
"""Delete a FileContentIndex and all associated ContentChunks."""
|
||||
chunks = self.db.getRecordset(ContentChunk, recordFilter={"fileId": fileId})
|
||||
for chunk in chunks:
|
||||
self.db.recordDelete(ContentChunk, chunk["id"])
|
||||
return self.db.recordDelete(FileContentIndex, fileId)
|
||||
|
||||
# =========================================================================
|
||||
# ContentChunk CRUD
|
||||
# =========================================================================
|
||||
|
||||
def upsertContentChunk(self, chunk: ContentChunk) -> Dict[str, Any]:
|
||||
"""Create or update a ContentChunk."""
|
||||
data = chunk.model_dump()
|
||||
existing = self.db._loadRecord(ContentChunk, chunk.id)
|
||||
if existing:
|
||||
return self.db.recordModify(ContentChunk, chunk.id, data)
|
||||
return self.db.recordCreate(ContentChunk, data)
|
||||
|
||||
def upsertContentChunks(self, chunks: List[ContentChunk]) -> int:
|
||||
"""Batch upsert multiple ContentChunks. Returns count of upserted chunks."""
|
||||
count = 0
|
||||
for chunk in chunks:
|
||||
self.upsertContentChunk(chunk)
|
||||
count += 1
|
||||
return count
|
||||
|
||||
def getContentChunks(self, fileId: str) -> List[Dict[str, Any]]:
|
||||
"""Get all ContentChunks for a file."""
|
||||
return self.db.getRecordset(ContentChunk, recordFilter={"fileId": fileId})
|
||||
|
||||
def deleteContentChunks(self, fileId: str) -> int:
|
||||
"""Delete all ContentChunks for a file. Returns count of deleted chunks."""
|
||||
chunks = self.db.getRecordset(ContentChunk, recordFilter={"fileId": fileId})
|
||||
count = 0
|
||||
for chunk in chunks:
|
||||
if self.db.recordDelete(ContentChunk, chunk["id"]):
|
||||
count += 1
|
||||
return count
|
||||
|
||||
# =========================================================================
|
||||
# WorkflowMemory CRUD
|
||||
# =========================================================================
|
||||
|
||||
def upsertWorkflowMemory(self, memory: WorkflowMemory) -> Dict[str, Any]:
|
||||
"""Create or update a WorkflowMemory entry."""
|
||||
data = memory.model_dump()
|
||||
existing = self.db._loadRecord(WorkflowMemory, memory.id)
|
||||
if existing:
|
||||
return self.db.recordModify(WorkflowMemory, memory.id, data)
|
||||
return self.db.recordCreate(WorkflowMemory, data)
|
||||
|
||||
def getWorkflowEntities(self, workflowId: str) -> List[Dict[str, Any]]:
|
||||
"""Get all WorkflowMemory entries for a workflow."""
|
||||
return self.db.getRecordset(WorkflowMemory, recordFilter={"workflowId": workflowId})
|
||||
|
||||
def getWorkflowEntity(self, workflowId: str, key: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get a specific WorkflowMemory entry by workflow and key."""
|
||||
results = self.db.getRecordset(
|
||||
WorkflowMemory, recordFilter={"workflowId": workflowId, "key": key}
|
||||
)
|
||||
return results[0] if results else None
|
||||
|
||||
def deleteWorkflowMemory(self, workflowId: str) -> int:
|
||||
"""Delete all WorkflowMemory entries for a workflow. Returns count."""
|
||||
entries = self.db.getRecordset(WorkflowMemory, recordFilter={"workflowId": workflowId})
|
||||
count = 0
|
||||
for entry in entries:
|
||||
if self.db.recordDelete(WorkflowMemory, entry["id"]):
|
||||
count += 1
|
||||
return count
|
||||
|
||||
# =========================================================================
|
||||
# Semantic Search
|
||||
# =========================================================================
|
||||
|
||||
def semanticSearch(
|
||||
self,
|
||||
queryVector: List[float],
|
||||
userId: str = None,
|
||||
featureInstanceId: str = None,
|
||||
mandateId: str = None,
|
||||
isShared: bool = None,
|
||||
limit: int = 10,
|
||||
minScore: float = None,
|
||||
contentType: str = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Semantic search across ContentChunks using pgvector cosine similarity.
|
||||
|
||||
Args:
|
||||
queryVector: Query embedding vector.
|
||||
userId: Filter by user (Instance Layer).
|
||||
featureInstanceId: Filter by feature instance.
|
||||
mandateId: Filter by mandate (for Shared Layer lookups).
|
||||
isShared: If True, search Shared Layer via FileContentIndex join.
|
||||
limit: Max results.
|
||||
minScore: Minimum cosine similarity (0.0 - 1.0).
|
||||
contentType: Filter by content type (text, image, etc.).
|
||||
|
||||
Returns:
|
||||
List of ContentChunk records with _score field, sorted by relevance.
|
||||
"""
|
||||
recordFilter = {}
|
||||
if userId:
|
||||
recordFilter["userId"] = userId
|
||||
if featureInstanceId:
|
||||
recordFilter["featureInstanceId"] = featureInstanceId
|
||||
if contentType:
|
||||
recordFilter["contentType"] = contentType
|
||||
|
||||
return self.db.semanticSearch(
|
||||
modelClass=ContentChunk,
|
||||
vectorColumn="embedding",
|
||||
queryVector=queryVector,
|
||||
limit=limit,
|
||||
recordFilter=recordFilter if recordFilter else None,
|
||||
minScore=minScore,
|
||||
)
|
||||
|
||||
def semanticSearchWorkflowMemory(
|
||||
self,
|
||||
queryVector: List[float],
|
||||
workflowId: str,
|
||||
limit: int = 5,
|
||||
minScore: float = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Semantic search across WorkflowMemory entries."""
|
||||
return self.db.semanticSearch(
|
||||
modelClass=WorkflowMemory,
|
||||
vectorColumn="embedding",
|
||||
queryVector=queryVector,
|
||||
limit=limit,
|
||||
recordFilter={"workflowId": workflowId},
|
||||
minScore=minScore,
|
||||
)
|
||||
|
||||
|
||||
def getInterface(currentUser: Optional[User] = None) -> KnowledgeObjects:
|
||||
"""Get or create a KnowledgeObjects singleton."""
|
||||
if "default" not in _instances:
|
||||
_instances["default"] = KnowledgeObjects()
|
||||
|
||||
interface = _instances["default"]
|
||||
if currentUser:
|
||||
interface.setUserContext(currentUser)
|
||||
|
||||
return interface
|
||||
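# Hedged usage sketch for the new Knowledge Store interface (illustrative only;
# currentUser, fileId and embedding are placeholders):
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface

knowledgeDb = getKnowledgeInterface(currentUser)              # singleton with user context
index = knowledgeDb.getFileContentIndex(fileId)               # None if the file has no index entry
hits = knowledgeDb.semanticSearch(queryVector=embedding, userId=currentUser.id, limit=5)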
|
|
@ -58,7 +58,6 @@ TABLE_NAMESPACE = {
|
|||
"ChatWorkflow": "chat",
|
||||
"ChatMessage": "chat",
|
||||
"ChatLog": "chat",
|
||||
"ChatStat": "chat",
|
||||
"ChatDocument": "chat",
|
||||
"Prompt": "chat",
|
||||
# Chatbot (poweron_chatbot) - per feature-instance isolation
|
||||
|
|
@ -69,13 +68,20 @@ TABLE_NAMESPACE = {
|
|||
# Files - user-owned
|
||||
"FileItem": "files",
|
||||
"FileData": "files",
|
||||
"FileFolder": "files",
|
||||
# Automation - user-owned
|
||||
"AutomationDefinition": "automation",
|
||||
"AutomationTemplate": "automation",
|
||||
# Knowledge Store - user-owned
|
||||
"FileContentIndex": "knowledge",
|
||||
"ContentChunk": "knowledge",
|
||||
"WorkflowMemory": "knowledge",
|
||||
# Data Sources - user-owned
|
||||
"DataSource": "datasource",
|
||||
}
|
||||
|
||||
# Namespaces without mandate context - GROUP is mapped to MY
|
||||
USER_OWNED_NAMESPACES = {"chat", "chatbot", "files", "automation"}
|
||||
USER_OWNED_NAMESPACES = {"chat", "chatbot", "files", "automation", "knowledge", "datasource"}
|
||||
|
||||
|
||||
def buildDataObjectKey(tableName: str, featureCode: Optional[str] = None) -> str:
|
||||
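# Hedged sketch of the mapping rule stated above (illustrative, not the actual
# buildDataObjectKey body): for namespaces in USER_OWNED_NAMESPACES a GROUP-scoped
# request collapses to the MY scope, roughly
#     if TABLE_NAMESPACE.get(tableName) in USER_OWNED_NAMESPACES and scope == "GROUP":
#         scope = "MY"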
|
|
@ -175,7 +181,7 @@ def getRecordsetWithRBAC(
|
|||
whereValues = []
|
||||
|
||||
# CRITICAL: Only pass featureInstanceId to WHERE clause if the model actually has
|
||||
# this column. Chat child tables (ChatMessage, ChatLog, ChatStat, ChatDocument)
|
||||
# this column. Chat child tables (ChatMessage, ChatLog, ChatDocument)
|
||||
# are user-owned and do NOT have featureInstanceId - only ChatWorkflow does.
|
||||
# Without this check, the SQL query would reference a non-existent column,
|
||||
# causing a silent error that returns empty results.
|
||||
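# Hedged sketch of the column guard described above (variable names are illustrative;
# Pydantic v2 models expose their declared columns via model_fields):
#     if featureInstanceId and "featureInstanceId" in modelClass.model_fields:
#         whereClauses.append("featureInstanceId = %s")
#         whereValues.append(featureInstanceId)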
|
|
|
|||
|
|
@ -247,19 +247,13 @@ def _getInstancePermissions(rootInterface, userId: str, instanceId: str) -> Dict
|
|||
# Get FeatureAccess for this user and instance (Pydantic model)
|
||||
featureAccess = rootInterface.getFeatureAccess(userId, instanceId)
|
||||
|
||||
logger.debug(f"_getInstancePermissions: userId={userId}, instanceId={instanceId}, featureAccess={featureAccess is not None}")
|
||||
|
||||
if not featureAccess:
|
||||
logger.debug(f"_getInstancePermissions: No FeatureAccess found for user {userId} and instance {instanceId}")
|
||||
return permissions
|
||||
|
||||
# Get role IDs via interface method
|
||||
roleIds = rootInterface.getRoleIdsForFeatureAccess(str(featureAccess.id))
|
||||
|
||||
logger.debug(f"_getInstancePermissions: featureAccessId={featureAccess.id}, roleIds={roleIds}")
|
||||
|
||||
if not roleIds:
|
||||
logger.debug(f"_getInstancePermissions: No roles found for FeatureAccess {featureAccess.id}")
|
||||
return permissions
|
||||
|
||||
# Check if user has admin role
|
||||
|
|
@ -274,8 +268,6 @@ def _getInstancePermissions(rootInterface, userId: str, instanceId: str) -> Dict
|
|||
# Get all rules for this role (returns Pydantic models)
|
||||
accessRules = rootInterface.getAccessRules(roleId=roleId)
|
||||
|
||||
logger.debug(f"_getInstancePermissions: roleId={roleId}, accessRules={len(accessRules) if accessRules else 0}")
|
||||
|
||||
for rule in accessRules:
|
||||
context = rule.context
|
||||
item = rule.item or ""
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ from modules.auth import limiter, requireSysAdminRole, getRequestContext, Reques
|
|||
|
||||
# Import billing components
|
||||
from modules.interfaces.interfaceDbBilling import getInterface as getBillingInterface, _getRootInterface
|
||||
from modules.services.serviceBilling.mainServiceBilling import getService as getBillingService
|
||||
from modules.serviceCenter.services.serviceBilling.mainServiceBilling import getService as getBillingService
|
||||
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata, normalize_pagination_dict
|
||||
from modules.routes.routeDataUsers import _applyFiltersAndSort
|
||||
from modules.datamodels.datamodelBilling import (
|
||||
|
|
@ -162,6 +162,23 @@ def _isAdminOfMandate(ctx: RequestContext, targetMandateId: str) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def _isMemberOfMandate(ctx: RequestContext, targetMandateId: str) -> bool:
|
||||
"""Check if user has any enabled membership in the specified mandate."""
|
||||
try:
|
||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
rootInterface = getRootInterface()
|
||||
userMandates = rootInterface.getUserMandates(str(ctx.user.id))
|
||||
for um in userMandates:
|
||||
if str(getattr(um, 'mandateId', None)) != str(targetMandateId):
|
||||
continue
|
||||
if not getattr(um, 'enabled', True):
|
||||
continue
|
||||
return True
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _filterTransactionsByScope(transactions: list, scope: BillingDataScope) -> list:
|
||||
"""
|
||||
Filter a list of transaction dicts based on the user's BillingDataScope.
|
||||
|
|
@ -720,11 +737,11 @@ def createCheckoutSession(
|
|||
targetMandateId: str = Path(..., description="Mandate ID"),
|
||||
checkoutRequest: CheckoutCreateRequest = Body(...),
|
||||
ctx: RequestContext = Depends(getRequestContext),
|
||||
_admin = Depends(requireSysAdminRole)
|
||||
):
|
||||
"""
|
||||
Create Stripe Checkout Session for credit top-up. Returns redirect URL.
|
||||
SysAdmin only. Amount is validated server-side against allowed presets.
|
||||
RBAC: PREPAY_USER requires mandate membership (user loads own account),
|
||||
PREPAY_MANDATE requires mandate admin role.
|
||||
"""
|
||||
try:
|
||||
billingInterface = getBillingInterface(ctx.user, targetMandateId)
|
||||
|
|
@ -738,10 +755,17 @@ def createCheckoutSession(
|
|||
if billingModel == BillingModelEnum.PREPAY_USER:
|
||||
if not checkoutRequest.userId:
|
||||
raise HTTPException(status_code=400, detail="userId is required for PREPAY_USER model")
|
||||
elif billingModel not in [BillingModelEnum.PREPAY_MANDATE, BillingModelEnum.CREDIT_POSTPAY]:
|
||||
if str(checkoutRequest.userId) != str(ctx.user.id):
|
||||
raise HTTPException(status_code=403, detail="Users can only load credit to their own account")
|
||||
if not _isMemberOfMandate(ctx, targetMandateId):
|
||||
raise HTTPException(status_code=403, detail="User is not a member of this mandate")
|
||||
elif billingModel in [BillingModelEnum.PREPAY_MANDATE, BillingModelEnum.CREDIT_POSTPAY]:
|
||||
if not _isAdminOfMandate(ctx, targetMandateId):
|
||||
raise HTTPException(status_code=403, detail="Mandate admin role required to load mandate credit")
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail=f"Cannot add credit to {billingModel.value} billing model")
|
||||
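# Summary of the authorization rules implemented above (descriptive, not new logic):
#   PREPAY_USER     -> userId required, must equal the caller, caller must be a mandate member
#   PREPAY_MANDATE  -> caller must be a mandate admin
#   CREDIT_POSTPAY  -> caller must be a mandate admin
#   anything else   -> 400 "Cannot add credit to <model> billing model"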
|
||||
from modules.services.serviceBilling.stripeCheckout import create_checkout_session
|
||||
from modules.serviceCenter.services.serviceBilling.stripeCheckout import create_checkout_session
|
||||
redirect_url = create_checkout_session(
|
||||
mandate_id=targetMandateId,
|
||||
user_id=checkoutRequest.userId,
|
||||
|
|
@ -768,7 +792,7 @@ async def stripeWebhook(
|
|||
No JWT auth - Stripe authenticates via Stripe-Signature header.
|
||||
"""
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
from modules.services.serviceBilling.stripeCheckout import ALLOWED_AMOUNTS_CHF
|
||||
from modules.serviceCenter.services.serviceBilling.stripeCheckout import ALLOWED_AMOUNTS_CHF
|
||||
|
||||
webhook_secret = APP_CONFIG.get("STRIPE_WEBHOOK_SECRET")
|
||||
if not webhook_secret:
|
||||
|
|
|
|||
|
|
@ -19,6 +19,114 @@ from modules.datamodels.datamodelPagination import PaginationParams, PaginatedRe
|
|||
# Configure logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
|
||||
"""Background task: pre-scan + extraction + knowledge indexing.
|
||||
Step 1: Structure Pre-Scan (AI-free) -> FileContentIndex (persisted)
|
||||
Step 2: Content extraction via runExtraction -> ContentParts
|
||||
Step 3: KnowledgeService.indexFile -> chunking + embedding -> Knowledge Store"""
|
||||
userId = user.id if hasattr(user, "id") else str(user)
|
||||
try:
|
||||
mgmtInterface = interfaceDbManagement.getInterface(user)
|
||||
mgmtInterface.updateFile(fileId, {"status": "processing"})
|
||||
|
||||
rawBytes = mgmtInterface.getFileData(fileId)
|
||||
if not rawBytes:
|
||||
logger.warning(f"Auto-index: no file data for {fileId}, skipping")
|
||||
mgmtInterface.updateFile(fileId, {"status": "active"})
|
||||
return
|
||||
|
||||
logger.info(f"Auto-index starting for {fileName} ({len(rawBytes)} bytes, {mimeType})")
|
||||
|
||||
# Step 1: Structure Pre-Scan (AI-free)
|
||||
from modules.serviceCenter.services.serviceKnowledge.subPreScan import preScanDocument
|
||||
contentIndex = await preScanDocument(
|
||||
fileData=rawBytes,
|
||||
mimeType=mimeType,
|
||||
fileId=fileId,
|
||||
fileName=fileName,
|
||||
userId=userId,
|
||||
)
|
||||
logger.info(
|
||||
f"Pre-scan complete for {fileName}: "
|
||||
f"{contentIndex.totalObjects} objects"
|
||||
)
|
||||
|
||||
# Persist FileContentIndex immediately
|
||||
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||||
knowledgeDb = getKnowledgeInterface()
|
||||
knowledgeDb.upsertFileContentIndex(contentIndex)
|
||||
|
||||
# Step 2: Content extraction (AI-free, produces ContentParts)
|
||||
from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry, ChunkerRegistry
|
||||
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
|
||||
from modules.datamodels.datamodelExtraction import ExtractionOptions
|
||||
|
||||
extractorRegistry = ExtractorRegistry()
|
||||
chunkerRegistry = ChunkerRegistry()
|
||||
options = ExtractionOptions()
|
||||
|
||||
extracted = runExtraction(
|
||||
extractorRegistry, chunkerRegistry,
|
||||
rawBytes, fileName, mimeType, options,
|
||||
)
|
||||
|
||||
contentObjects = []
|
||||
for part in extracted.parts:
|
||||
contentType = "text"
|
||||
if part.typeGroup == "image":
|
||||
contentType = "image"
|
||||
elif part.typeGroup in ("binary", "container"):
|
||||
contentType = "other"
|
||||
|
||||
if not part.data or not part.data.strip():
|
||||
continue
|
||||
|
||||
contentObjects.append({
|
||||
"contentObjectId": part.id,
|
||||
"contentType": contentType,
|
||||
"data": part.data,
|
||||
"contextRef": {
|
||||
"containerPath": fileName,
|
||||
"location": part.label or "file",
|
||||
**(part.metadata or {}),
|
||||
},
|
||||
})
|
||||
|
||||
logger.info(f"Extracted {len(contentObjects)} content objects from {fileName}")
|
||||
|
||||
if not contentObjects:
|
||||
knowledgeDb.updateFileStatus(fileId, "indexed")
|
||||
mgmtInterface.updateFile(fileId, {"status": "active"})
|
||||
return
|
||||
|
||||
# Step 3: Knowledge indexing (chunking + embedding)
|
||||
from modules.serviceCenter import getService
|
||||
from modules.serviceCenter.context import ServiceCenterContext
|
||||
|
||||
ctx = ServiceCenterContext(user=user, mandate_id="", feature_instance_id="")
|
||||
knowledgeService = getService("knowledge", ctx)
|
||||
|
||||
await knowledgeService.indexFile(
|
||||
fileId=fileId,
|
||||
fileName=fileName,
|
||||
mimeType=mimeType,
|
||||
userId=userId,
|
||||
contentObjects=contentObjects,
|
||||
structure=contentIndex.structure,
|
||||
)
|
||||
|
||||
mgmtInterface.updateFile(fileId, {"status": "active"})
|
||||
logger.info(f"Auto-index complete for file {fileId} ({fileName})")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Auto-index failed for file {fileId}: {e}", exc_info=True)
|
||||
try:
|
||||
errMgmt = interfaceDbManagement.getInterface(user)
|
||||
errMgmt.updateFile(fileId, {"status": "active"})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Model attributes for FileItem
|
||||
fileAttributes = getModelAttributeDefinitions(FileItem)
|
||||
|
||||
|
|
@ -111,6 +219,7 @@ async def upload_file(
|
|||
request: Request,
|
||||
file: UploadFile = File(...),
|
||||
workflowId: Optional[str] = Form(None),
|
||||
featureInstanceId: Optional[str] = Form(None),
|
||||
currentUser: User = Depends(getCurrentUser)
|
||||
) -> JSONResponse:
|
||||
# Add fileName property to UploadFile for consistency with backend model
|
||||
|
|
@ -133,6 +242,10 @@ async def upload_file(
|
|||
# Save file via LucyDOM interface in the database
|
||||
fileItem, duplicateType = managementInterface.saveUploadedFile(fileContent, file.filename)
|
||||
|
||||
if featureInstanceId and not fileItem.featureInstanceId:
|
||||
managementInterface.updateFile(fileItem.id, {"featureInstanceId": featureInstanceId})
|
||||
fileItem.featureInstanceId = featureInstanceId
|
||||
|
||||
# Determine response message based on duplicate type
|
||||
if duplicateType == "exact_duplicate":
|
||||
message = f"File '{file.filename}' already exists with identical content. Reusing existing file."
|
||||
|
|
@ -148,6 +261,32 @@ async def upload_file(
|
|||
if workflowId:
|
||||
fileMeta["workflowId"] = workflowId
|
||||
|
||||
# Trigger background auto-index pipeline (non-blocking)
|
||||
# Also runs for duplicates in case the original was never successfully indexed
|
||||
shouldIndex = duplicateType == "new_file"
|
||||
if not shouldIndex:
|
||||
try:
|
||||
from modules.interfaces.interfaceDbKnowledge import getInterface as _getKnowledgeInterface
|
||||
_kDb = _getKnowledgeInterface()
|
||||
_existingIndex = _kDb.getFileContentIndex(fileItem.id)
|
||||
if not _existingIndex:
|
||||
shouldIndex = True
|
||||
logger.info(f"Re-triggering auto-index for duplicate {fileItem.id} (not yet indexed)")
|
||||
except Exception:
|
||||
shouldIndex = True
|
||||
|
||||
if shouldIndex:
|
||||
try:
|
||||
import asyncio
|
||||
asyncio.ensure_future(_autoIndexFile(
|
||||
fileId=fileItem.id,
|
||||
fileName=fileItem.fileName,
|
||||
mimeType=fileItem.mimeType,
|
||||
user=currentUser,
|
||||
))
|
||||
except Exception as indexErr:
|
||||
logger.warning(f"Auto-index trigger failed (non-blocking): {indexErr}")
|
||||
|
||||
# Response with duplicate information
|
||||
return JSONResponse({
|
||||
"message": message,
|
||||
|
|
|
|||
|
|
@ -764,7 +764,7 @@ def send_password_link(
|
|||
expiryHours = int(APP_CONFIG.get("Auth_RESET_TOKEN_EXPIRY_HOURS", "24"))
|
||||
|
||||
try:
|
||||
from modules.services import Services
|
||||
from modules.serviceHub import Services
|
||||
services = Services(targetUser)
|
||||
|
||||
emailSubject = "PowerOn - Passwort setzen"
|
||||
|
|
|
|||
|
|
@ -395,7 +395,7 @@ def trigger_subscription(
|
|||
)
|
||||
|
||||
# Get messaging service from request app state
|
||||
from modules.services import getInterface as getServicesInterface
|
||||
from modules.serviceHub import getInterface as getServicesInterface
|
||||
services = getServicesInterface(context.user, None, mandateId=str(context.mandateId))
|
||||
|
||||
# Convert dict to Pydantic model
|
||||
|
|
|
|||
|
|
@ -87,9 +87,10 @@ CLIENT_SECRET = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET")
|
|||
REDIRECT_URI = APP_CONFIG.get("Service_GOOGLE_REDIRECT_URI")
|
||||
SCOPES = [
|
||||
"https://www.googleapis.com/auth/gmail.readonly",
|
||||
"https://www.googleapis.com/auth/drive.readonly",
|
||||
"https://www.googleapis.com/auth/userinfo.profile",
|
||||
"https://www.googleapis.com/auth/userinfo.email",
|
||||
"openid"
|
||||
"openid",
|
||||
]
|
||||
|
||||
@router.get("/config")
|
||||
|
|
@ -488,7 +489,7 @@ async def auth_callback(code: str, state: str, request: Request, response: Respo
|
|||
connection.externalUsername = user_info.get("email")
|
||||
connection.externalEmail = user_info.get("email")
|
||||
# Store actually granted scopes for this connection
|
||||
granted_scopes_list = granted_scopes.split(" ") if granted_scopes else SCOPES
|
||||
granted_scopes_list = granted_scopes if isinstance(granted_scopes, list) else (granted_scopes.split(" ") if granted_scopes else SCOPES)
|
||||
connection.grantedScopes = granted_scopes_list
|
||||
logger.info(f"Storing granted scopes for connection {connection_id}: {granted_scopes_list}")
|
||||
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ SCOPES = [
|
|||
"Mail.Send", # Send mail
|
||||
"Files.ReadWrite.All", # Read and write files (SharePoint/OneDrive)
|
||||
"Sites.ReadWrite.All", # Read and write SharePoint sites
|
||||
"Team.ReadBasic.All", # List joined teams and channels
|
||||
# Teams Bot: Meeting and chat access (requires admin consent)
|
||||
"OnlineMeetings.Read", # Read user's Teams meeting details (delegated scope)
|
||||
"Chat.ReadWrite", # Read and write Teams chat messages
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ from fastapi import APIRouter, HTTPException, Depends, Path, Query, Request, sta
|
|||
from modules.auth import limiter, getCurrentUser
|
||||
from modules.datamodels.datamodelUam import User, UserConnection
|
||||
from modules.interfaces.interfaceDbApp import getInterface
|
||||
from modules.services import getInterface as getServices
|
||||
from modules.serviceHub import getInterface as getServices
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -123,6 +123,9 @@ def _getFeatureUiObjects(featureCode: str) -> List[Dict[str, Any]]:
|
|||
elif featureCode == "commcoach":
|
||||
from modules.features.commcoach.mainCommcoach import UI_OBJECTS
|
||||
return UI_OBJECTS
|
||||
elif featureCode == "workspace":
|
||||
from modules.features.workspace.mainWorkspace import UI_OBJECTS
|
||||
return UI_OBJECTS
|
||||
else:
|
||||
logger.warning(f"Unknown feature code: {featureCode}")
|
||||
return []
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@ logger = logging.getLogger(__name__)
|
|||
def getService(
|
||||
key: str,
|
||||
context: ServiceCenterContext,
|
||||
legacy_hub: Optional[Any] = None,
|
||||
) -> Any:
|
||||
"""
|
||||
Get a service instance by key for the given context.
|
||||
|
|
@ -34,14 +33,13 @@ def getService(
|
|||
Args:
|
||||
key: Service key (e.g., "web", "extraction", "utils")
|
||||
context: ServiceCenterContext with user, mandate_id, feature_instance_id, workflow
|
||||
legacy_hub: Optional legacy Services instance for fallback when service not yet migrated
|
||||
|
||||
Returns:
|
||||
Service instance
|
||||
"""
|
||||
cache = get_resolution_cache()
|
||||
resolving = set()
|
||||
return resolve(key, context, cache, resolving, legacy_hub=legacy_hub)
|
||||
return resolve(key, context, cache, resolving)
|
||||
|
||||
|
||||
def preWarm(service_keys: Optional[List[str]] = None) -> None:
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ class EventManager:
|
|||
"""Initialize the event manager."""
|
||||
self._queues: Dict[str, asyncio.Queue] = {}
|
||||
self._cleanup_tasks: Dict[str, asyncio.Task] = {}
|
||||
self._agent_tasks: Dict[str, asyncio.Task] = {}
|
||||
self._cancelled: Dict[str, bool] = {}
|
||||
|
||||
def create_queue(self, workflow_id: str) -> asyncio.Queue:
|
||||
"""
|
||||
|
|
@ -33,9 +35,22 @@ class EventManager:
|
|||
Returns:
|
||||
Async queue for events
|
||||
"""
|
||||
if workflow_id in self._cleanup_tasks:
|
||||
self._cleanup_tasks[workflow_id].cancel()
|
||||
del self._cleanup_tasks[workflow_id]
|
||||
logger.debug(f"Cancelled pending cleanup for workflow {workflow_id}")
|
||||
|
||||
if workflow_id not in self._queues:
|
||||
self._queues[workflow_id] = asyncio.Queue()
|
||||
logger.debug(f"Created event queue for workflow {workflow_id}")
|
||||
else:
|
||||
old = self._queues[workflow_id]
|
||||
while not old.empty():
|
||||
try:
|
||||
old.get_nowait()
|
||||
except asyncio.QueueEmpty:
|
||||
break
|
||||
logger.debug(f"Reusing event queue for workflow {workflow_id} (drained stale events)")
|
||||
return self._queues[workflow_id]
|
||||
|
||||
def get_queue(self, workflow_id: str) -> Optional[asyncio.Queue]:
|
||||
|
|
@ -62,6 +77,31 @@ class EventManager:
|
|||
"""
|
||||
return workflow_id in self._queues
|
||||
|
||||
def register_agent_task(self, workflow_id: str, task: asyncio.Task) -> None:
|
||||
"""Register the asyncio Task running the agent for a workflow."""
|
||||
self._agent_tasks[workflow_id] = task
|
||||
self._cancelled.pop(workflow_id, None)
|
||||
|
||||
def is_cancelled(self, workflow_id: str) -> bool:
|
||||
"""Check if a workflow has been cancelled."""
|
||||
return self._cancelled.get(workflow_id, False)
|
||||
|
||||
async def cancel_agent(self, workflow_id: str) -> bool:
|
||||
"""Cancel the running agent task for a workflow. Returns True if cancelled."""
|
||||
self._cancelled[workflow_id] = True
|
||||
task = self._agent_tasks.pop(workflow_id, None)
|
||||
if task and not task.done():
|
||||
task.cancel()
|
||||
logger.info(f"Cancelled agent task for workflow {workflow_id}")
|
||||
return True
|
||||
logger.debug(f"No running agent task found for workflow {workflow_id}")
|
||||
return False
|
||||
|
||||
def _unregister_agent_task(self, workflow_id: str) -> None:
|
||||
"""Remove the agent task reference after completion."""
|
||||
self._agent_tasks.pop(workflow_id, None)
|
||||
self._cancelled.pop(workflow_id, None)
|
||||
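# Hedged caller-side sketch (illustrative): how the agent task lifecycle hooks are
# expected to be combined by the route that starts an agent run.
#     queue = event_manager.create_queue(workflow_id)
#     task = asyncio.create_task(run_agent(workflow_id, queue))   # run_agent is a placeholder
#     event_manager.register_agent_task(workflow_id, task)
#     ...
#     await event_manager.cancel_agent(workflow_id)               # e.g. user pressed "stop"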
|
||||
async def emit_event(
|
||||
self,
|
||||
context_id: str,
|
||||
|
|
@ -97,7 +137,8 @@ class EventManager:
|
|||
|
||||
try:
|
||||
await queue.put(event)
|
||||
logger.debug(f"Emitted {event_type} event for workflow {context_id}")
|
||||
if event_type not in ("chunk",):
|
||||
logger.debug(f"Emitted {event_type} event for workflow {context_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error emitting event for workflow {context_id}: {e}", exc_info=True)
|
||||
|
||||
|
|
|
|||
|
|
@ -98,6 +98,20 @@ IMPORTABLE_SERVICES: Dict[str, Dict[str, Any]] = {
|
|||
"objectKey": "service.neutralization",
|
||||
"label": {"en": "Neutralization", "de": "Neutralisierung", "fr": "Neutralisation"},
|
||||
},
|
||||
"agent": {
|
||||
"module": "modules.serviceCenter.services.serviceAgent.mainServiceAgent",
|
||||
"class": "AgentService",
|
||||
"dependencies": ["ai", "chat", "utils", "extraction", "billing", "streaming", "knowledge"],
|
||||
"objectKey": "service.agent",
|
||||
"label": {"en": "Agent", "de": "Agent", "fr": "Agent"},
|
||||
},
|
||||
"knowledge": {
|
||||
"module": "modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge",
|
||||
"class": "KnowledgeService",
|
||||
"dependencies": ["ai"],
|
||||
"objectKey": "service.knowledge",
|
||||
"label": {"en": "Knowledge Store", "de": "Wissensspeicher", "fr": "Base de connaissances"},
|
||||
},
|
||||
}
|
||||
|
||||
# RBAC objects for service-level access control (for catalog registration)
|
||||
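# Hedged usage sketch (illustrative; currentUser is a placeholder): resolving the newly
# registered services through the Service Center; dependencies listed in the registry
# are resolved recursively before the service class is instantiated.
from modules.serviceCenter import getService
from modules.serviceCenter.context import ServiceCenterContext

ctx = ServiceCenterContext(user=currentUser, mandate_id="", feature_instance_id="")
knowledge = getService("knowledge", ctx)   # pulls in its "ai" dependency first
agent = getService("agent", ctx)           # pulls in ai, chat, utils, extraction, billing, ...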
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
# All rights reserved.
|
||||
"""
|
||||
Service Center Resolver.
|
||||
Resolution logic, dependency injection, and optional legacy fallback.
|
||||
Resolution logic and dependency injection for service instantiation.
|
||||
"""
|
||||
|
||||
import importlib
|
||||
|
|
@ -14,7 +14,6 @@ from modules.serviceCenter.registry import CORE_SERVICES, IMPORTABLE_SERVICES
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Type for get_service callable passed to services
|
||||
GetServiceFunc = Callable[[str], Any]
|
||||
|
||||
|
||||
|
|
@ -29,50 +28,15 @@ def _load_service_class(module_path: str, class_name: str):
|
|||
return getattr(module, class_name)
|
||||
|
||||
|
||||
def _create_legacy_hub(ctx: ServiceCenterContext) -> Any:
|
||||
"""Create legacy Services instance for fallback when service not yet migrated."""
|
||||
from modules.services import getInterface
|
||||
return getInterface(
|
||||
ctx.user,
|
||||
workflow=ctx.workflow,
|
||||
mandateId=ctx.mandate_id,
|
||||
featureInstanceId=ctx.feature_instance_id,
|
||||
)
|
||||
|
||||
|
||||
def _get_from_legacy(legacy_hub: Any, key: str) -> Any:
|
||||
"""Map service key to legacy hub attribute (for fallback when service center module fails)."""
|
||||
key_to_attr = {
|
||||
"utils": "utils",
|
||||
"security": "security",
|
||||
"streaming": "streaming",
|
||||
"ticket": "ticket",
|
||||
"messaging": "messaging",
|
||||
"billing": "billing",
|
||||
"sharepoint": "sharepoint",
|
||||
"chat": "chat",
|
||||
"extraction": "extraction",
|
||||
"generation": "generation",
|
||||
"ai": "ai",
|
||||
"web": "web",
|
||||
"neutralization": "neutralization",
|
||||
}
|
||||
attr = key_to_attr.get(key)
|
||||
if attr and hasattr(legacy_hub, attr):
|
||||
return getattr(legacy_hub, attr)
|
||||
return None
|
||||
|
||||
|
||||
def resolve(
|
||||
key: str,
|
||||
context: ServiceCenterContext,
|
||||
cache: Dict[str, Any],
|
||||
resolving: Set[str],
|
||||
legacy_hub: Optional[Any] = None,
|
||||
) -> Any:
|
||||
"""
|
||||
Resolve a service by key. Uses cache, resolves dependencies recursively.
|
||||
Falls back to legacy_hub if service module cannot be loaded.
|
||||
Raises KeyError if the service is not registered.
|
||||
"""
|
||||
cache_key = f"{_make_context_id(context)}_{key}"
|
||||
if cache_key in cache:
|
||||
|
|
@ -82,59 +46,20 @@ def resolve(
|
|||
raise RuntimeError(f"Circular dependency detected for service: {key}")
|
||||
|
||||
def get_service(dep_key: str) -> Any:
|
||||
return resolve(dep_key, context, cache, resolving, legacy_hub)
|
||||
return resolve(dep_key, context, cache, resolving)
|
||||
|
||||
# Try core first
|
||||
if key in CORE_SERVICES:
|
||||
spec = CORE_SERVICES[key]
|
||||
spec = CORE_SERVICES.get(key) or IMPORTABLE_SERVICES.get(key)
|
||||
if spec:
|
||||
cls = _load_service_class(spec["module"], spec["class"])
|
||||
resolving.add(key)
|
||||
try:
|
||||
cls = _load_service_class(spec["module"], spec["class"])
|
||||
resolving.add(key)
|
||||
try:
|
||||
for dep in spec.get("dependencies", []):
|
||||
get_service(dep)
|
||||
finally:
|
||||
resolving.discard(key)
|
||||
instance = cls(context, get_service)
|
||||
cache[cache_key] = instance
|
||||
return instance
|
||||
except (ImportError, ModuleNotFoundError, AttributeError) as e:
|
||||
logger.debug(f"Could not load core service '{key}' from service center: {e}")
|
||||
if legacy_hub:
|
||||
fallback = _get_from_legacy(legacy_hub, key)
|
||||
if fallback is not None:
|
||||
cache[cache_key] = fallback
|
||||
return fallback
|
||||
raise
|
||||
|
||||
# Try importable
|
||||
if key in IMPORTABLE_SERVICES:
|
||||
spec = IMPORTABLE_SERVICES[key]
|
||||
try:
|
||||
cls = _load_service_class(spec["module"], spec["class"])
|
||||
resolving.add(key)
|
||||
try:
|
||||
for dep in spec.get("dependencies", []):
|
||||
get_service(dep)
|
||||
finally:
|
||||
resolving.discard(key)
|
||||
instance = cls(context, get_service)
|
||||
cache[cache_key] = instance
|
||||
return instance
|
||||
except (ImportError, ModuleNotFoundError, AttributeError) as e:
|
||||
logger.debug(f"Could not load importable service '{key}' from service center: {e}")
|
||||
if legacy_hub:
|
||||
fallback = _get_from_legacy(legacy_hub, key)
|
||||
if fallback is not None:
|
||||
cache[cache_key] = fallback
|
||||
return fallback
|
||||
raise
|
||||
|
||||
if legacy_hub:
|
||||
fallback = _get_from_legacy(legacy_hub, key)
|
||||
if fallback is not None:
|
||||
cache[cache_key] = fallback
|
||||
return fallback
|
||||
for dep in spec.get("dependencies", []):
|
||||
get_service(dep)
|
||||
finally:
|
||||
resolving.discard(key)
|
||||
instance = cls(context, get_service)
|
||||
cache[cache_key] = instance
|
||||
return instance
|
||||
|
||||
raise KeyError(f"Unknown service: {key}")
|
||||
|
||||
|
|
|
|||
3 modules/serviceCenter/services/serviceAgent/__init__.py (new file)
|
|
@ -0,0 +1,3 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""serviceAgent: AI Agent with ReAct loop and native function calling."""
|
||||
162 modules/serviceCenter/services/serviceAgent/actionToolAdapter.py (new file)
|
|
@ -0,0 +1,162 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""ActionToolAdapter: wraps existing workflow actions (dynamicMode=True) as agent tools."""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelAgent import (
|
||||
ToolDefinition, ToolResult
|
||||
)
|
||||
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ActionToolAdapter:
|
||||
"""Wraps existing Workflow-Actions as Agent-Tools.
|
||||
|
||||
Iterates over discovered methods, finds actions with dynamicMode=True,
|
||||
and registers them in the ToolRegistry with a compound name (method.action).
|
||||
"""
|
||||
|
||||
def __init__(self, actionExecutor):
|
||||
self._actionExecutor = actionExecutor
|
||||
self._registeredTools: List[str] = []
|
||||
|
||||
def registerAll(self, toolRegistry: ToolRegistry):
|
||||
"""Discover and register all dynamicMode actions as agent tools."""
|
||||
from modules.workflows.processing.shared.methodDiscovery import methods
|
||||
|
||||
registered = 0
|
||||
for methodName, methodInfo in methods.items():
|
||||
if not methodName[0].isupper():
|
||||
continue
|
||||
|
||||
shortName = methodName.replace("Method", "").lower()
|
||||
methodInstance = methodInfo["instance"]
|
||||
|
||||
for actionName, actionInfo in methodInfo["actions"].items():
|
||||
actionDef = methodInstance._actions.get(actionName)
|
||||
if not actionDef or not getattr(actionDef, "dynamicMode", False):
|
||||
continue
|
||||
|
||||
compoundName = f"{shortName}.{actionName}"
|
||||
toolDef = _buildToolDefinition(compoundName, actionDef, actionInfo)
|
||||
|
||||
handler = _createDispatchHandler(self._actionExecutor, shortName, actionName)
|
||||
toolRegistry.registerFromDefinition(toolDef, handler)
|
||||
self._registeredTools.append(compoundName)
|
||||
registered += 1
|
||||
|
||||
logger.info(f"ActionToolAdapter: registered {registered} tools from workflow actions")
|
||||
|
||||
@property
|
||||
def registeredTools(self) -> List[str]:
|
||||
"""Names of all tools registered by this adapter."""
|
||||
return list(self._registeredTools)
|
||||
|
||||
|
||||
def _buildToolDefinition(compoundName: str, actionDef, actionInfo: Dict[str, Any]) -> ToolDefinition:
|
||||
"""Build a ToolDefinition from a WorkflowActionDefinition."""
|
||||
parameters = _convertParameterSchema(actionInfo.get("parameters", {}))
|
||||
|
||||
return ToolDefinition(
|
||||
name=compoundName,
|
||||
description=actionDef.description or actionInfo.get("description", ""),
|
||||
parameters=parameters,
|
||||
readOnly=False
|
||||
)
|
||||
|
||||
|
||||
def _convertParameterSchema(actionParams: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Convert workflow action parameter schema to JSON Schema for tool definitions."""
|
||||
properties = {}
|
||||
required = []
|
||||
|
||||
for paramName, paramInfo in actionParams.items():
|
||||
paramType = paramInfo.get("type", "str") if isinstance(paramInfo, dict) else "str"
|
||||
paramDesc = paramInfo.get("description", "") if isinstance(paramInfo, dict) else ""
|
||||
paramRequired = paramInfo.get("required", False) if isinstance(paramInfo, dict) else False
|
||||
|
||||
jsonType = _pythonTypeToJsonType(paramType)
|
||||
properties[paramName] = {
|
||||
"type": jsonType,
|
||||
"description": paramDesc
|
||||
}
|
||||
|
||||
if paramRequired:
|
||||
required.append(paramName)
|
||||
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": properties,
|
||||
"required": required
|
||||
}
|
||||
|
||||
|
||||
def _pythonTypeToJsonType(pythonType: str) -> str:
|
||||
"""Map Python type strings to JSON Schema types."""
|
||||
mapping = {
|
||||
"str": "string",
|
||||
"int": "integer",
|
||||
"float": "number",
|
||||
"bool": "boolean",
|
||||
"list": "array",
|
||||
"dict": "object",
|
||||
"List[str]": "array",
|
||||
"List[int]": "array",
|
||||
"List[dict]": "array",
|
||||
"Dict[str, Any]": "object",
|
||||
}
|
||||
return mapping.get(pythonType, "string")
|
||||
|
||||
|
||||
def _createDispatchHandler(actionExecutor, methodName: str, actionName: str):
|
||||
"""Create an async handler that dispatches to the ActionExecutor."""
|
||||
async def _handler(args: Dict[str, Any], context: Dict[str, Any]) -> ToolResult:
|
||||
try:
|
||||
result = await actionExecutor.executeAction(methodName, actionName, args)
|
||||
data = _formatActionResult(result)
|
||||
return ToolResult(
|
||||
toolCallId="",
|
||||
toolName=f"{methodName}.{actionName}",
|
||||
success=result.success,
|
||||
data=data,
|
||||
error=result.error
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"ActionToolAdapter dispatch failed for {methodName}.{actionName}: {e}")
|
||||
return ToolResult(
|
||||
toolCallId="",
|
||||
toolName=f"{methodName}.{actionName}",
|
||||
success=False,
|
||||
error=str(e)
|
||||
)
|
||||
return _handler
|
||||
|
||||
|
||||
def _formatActionResult(result) -> str:
|
||||
"""Format an ActionResult into a text representation for the agent."""
|
||||
parts = []
|
||||
|
||||
if result.resultLabel:
|
||||
parts.append(f"Result: {result.resultLabel}")
|
||||
|
||||
if result.error:
|
||||
parts.append(f"Error: {result.error}")
|
||||
|
||||
if result.documents:
|
||||
parts.append(f"Documents ({len(result.documents)}):")
|
||||
for doc in result.documents:
|
||||
docName = getattr(doc, "documentName", "unnamed")
|
||||
docType = getattr(doc, "mimeType", "unknown")
|
||||
parts.append(f" - {docName} ({docType})")
|
||||
docData = getattr(doc, "documentData", None)
|
||||
if docData and isinstance(docData, str) and len(docData) < 2000:
|
||||
parts.append(f" Content: {docData[:2000]}")
|
||||
|
||||
if not parts:
|
||||
parts.append("Action completed successfully." if result.success else "Action failed.")
|
||||
|
||||
return "\n".join(parts)
|
||||
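# Hedged usage sketch (illustrative; assumes ToolRegistry needs no constructor
# arguments here and that an actionExecutor instance is available):
registry = ToolRegistry()
adapter = ActionToolAdapter(actionExecutor)
adapter.registerAll(registry)
print(adapter.registeredTools)   # compound names, e.g. ["web.search", ...] (example values)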
406 modules/serviceCenter/services/serviceAgent/agentLoop.py (new file)
|
|
@ -0,0 +1,406 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Agent loop: ReAct pattern with native function calling, budget control, and error handling."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
import json
|
||||
import re
|
||||
from typing import List, Dict, Any, Optional, AsyncGenerator, Callable, Awaitable
|
||||
|
||||
from modules.datamodels.datamodelAi import (
|
||||
AiCallRequest, AiCallOptions, AiCallResponse, OperationTypeEnum
|
||||
)
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelAgent import (
|
||||
AgentState, AgentStatusEnum, AgentConfig, AgentEvent, AgentEventTypeEnum,
|
||||
ToolCallRequest, ToolResult, ToolCallLog, AgentRoundLog, AgentTrace
|
||||
)
|
||||
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
|
||||
from modules.serviceCenter.services.serviceAgent.conversationManager import (
|
||||
ConversationManager, buildSystemPrompt
|
||||
)
|
||||
from modules.shared.timeUtils import getUtcTimestamp
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_RETRIES_PER_TOOL = 3
|
||||
RETRY_BASE_DELAY_S = 1.0
|
||||
|
||||
|
||||
async def runAgentLoop(
|
||||
prompt: str,
|
||||
toolRegistry: ToolRegistry,
|
||||
config: AgentConfig,
|
||||
aiCallFn: Callable[[AiCallRequest], Awaitable[AiCallResponse]],
|
||||
getWorkflowCostFn: Callable[[], Awaitable[float]],
|
||||
workflowId: str,
|
||||
userId: str = "",
|
||||
featureInstanceId: str = "",
|
||||
buildRagContextFn: Callable[..., Awaitable[str]] = None,
|
||||
mandateId: str = "",
|
||||
aiCallStreamFn: Callable = None,
|
||||
userLanguage: str = "",
|
||||
) -> AsyncGenerator[AgentEvent, None]:
|
||||
"""Run the agent loop. Yields AgentEvent for each step (SSE-ready).
|
||||
|
||||
Args:
|
||||
prompt: User prompt
|
||||
toolRegistry: Registry with available tools
|
||||
config: Agent configuration (maxRounds, maxCostCHF, etc.)
|
||||
aiCallFn: Function to call the AI (wraps serviceAi.callAi with billing)
|
||||
getWorkflowCostFn: Function to get current workflow cost
|
||||
workflowId: Workflow ID for tracking
|
||||
userId: User ID for tracing
|
||||
featureInstanceId: Feature instance ID for tracing
|
||||
buildRagContextFn: Optional async function to build RAG context before each round
|
||||
mandateId: Mandate ID for RAG scoping
|
||||
userLanguage: ISO 639-1 language code for agent responses
|
||||
"""
|
||||
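# Caller-side sketch (comments only, illustrative): the loop is typically consumed
# as an async event stream and forwarded to the client, e.g.
#     async for event in runAgentLoop(prompt, toolRegistry, config, aiCallFn,
#                                     getWorkflowCostFn, workflowId):
#         forwardToClient(event)   # forwardToClient is a placeholder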
state = AgentState(workflowId=workflowId, maxRounds=config.maxRounds)
|
||||
trace = AgentTrace(
|
||||
workflowId=workflowId, userId=userId,
|
||||
featureInstanceId=featureInstanceId
|
||||
)
|
||||
|
||||
tools = toolRegistry.getTools()
|
||||
toolDefinitions = toolRegistry.formatToolsForFunctionCalling()
|
||||
toolsText = toolRegistry.formatToolsForPrompt()
|
||||
|
||||
systemPrompt = buildSystemPrompt(tools, toolsText, userLanguage=userLanguage)
|
||||
conversation = ConversationManager(systemPrompt)
|
||||
conversation.addUserMessage(prompt)
|
||||
|
||||
while state.status == AgentStatusEnum.RUNNING and state.currentRound < state.maxRounds:
|
||||
await asyncio.sleep(0)
|
||||
state.currentRound += 1
|
||||
roundStartTime = time.time()
|
||||
roundLog = AgentRoundLog(roundNumber=state.currentRound)
|
||||
|
||||
# RAG context injection (before each round for fresh relevance)
|
||||
if buildRagContextFn:
|
||||
try:
|
||||
latestUserMsg = ""
|
||||
for msg in reversed(conversation.messages):
|
||||
if msg.get("role") == "user":
|
||||
latestUserMsg = msg.get("content", "")
|
||||
break
|
||||
ragContext = await buildRagContextFn(
|
||||
currentPrompt=latestUserMsg or prompt,
|
||||
workflowId=workflowId,
|
||||
userId=userId,
|
||||
featureInstanceId=featureInstanceId,
|
||||
mandateId=mandateId,
|
||||
)
|
||||
if ragContext:
|
||||
conversation.injectRagContext(ragContext)
|
||||
except Exception as ragErr:
|
||||
logger.warning(f"RAG context injection failed (non-blocking): {ragErr}")
|
||||
|
||||
# Budget check
|
||||
budgetExceeded = await _checkBudget(config, getWorkflowCostFn)
|
||||
if budgetExceeded:
|
||||
state.status = AgentStatusEnum.BUDGET_EXCEEDED
|
||||
state.abortReason = "Workflow cost budget exceeded"
|
||||
yield AgentEvent(
|
||||
type=AgentEventTypeEnum.FINAL,
|
||||
content=_buildProgressSummary(state, "Budget exceeded. Here is the progress so far.")
|
||||
)
|
||||
break
|
||||
|
||||
logger.info(f"Agent round {state.currentRound}/{state.maxRounds} for workflow {workflowId} (tools={state.totalToolCalls}, cost={state.totalCostCHF:.4f})")
|
||||
yield AgentEvent(
|
||||
type=AgentEventTypeEnum.AGENT_PROGRESS,
|
||||
data={
|
||||
"round": state.currentRound,
|
||||
"maxRounds": state.maxRounds,
|
||||
"totalAiCalls": state.totalAiCalls,
|
||||
"totalToolCalls": state.totalToolCalls,
|
||||
"costCHF": state.totalCostCHF
|
||||
}
|
||||
)
|
||||
|
||||
# Progressive summarization
|
||||
if conversation.needsSummarization(state.currentRound):
|
||||
async def _summarizeCall(summaryPrompt: str) -> str:
|
||||
req = AiCallRequest(
|
||||
prompt=summaryPrompt,
|
||||
options=AiCallOptions(operationType=OperationTypeEnum.DATA_ANALYSE)
|
||||
)
|
||||
resp = await aiCallFn(req)
|
||||
state.totalCostCHF += resp.priceCHF
|
||||
state.totalAiCalls += 1
|
||||
return resp.content
|
||||
|
||||
await conversation.summarize(state.currentRound, _summarizeCall)
|
||||
|
||||
# AI call
|
||||
aiRequest = AiCallRequest(
|
||||
prompt="",
|
||||
options=AiCallOptions(
|
||||
operationType=OperationTypeEnum.AGENT,
|
||||
temperature=config.temperature
|
||||
),
|
||||
messages=conversation.messages,
|
||||
tools=toolDefinitions
|
||||
)
|
||||
|
||||
try:
|
||||
aiResponse = None
|
||||
streamedText = ""
|
||||
isFirstChunkOfRound = True
|
||||
|
||||
if aiCallStreamFn:
|
||||
async for chunk in aiCallStreamFn(aiRequest):
|
||||
if isinstance(chunk, str):
|
||||
if isFirstChunkOfRound and state.currentRound > 1:
|
||||
chunk = "\n\n" + chunk
|
||||
isFirstChunkOfRound = False
|
||||
elif isFirstChunkOfRound:
|
||||
isFirstChunkOfRound = False
|
||||
streamedText += chunk
|
||||
yield AgentEvent(type=AgentEventTypeEnum.CHUNK, content=chunk)
|
||||
else:
|
||||
aiResponse = chunk
|
||||
|
||||
if aiResponse is None:
|
||||
raise RuntimeError("Stream ended without final AiCallResponse")
|
||||
else:
|
||||
aiResponse = await aiCallFn(aiRequest)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"AI call failed in round {state.currentRound}: {e}", exc_info=True)
|
||||
state.status = AgentStatusEnum.ERROR
|
||||
state.abortReason = f"AI call error: {e}"
|
||||
yield AgentEvent(type=AgentEventTypeEnum.ERROR, content=str(e))
|
||||
break
|
||||
|
||||
state.totalAiCalls += 1
|
||||
state.totalCostCHF += aiResponse.priceCHF
|
||||
state.totalProcessingTime += aiResponse.processingTime
|
||||
roundLog.aiModel = aiResponse.modelName
|
||||
roundLog.costCHF = aiResponse.priceCHF
|
||||
|
||||
if aiResponse.errorCount > 0:
|
||||
state.status = AgentStatusEnum.ERROR
|
||||
state.abortReason = f"AI returned error: {aiResponse.content}"
|
||||
yield AgentEvent(type=AgentEventTypeEnum.ERROR, content=aiResponse.content)
|
||||
break
|
||||
|
||||
# Parse response for tool calls
|
||||
toolCalls = _parseToolCalls(aiResponse)
|
||||
textContent = _extractTextContent(aiResponse)
|
||||
|
||||
if textContent and not streamedText:
|
||||
yield AgentEvent(type=AgentEventTypeEnum.MESSAGE, content=textContent)
|
||||
|
||||
if not toolCalls:
|
||||
state.status = AgentStatusEnum.COMPLETED
|
||||
conversation.addAssistantMessage(aiResponse.content)
|
||||
roundLog.durationMs = int((time.time() - roundStartTime) * 1000)
|
||||
trace.rounds.append(roundLog)
|
||||
yield AgentEvent(type=AgentEventTypeEnum.FINAL, content=textContent or aiResponse.content)
|
||||
break
|
||||
|
||||
# Add assistant message with tool calls to conversation
|
||||
assistantToolCalls = _formatAssistantToolCalls(toolCalls)
|
||||
conversation.addAssistantMessage(textContent or "", assistantToolCalls)
|
||||
|
||||
# Execute tool calls
|
||||
for tc in toolCalls:
|
||||
yield AgentEvent(
|
||||
type=AgentEventTypeEnum.TOOL_CALL,
|
||||
data={"toolName": tc.name, "args": tc.args}
|
||||
)
|
||||
|
||||
results = await _executeToolCalls(toolCalls, toolRegistry, {
|
||||
"workflowId": workflowId,
|
||||
"userId": userId,
|
||||
"featureInstanceId": featureInstanceId,
|
||||
"mandateId": mandateId,
|
||||
})
|
||||
state.totalToolCalls += len(results)
|
||||
|
||||
for result in results:
|
||||
roundLog.toolCalls.append(ToolCallLog(
|
||||
toolName=result.toolName,
|
||||
args=next((tc.args for tc in toolCalls if tc.id == result.toolCallId), {}),
|
||||
success=result.success,
|
||||
durationMs=result.durationMs,
|
||||
error=result.error
|
||||
))
|
||||
if not result.success:
|
||||
logger.warning(f"Tool '{result.toolName}' failed: {result.error}")
|
||||
yield AgentEvent(
|
||||
type=AgentEventTypeEnum.TOOL_RESULT,
|
||||
data={
|
||||
"toolName": result.toolName,
|
||||
"success": result.success,
|
||||
"data": result.data[:500] if result.data else "",
|
||||
"error": result.error
|
||||
}
|
||||
)
|
||||
if result.sideEvents:
|
||||
for sideEvt in result.sideEvents:
|
||||
evtType = sideEvt.get("type", "")
|
||||
try:
|
||||
evtEnum = AgentEventTypeEnum(evtType)
|
||||
except (ValueError, KeyError):
|
||||
continue
|
||||
yield AgentEvent(
|
||||
type=evtEnum,
|
||||
data=sideEvt.get("data"),
|
||||
content=sideEvt.get("content"),
|
||||
)
|
||||
|
||||
# Add tool results to conversation
|
||||
toolResultMessages = [
|
||||
{"toolCallId": r.toolCallId, "toolName": r.toolName,
|
||||
"content": r.data if r.success else f"Error: {r.error}"}
|
||||
for r in results
|
||||
]
|
||||
conversation.addToolResults(toolResultMessages)
|
||||
|
||||
roundLog.durationMs = int((time.time() - roundStartTime) * 1000)
|
||||
trace.rounds.append(roundLog)
|
||||
|
||||
# maxRounds reached
|
||||
if state.currentRound >= state.maxRounds and state.status == AgentStatusEnum.RUNNING:
|
||||
state.status = AgentStatusEnum.MAX_ROUNDS_REACHED
|
||||
state.abortReason = f"Maximum rounds ({state.maxRounds}) reached"
|
||||
yield AgentEvent(
|
||||
type=AgentEventTypeEnum.FINAL,
|
||||
content=_buildProgressSummary(state, "Maximum rounds reached.")
|
||||
)
|
||||
|
||||
# Agent summary
|
||||
trace.completedAt = getUtcTimestamp()
|
||||
trace.status = state.status
|
||||
trace.totalRounds = state.currentRound
|
||||
trace.totalToolCalls = state.totalToolCalls
|
||||
trace.totalCostCHF = state.totalCostCHF
|
||||
trace.abortReason = state.abortReason
|
||||
|
||||
yield AgentEvent(
|
||||
type=AgentEventTypeEnum.AGENT_SUMMARY,
|
||||
data={
|
||||
"rounds": state.currentRound,
|
||||
"totalAiCalls": state.totalAiCalls,
|
||||
"totalToolCalls": state.totalToolCalls,
|
||||
"costCHF": round(state.totalCostCHF, 4),
|
||||
"processingTime": round(state.totalProcessingTime, 2),
|
||||
"status": state.status.value,
|
||||
"abortReason": state.abortReason
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
async def _checkBudget(config: AgentConfig,
|
||||
getWorkflowCostFn: Callable[[], Awaitable[float]]) -> bool:
|
||||
"""Check if workflow budget is exceeded. Returns True if exceeded."""
|
||||
if config.maxCostCHF is None:
|
||||
return False
|
||||
try:
|
||||
currentCost = await getWorkflowCostFn()
|
||||
return currentCost > config.maxCostCHF
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not check workflow cost: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def _executeToolCalls(toolCalls: List[ToolCallRequest],
|
||||
toolRegistry: ToolRegistry,
|
||||
context: Dict[str, Any]) -> List[ToolResult]:
|
||||
"""Execute tool calls: readOnly tools in parallel, others sequentially."""
|
||||
readOnlyCalls = [tc for tc in toolCalls if toolRegistry.isReadOnly(tc.name)]
|
||||
writeCalls = [tc for tc in toolCalls if not toolRegistry.isReadOnly(tc.name)]
|
||||
|
||||
results: Dict[str, ToolResult] = {}
|
||||
|
||||
if readOnlyCalls:
|
||||
readResults = await asyncio.gather(*[
|
||||
toolRegistry.dispatch(tc, context) for tc in readOnlyCalls
|
||||
])
|
||||
for tc, result in zip(readOnlyCalls, readResults):
|
||||
results[tc.id] = result
|
||||
|
||||
for tc in writeCalls:
|
||||
results[tc.id] = await toolRegistry.dispatch(tc, context)
|
||||
|
||||
return [results[tc.id] for tc in toolCalls]
|
||||
|
||||
|
||||
def _parseToolCalls(aiResponse: AiCallResponse) -> List[ToolCallRequest]:
|
||||
"""Parse tool calls from AI response. Supports native function calling and text-based fallback."""
|
||||
toolCalls = []
|
||||
|
||||
# Native function calling: check response metadata
|
||||
if hasattr(aiResponse, 'toolCalls') and aiResponse.toolCalls:
|
||||
for tc in aiResponse.toolCalls:
|
||||
rawArgs = tc["function"]["arguments"]
|
||||
if isinstance(rawArgs, str):
|
||||
rawArgs = rawArgs.strip()
|
||||
try:
|
||||
parsedArgs = json.loads(rawArgs) if rawArgs else {}
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(f"Failed to parse tool args for '{tc['function']['name']}': {rawArgs[:200]}")
|
||||
parsedArgs = {}
|
||||
else:
|
||||
parsedArgs = rawArgs if rawArgs else {}
|
||||
toolCalls.append(ToolCallRequest(
|
||||
id=tc.get("id", str(len(toolCalls))),
|
||||
name=tc["function"]["name"],
|
||||
args=parsedArgs,
|
||||
))
|
||||
return toolCalls
|
||||
|
||||
# Text-based fallback: parse ```tool_call blocks
|
||||
content = aiResponse.content or ""
|
||||
pattern = r"```tool_call\s*\n\s*tool:\s*(\S+)\s*\n\s*args:\s*(\{.*?\})\s*\n\s*```"
|
||||
matches = re.finditer(pattern, content, re.DOTALL)
|
||||
|
||||
for match in matches:
|
||||
toolName = match.group(1).strip()
|
||||
argsStr = match.group(2).strip()
|
||||
try:
|
||||
args = json.loads(argsStr)
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(f"Failed to parse tool args for '{toolName}': {argsStr}")
|
||||
args = {}
|
||||
toolCalls.append(ToolCallRequest(name=toolName, args=args))
|
||||
|
||||
return toolCalls
|
||||
|
||||
|
||||
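# Illustrative sketch (not part of this commit): the fenced block format the
# text-based fallback above expects, and what _parseToolCalls / _extractTextContent
# would return for it. Assumes AiCallResponse can be constructed with just content;
# the "searchDocuments" tool and its args are hypothetical.
#
#   response = AiCallResponse(content=(
#       "Let me look that up first.\n"
#       "```tool_call\n"
#       "tool: searchDocuments\n"
#       'args: {"query": "Q3 incident report", "limit": 5}\n'
#       "```"
#   ))
#   _parseToolCalls(response)      # -> [ToolCallRequest(name="searchDocuments",
#                                  #      args={"query": "Q3 incident report", "limit": 5})]
#   _extractTextContent(response)  # -> "Let me look that up first."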
def _extractTextContent(aiResponse: AiCallResponse) -> str:
|
||||
"""Extract text content from AI response, removing tool_call blocks."""
|
||||
content = aiResponse.content or ""
|
||||
cleaned = re.sub(r"```tool_call\s*\n.*?\n\s*```", "", content, flags=re.DOTALL)
|
||||
return cleaned.strip()
|
||||
|
||||
|
||||
def _formatAssistantToolCalls(toolCalls: List[ToolCallRequest]) -> List[Dict[str, Any]]:
|
||||
"""Format tool calls for the conversation history (OpenAI tool_calls format)."""
|
||||
return [
|
||||
{
|
||||
"id": tc.id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": tc.name,
|
||||
"arguments": json.dumps(tc.args)
|
||||
}
|
||||
}
|
||||
for tc in toolCalls
|
||||
]
|
||||
|
||||
|
||||
def _buildProgressSummary(state: AgentState, reason: str) -> str:
|
||||
"""Build a human-readable summary of agent progress for graceful termination."""
|
||||
return (
|
||||
f"{reason}\n\n"
|
||||
f"Progress after {state.currentRound} rounds:\n"
|
||||
f"- AI calls: {state.totalAiCalls}\n"
|
||||
f"- Tool calls: {state.totalToolCalls}\n"
|
||||
f"- Cost: {state.totalCostCHF:.4f} CHF\n"
|
||||
f"- Processing time: {state.totalProcessingTime:.1f}s"
|
||||
)
|
||||
|
|
@@ -0,0 +1,280 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Conversation manager for the Agent service.
|
||||
Handles message history, context window management, and progressive summarization."""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolDefinition
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
FIRST_SUMMARY_ROUND = 4
|
||||
META_SUMMARY_ROUND = 7
|
||||
KEEP_RECENT_MESSAGES = 4
|
||||
MAX_ESTIMATED_TOKENS = 60000
|
||||
|
||||
|
||||
class ConversationManager:
|
||||
"""Manages the conversation history and context window for agent runs.
|
||||
|
||||
Progressive summarization strategy:
|
||||
- Rounds 1-3: full conversation retained
|
||||
- Round 4+: older messages compressed into a running summary
|
||||
- Round 7+: meta-summary replaces prior summaries
|
||||
Supports RAG context injection before each round via injectRagContext."""
|
||||
|
||||
def __init__(self, systemPrompt: str):
|
||||
self._messages: List[Dict[str, Any]] = [
|
||||
{"role": "system", "content": systemPrompt}
|
||||
]
|
||||
self._summaries: List[Dict[str, Any]] = []
|
||||
self._lastSummarizedRound: int = 0
|
||||
self._ragContextInjected: bool = False
|
||||
|
||||
@property
|
||||
def messages(self) -> List[Dict[str, Any]]:
|
||||
"""Current messages for the next AI call (internal markers stripped)."""
|
||||
return [
|
||||
{k: v for k, v in msg.items() if not k.startswith("_")}
|
||||
for msg in self._messages
|
||||
]
|
||||
|
||||
def addUserMessage(self, content: str):
|
||||
"""Add a user message."""
|
||||
self._messages.append({"role": "user", "content": content})
|
||||
|
||||
def addAssistantMessage(self, content: str, toolCalls: List[Dict[str, Any]] = None):
|
||||
"""Add an assistant message, optionally with tool calls."""
|
||||
msg: Dict[str, Any] = {"role": "assistant", "content": content}
|
||||
if toolCalls:
|
||||
msg["tool_calls"] = toolCalls
|
||||
self._messages.append(msg)
|
||||
|
||||
def addToolResults(self, results: List[Dict[str, Any]]):
|
||||
"""Add tool results to the conversation.
|
||||
Each result: {toolCallId, toolName, content}."""
|
||||
for result in results:
|
||||
self._messages.append({
|
||||
"role": "tool",
|
||||
"tool_call_id": result["toolCallId"],
|
||||
"content": result["content"]
|
||||
})
|
||||
|
||||
def addToolResultsAsText(self, resultText: str):
|
||||
"""Add combined tool results as a user message (text-based fallback)."""
|
||||
self._messages.append({
|
||||
"role": "user",
|
||||
"content": f"Tool Results:\n{resultText}"
|
||||
})
|
||||
|
||||
def injectRagContext(self, ragContext: str):
|
||||
"""Inject RAG context as a system message right after the main system prompt.
|
||||
|
||||
Called before each agent round by the agent loop if KnowledgeService is available.
|
||||
Replaces any previously injected RAG context to keep the context fresh."""
|
||||
if not ragContext:
|
||||
return
|
||||
|
||||
ragMessage = {
|
||||
"role": "system",
|
||||
"content": f"Relevant Knowledge (from indexed documents and workflow context):\n{ragContext}",
|
||||
"_isRagContext": True,
|
||||
}
|
||||
|
||||
# Replace existing RAG message if present, otherwise insert after system prompt
|
||||
for i, msg in enumerate(self._messages):
|
||||
if msg.get("_isRagContext"):
|
||||
self._messages[i] = ragMessage
|
||||
self._ragContextInjected = True
|
||||
return
|
||||
|
||||
# Insert after the first system prompt
|
||||
self._messages.insert(1, ragMessage)
|
||||
self._ragContextInjected = True
|
||||
|
||||
def getMessageCount(self) -> int:
|
||||
"""Get the number of messages (excluding system prompt)."""
|
||||
return len(self._messages) - 1
|
||||
|
||||
def estimateTokenCount(self) -> int:
|
||||
"""Rough estimate of total tokens in the conversation (4 chars ≈ 1 token)."""
|
||||
totalChars = sum(len(str(m.get("content", ""))) for m in self._messages)
|
||||
return totalChars // 4
|
||||
|
||||
def needsSummarization(self, currentRound: int) -> bool:
|
||||
"""Check if progressive summarization should be triggered.
|
||||
|
||||
Triggers:
|
||||
- At round FIRST_SUMMARY_ROUND (4) if not yet summarized
|
||||
- At round META_SUMMARY_ROUND (7) for meta-summary
|
||||
- Every 5 rounds after that
|
||||
- When estimated token count exceeds MAX_ESTIMATED_TOKENS
|
||||
"""
|
||||
if currentRound >= FIRST_SUMMARY_ROUND and self._lastSummarizedRound < currentRound:
|
||||
if currentRound == FIRST_SUMMARY_ROUND or currentRound == META_SUMMARY_ROUND:
|
||||
return True
|
||||
if (currentRound - META_SUMMARY_ROUND) % 5 == 0 and currentRound > META_SUMMARY_ROUND:
|
||||
return True
|
||||
if self.estimateTokenCount() > MAX_ESTIMATED_TOKENS:
|
||||
return True
|
||||
return False
|
||||
|
||||
async def summarize(self, currentRound: int, aiCallFn) -> Optional[str]:
|
||||
"""Perform progressive summarization of older messages.
|
||||
|
||||
Rounds 1-3: full history retained, no summarization.
|
||||
Round 4+: compress older messages into a running summary.
|
||||
Round 7+: meta-summary that consolidates prior summaries.
|
||||
"""
|
||||
if currentRound < FIRST_SUMMARY_ROUND and self.estimateTokenCount() <= MAX_ESTIMATED_TOKENS:
|
||||
return None
|
||||
|
||||
systemMsgs = [m for m in self._messages if m.get("role") == "system"]
|
||||
nonSystemMessages = [m for m in self._messages if m.get("role") != "system"]
|
||||
|
||||
keepRecent = min(KEEP_RECENT_MESSAGES, len(nonSystemMessages))
|
||||
if len(nonSystemMessages) <= keepRecent + 1:
|
||||
return None
|
||||
|
||||
splitIdx = len(nonSystemMessages) - keepRecent
|
||||
# Ensure the split doesn't orphan tool messages from their assistant.
|
||||
# Walk backwards from splitIdx: if we're landing in the middle of a
|
||||
# tool-call sequence (assistant+tool_calls → tool → tool …), include
|
||||
# the entire sequence in recentMessages.
|
||||
while splitIdx > 0 and nonSystemMessages[splitIdx].get("role") == "tool":
|
||||
splitIdx -= 1
|
||||
# Also include the assistant message that triggered the tool calls.
|
||||
if splitIdx > 0 and splitIdx < len(nonSystemMessages) and \
|
||||
nonSystemMessages[splitIdx].get("role") == "assistant" and \
|
||||
nonSystemMessages[splitIdx].get("tool_calls"):
|
||||
pass # splitIdx already points at the assistant; keep it in recent
|
||||
elif splitIdx == 0:
|
||||
return None # nothing to summarize
|
||||
|
||||
messagesToSummarize = nonSystemMessages[:splitIdx]
|
||||
recentMessages = nonSystemMessages[splitIdx:]
|
||||
|
||||
summaryInput = _formatMessagesForSummary(messagesToSummarize)
|
||||
previousSummary = self._summaries[-1]["content"] if self._summaries else ""
|
||||
|
||||
isMetaSummary = currentRound >= META_SUMMARY_ROUND and len(self._summaries) >= 2
|
||||
summaryPrompt = _buildSummaryPrompt(summaryInput, previousSummary, isMetaSummary)
|
||||
|
||||
try:
|
||||
summaryText = await aiCallFn(summaryPrompt)
|
||||
except Exception as e:
|
||||
logger.error(f"Progressive summarization failed: {e}")
|
||||
return None
|
||||
|
||||
self._summaries.append({
|
||||
"round": currentRound,
|
||||
"content": summaryText,
|
||||
"isMeta": isMetaSummary,
|
||||
})
|
||||
self._lastSummarizedRound = currentRound
|
||||
|
||||
mainSystem = systemMsgs[0] if systemMsgs else {"role": "system", "content": ""}
|
||||
ragMessages = [m for m in systemMsgs if m.get("_isRagContext")]
|
||||
|
||||
self._messages = [
|
||||
mainSystem,
|
||||
*ragMessages,
|
||||
{"role": "system", "content": f"Conversation Summary (rounds 1-{currentRound - keepRecent}):\n{summaryText}"},
|
||||
*recentMessages,
|
||||
]
|
||||
|
||||
logger.info(
|
||||
f"Progressive summarization at round {currentRound}: "
|
||||
f"compressed {len(messagesToSummarize)} messages into "
|
||||
f"{'meta-' if isMetaSummary else ''}summary"
|
||||
)
|
||||
return summaryText
|
||||
|
||||
|
||||
def _formatMessagesForSummary(messages: List[Dict[str, Any]]) -> str:
|
||||
"""Format messages into a text block for summarization."""
|
||||
parts = []
|
||||
for msg in messages:
|
||||
role = msg.get("role", "unknown")
|
||||
content = msg.get("content", "")
|
||||
if role == "tool":
|
||||
toolName = msg.get("tool_call_id", "tool")
|
||||
parts.append(f"[Tool Result ({toolName})]:\n{content}")
|
||||
elif role == "assistant" and msg.get("tool_calls"):
|
||||
calls = msg["tool_calls"]
|
||||
callNames = [c.get("function", {}).get("name", "?") for c in calls]
|
||||
parts.append(f"[Assistant → Tool Calls: {', '.join(callNames)}]")
|
||||
if content:
|
||||
parts.append(f"[Assistant]: {content}")
|
||||
else:
|
||||
parts.append(f"[{role.capitalize()}]: {content}")
|
||||
return "\n\n".join(parts)
|
||||
|
||||
|
||||
def _buildSummaryPrompt(messagesText: str, previousSummary: str, isMetaSummary: bool = False) -> str:
|
||||
"""Build the prompt for progressive summarization."""
|
||||
if isMetaSummary:
|
||||
prompt = (
|
||||
"Create a comprehensive meta-summary consolidating the previous summary "
|
||||
"and the new messages. Preserve all key facts, decisions, entities (names, "
|
||||
"numbers, dates), tool results, and action outcomes. Be concise but complete.\n\n"
|
||||
)
|
||||
else:
|
||||
prompt = (
|
||||
"Summarize the following conversation concisely. Preserve all key facts, "
|
||||
"decisions, entities (names, numbers, dates), and tool results. "
|
||||
"Do not lose any important information.\n\n"
|
||||
)
|
||||
if previousSummary:
|
||||
prompt += f"Previous Summary:\n{previousSummary}\n\n"
|
||||
prompt += f"New Messages to Summarize:\n{messagesText}\n\nProvide a concise, factual summary:"
|
||||
return prompt
|
||||
|
||||
|
||||
_LANGUAGE_NAMES = {
|
||||
"de": "German", "en": "English", "fr": "French", "it": "Italian",
|
||||
"es": "Spanish", "pt": "Portuguese", "nl": "Dutch", "ja": "Japanese",
|
||||
"zh": "Chinese", "ko": "Korean", "ar": "Arabic", "ru": "Russian",
|
||||
}
|
||||
|
||||
|
||||
def buildSystemPrompt(
|
||||
tools: List[ToolDefinition],
|
||||
toolsFormatted: str = None,
|
||||
userLanguage: str = "",
|
||||
) -> str:
|
||||
"""Build the system prompt for the agent.
|
||||
|
||||
Args:
|
||||
tools: Available tool definitions.
|
||||
toolsFormatted: Pre-formatted tool descriptions for text-based fallback.
|
||||
userLanguage: ISO 639-1 language code (e.g. "de", "en"). The agent will
|
||||
respond in this language.
|
||||
"""
|
||||
langName = _LANGUAGE_NAMES.get(userLanguage, "")
|
||||
langInstruction = (
|
||||
f"IMPORTANT: Always respond in {langName} ({userLanguage}). "
|
||||
f"The user's language is {langName}. All your messages, explanations, "
|
||||
f"and summaries MUST be in {langName}. "
|
||||
f"Only use English for tool call arguments and technical identifiers.\n\n"
|
||||
) if langName else ""
|
||||
|
||||
prompt = (
|
||||
f"{langInstruction}"
|
||||
"You are an AI agent with access to tools. "
|
||||
"Use the provided tools to accomplish the user's task. "
|
||||
"Think step by step. Call tools when you need information or need to perform actions. "
|
||||
"When you have enough information to answer, respond directly without calling tools.\n\n"
|
||||
)
|
||||
if toolsFormatted:
|
||||
prompt += f"Available Tools:\n{toolsFormatted}\n\n"
|
||||
prompt += (
|
||||
"To call a tool, use this format:\n"
|
||||
"```tool_call\n"
|
||||
"tool: <tool_name>\n"
|
||||
'args: {"param": "value"}\n'
|
||||
"```\n\n"
|
||||
)
|
||||
return prompt
|
||||
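A minimal usage sketch (not part of the diff) of how the agent loop above could drive ConversationManager, assuming the class and an async summarizer callable are in scope; the tool name, message contents, and ids are hypothetical:

import asyncio

async def fakeSummarizer(prompt: str) -> str:
    # Stand-in for the AI call that summarize() receives as aiCallFn
    return "Summary of earlier rounds: ..."

async def demo():
    conv = ConversationManager(systemPrompt="You are an AI agent with access to tools.")
    conv.addUserMessage("Analyze the Q3 incident report.")
    conv.addAssistantMessage("", [{
        "id": "call_1", "type": "function",
        "function": {"name": "searchDocuments", "arguments": '{"query": "Q3 incident"}'},
    }])
    conv.addToolResults([{"toolCallId": "call_1", "toolName": "searchDocuments",
                          "content": "3 matching documents found"}])
    for roundNo in range(1, 8):
        if conv.needsSummarization(roundNo):
            # Compresses older messages once the history is long enough (round 4+)
            await conv.summarize(roundNo, fakeSummarizer)
    print(conv.getMessageCount(), conv.estimateTokenCount())

asyncio.run(demo())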
132 modules/serviceCenter/services/serviceAgent/datamodelAgent.py (new file)
@@ -0,0 +1,132 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Data models for the Agent service."""
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
from enum import Enum
|
||||
from pydantic import BaseModel, Field
|
||||
from modules.shared.timeUtils import getUtcTimestamp
|
||||
import uuid
|
||||
|
||||
|
||||
class AgentStatusEnum(str, Enum):
|
||||
RUNNING = "running"
|
||||
COMPLETED = "completed"
|
||||
MAX_ROUNDS_REACHED = "maxRoundsReached"
|
||||
BUDGET_EXCEEDED = "budgetExceeded"
|
||||
ERROR = "error"
|
||||
STOPPED = "stopped"
|
||||
|
||||
|
||||
class AgentEventTypeEnum(str, Enum):
|
||||
MESSAGE = "message"
|
||||
CHUNK = "chunk"
|
||||
TOOL_CALL = "toolCall"
|
||||
TOOL_RESULT = "toolResult"
|
||||
AGENT_PROGRESS = "agentProgress"
|
||||
AGENT_SUMMARY = "agentSummary"
|
||||
FILE_CREATED = "fileCreated"
|
||||
DATA_SOURCE_ACCESS = "dataSourceAccess"
|
||||
VOICE_RESPONSE = "voiceResponse"
|
||||
FINAL = "final"
|
||||
ERROR = "error"
|
||||
|
||||
|
||||
class ToolDefinition(BaseModel):
|
||||
"""Schema for a tool available to the agent."""
|
||||
name: str = Field(description="Unique tool name")
|
||||
description: str = Field(description="What this tool does")
|
||||
parameters: Dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="JSON Schema for tool parameters"
|
||||
)
|
||||
readOnly: bool = Field(
|
||||
default=False,
|
||||
description="If True, tool can run in parallel with other readOnly tools"
|
||||
)
|
||||
featureType: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Feature scope for this tool (None = available to all)"
|
||||
)
|
||||
|
||||
|
||||
class ToolCallRequest(BaseModel):
|
||||
"""A tool call requested by the AI model."""
|
||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
||||
name: str
|
||||
args: Dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class ToolResult(BaseModel):
|
||||
"""Result from executing a tool."""
|
||||
toolCallId: str
|
||||
toolName: str
|
||||
success: bool = True
|
||||
data: str = ""
|
||||
error: Optional[str] = None
|
||||
durationMs: int = 0
|
||||
sideEvents: Optional[List[Dict[str, Any]]] = None
|
||||
|
||||
|
||||
class AgentEvent(BaseModel):
|
||||
"""Event emitted during agent execution for SSE streaming."""
|
||||
type: AgentEventTypeEnum
|
||||
content: Optional[str] = None
|
||||
data: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class AgentConfig(BaseModel):
|
||||
"""Configuration for an agent run."""
|
||||
maxRounds: int = Field(default=25, ge=1, le=100)
|
||||
maxCostCHF: Optional[float] = Field(default=None, ge=0.0)
|
||||
entityCacheEnabled: bool = Field(default=False)
|
||||
toolSet: str = Field(default="core")
|
||||
temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0)
|
||||
|
||||
|
||||
class AgentState(BaseModel):
|
||||
"""Tracks state across an agent loop execution."""
|
||||
workflowId: str
|
||||
currentRound: int = 0
|
||||
maxRounds: int = 25
|
||||
totalAiCalls: int = 0
|
||||
totalToolCalls: int = 0
|
||||
totalCostCHF: float = 0.0
|
||||
totalProcessingTime: float = 0.0
|
||||
status: AgentStatusEnum = AgentStatusEnum.RUNNING
|
||||
abortReason: Optional[str] = None
|
||||
|
||||
|
||||
class ToolCallLog(BaseModel):
|
||||
"""Log of a single tool call for observability."""
|
||||
toolName: str
|
||||
args: Dict[str, Any] = Field(default_factory=dict)
|
||||
success: bool = True
|
||||
durationMs: int = 0
|
||||
error: Optional[str] = None
|
||||
|
||||
|
||||
class AgentRoundLog(BaseModel):
|
||||
"""Log of a single agent round for observability."""
|
||||
roundNumber: int
|
||||
aiModel: str = ""
|
||||
inputTokens: int = 0
|
||||
outputTokens: int = 0
|
||||
costCHF: float = 0.0
|
||||
toolCalls: List[ToolCallLog] = Field(default_factory=list)
|
||||
durationMs: int = 0
|
||||
|
||||
|
||||
class AgentTrace(BaseModel):
|
||||
"""Full trace of an agent workflow for observability."""
|
||||
workflowId: str
|
||||
userId: str = ""
|
||||
featureInstanceId: str = ""
|
||||
startedAt: float = Field(default_factory=getUtcTimestamp)
|
||||
completedAt: Optional[float] = None
|
||||
status: AgentStatusEnum = AgentStatusEnum.RUNNING
|
||||
totalRounds: int = 0
|
||||
totalToolCalls: int = 0
|
||||
totalCostCHF: float = 0.0
|
||||
abortReason: Optional[str] = None
|
||||
rounds: List[AgentRoundLog] = Field(default_factory=list)
|
||||
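A small sketch (not part of the diff) of how one of these events might be serialized for SSE streaming, assuming pydantic v2 (model_dump_json); the payload values are hypothetical:

evt = AgentEvent(
    type=AgentEventTypeEnum.TOOL_RESULT,
    data={"toolName": "searchDocuments", "success": True, "durationMs": 120},
)
# str-based enums serialize as their values, e.g. "toolResult"
sseFrame = f"data: {evt.model_dump_json(exclude_none=True)}\n\n"
print(sseFrame)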
1983 modules/serviceCenter/services/serviceAgent/mainServiceAgent.py (new file; diff suppressed because it is too large)
150 modules/serviceCenter/services/serviceAgent/toolRegistry.py (new file)
@@ -0,0 +1,150 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Tool registry for the Agent service. Manages tool definitions and dispatch."""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Dict, List, Any, Optional, Callable, Awaitable
|
||||
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelAgent import (
|
||||
ToolDefinition, ToolCallRequest, ToolResult
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ToolRegistry:
|
||||
"""Registry for agent tools. Handles registration, lookup, and dispatch."""
|
||||
|
||||
def __init__(self):
|
||||
self._tools: Dict[str, ToolDefinition] = {}
|
||||
self._handlers: Dict[str, Callable[..., Awaitable[ToolResult]]] = {}
|
||||
|
||||
def register(self, name: str, handler: Callable[..., Awaitable[ToolResult]],
|
||||
description: str = "", parameters: Dict[str, Any] = None,
|
||||
readOnly: bool = False, featureType: str = None):
|
||||
"""Register a tool with its handler function."""
|
||||
if name in self._tools:
|
||||
logger.warning(f"Tool '{name}' already registered, overwriting")
|
||||
|
||||
self._tools[name] = ToolDefinition(
|
||||
name=name,
|
||||
description=description,
|
||||
parameters=parameters or {},
|
||||
readOnly=readOnly,
|
||||
featureType=featureType
|
||||
)
|
||||
self._handlers[name] = handler
|
||||
logger.debug(f"Registered tool: {name} (readOnly={readOnly})")
|
||||
|
||||
def registerFromDefinition(self, definition: ToolDefinition,
|
||||
handler: Callable[..., Awaitable[ToolResult]]):
|
||||
"""Register a tool from a pre-built ToolDefinition."""
|
||||
self._tools[definition.name] = definition
|
||||
self._handlers[definition.name] = handler
|
||||
logger.debug(f"Registered tool: {definition.name} (readOnly={definition.readOnly})")
|
||||
|
||||
def unregister(self, name: str):
|
||||
"""Remove a tool from the registry."""
|
||||
self._tools.pop(name, None)
|
||||
self._handlers.pop(name, None)
|
||||
|
||||
def getTools(self, toolSet: str = None, featureType: str = None) -> List[ToolDefinition]:
|
||||
"""Get available tools, optionally filtered by toolSet or featureType."""
|
||||
tools = list(self._tools.values())
|
||||
if featureType:
|
||||
tools = [t for t in tools if t.featureType is None or t.featureType == featureType]
|
||||
return tools
|
||||
|
||||
def getToolNames(self) -> List[str]:
|
||||
"""Get names of all registered tools."""
|
||||
return list(self._tools.keys())
|
||||
|
||||
def getTool(self, name: str) -> Optional[ToolDefinition]:
|
||||
"""Get a single tool definition by name."""
|
||||
return self._tools.get(name)
|
||||
|
||||
def isReadOnly(self, name: str) -> bool:
|
||||
"""Check if a tool is marked as readOnly."""
|
||||
tool = self._tools.get(name)
|
||||
return tool.readOnly if tool else False
|
||||
|
||||
def isValidTool(self, name: str) -> bool:
|
||||
"""Check if a tool name is valid (registered)."""
|
||||
return name in self._tools
|
||||
|
||||
async def dispatch(self, toolCall: ToolCallRequest, context: Dict[str, Any] = None) -> ToolResult:
|
||||
"""Execute a tool call and return the result."""
|
||||
startTime = time.time()
|
||||
|
||||
if not self.isValidTool(toolCall.name):
|
||||
return ToolResult(
|
||||
toolCallId=toolCall.id,
|
||||
toolName=toolCall.name,
|
||||
success=False,
|
||||
error=f"Unknown tool: '{toolCall.name}'. Available: {', '.join(self.getToolNames())}"
|
||||
)
|
||||
|
||||
handler = self._handlers[toolCall.name]
|
||||
argsSummary = ", ".join(f"{k}={str(v)[:80]}" for k, v in (toolCall.args or {}).items())
|
||||
logger.info(f"Tool dispatch: {toolCall.name}({argsSummary})")
|
||||
try:
|
||||
result = await handler(toolCall.args, context or {})
|
||||
durationMs = int((time.time() - startTime) * 1000)
|
||||
|
||||
if isinstance(result, ToolResult):
|
||||
result.toolCallId = toolCall.id
|
||||
result.durationMs = durationMs
|
||||
dataSummary = (result.data[:200] + "...") if result.data and len(result.data) > 200 else (result.data or "")
|
||||
if result.success:
|
||||
logger.info(f"Tool result: {toolCall.name} OK ({durationMs}ms) → {dataSummary}")
|
||||
else:
|
||||
logger.warning(f"Tool result: {toolCall.name} FAILED ({durationMs}ms) → {result.error}")
|
||||
return result
|
||||
|
||||
return ToolResult(
|
||||
toolCallId=toolCall.id,
|
||||
toolName=toolCall.name,
|
||||
success=True,
|
||||
data=str(result),
|
||||
durationMs=durationMs
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
durationMs = int((time.time() - startTime) * 1000)
|
||||
logger.error(f"Tool '{toolCall.name}' failed: {e}", exc_info=True)
|
||||
return ToolResult(
|
||||
toolCallId=toolCall.id,
|
||||
toolName=toolCall.name,
|
||||
success=False,
|
||||
error=str(e),
|
||||
durationMs=durationMs
|
||||
)
|
||||
|
||||
def formatToolsForPrompt(self) -> str:
|
||||
"""Format all tools as text for system prompt (text-based fallback)."""
|
||||
parts = []
|
||||
for tool in self._tools.values():
|
||||
paramStr = ", ".join(
|
||||
f"{k}: {v}" for k, v in tool.parameters.items()
|
||||
) if tool.parameters else "none"
|
||||
parts.append(f"- **{tool.name}**: {tool.description}\n Parameters: {{{paramStr}}}")
|
||||
return "\n".join(parts)
|
||||
|
||||
def formatToolsForFunctionCalling(self) -> List[Dict[str, Any]]:
|
||||
"""Format all tools as OpenAI-compatible function definitions for native function calling."""
|
||||
functions = []
|
||||
for tool in self._tools.values():
|
||||
functions.append({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": tool.name,
|
||||
"description": tool.description,
|
||||
"parameters": tool.parameters if tool.parameters else {
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
"required": []
|
||||
}
|
||||
}
|
||||
})
|
||||
return functions
|
||||
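A minimal sketch (not part of the diff) of registering and dispatching a tool, assuming ToolRegistry, ToolCallRequest, and ToolResult are in scope; the getWeather tool, its handler, and the context values are hypothetical:

import asyncio

async def getWeatherHandler(args, context):
    # Handlers receive the parsed args dict and the call context
    return ToolResult(toolCallId="", toolName="getWeather",
                      success=True, data=f"Sunny in {args.get('city', 'unknown')}")

async def demo():
    registry = ToolRegistry()
    registry.register(
        name="getWeather",
        handler=getWeatherHandler,
        description="Get the current weather for a city",
        parameters={"type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"]},
        readOnly=True,  # readOnly tools may run in parallel in _executeToolCalls
    )
    call = ToolCallRequest(name="getWeather", args={"city": "Bern"})
    result = await registry.dispatch(call, context={"workflowId": "wf-123"})
    print(result.success, result.data, result.durationMs)

asyncio.run(demo())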
|
|
@@ -64,6 +64,10 @@ class _ServicesAdapter:
|
|||
def interfaceDbChat(self):
|
||||
return self._get_service("chat").interfaceDbChat
|
||||
|
||||
@property
|
||||
def interfaceDbComponent(self):
|
||||
return self._get_service("chat").interfaceDbComponent
|
||||
|
||||
@property
|
||||
def featureCode(self) -> Optional[str]:
|
||||
w = self.workflow
|
||||
|
|
@@ -142,6 +146,8 @@ class AiService:
|
|||
3. billingCallback on aiObjects: records one billing transaction per model call
|
||||
with exact provider + model name (set before AI call, invoked by _callWithModel)
|
||||
"""
|
||||
await self.ensureAiObjectsInitialized()
|
||||
|
||||
# SPEECH_TEAMS: Dedicated pipeline, bypasses standard model selection
|
||||
if request.options and request.options.operationType == OperationTypeEnum.SPEECH_TEAMS:
|
||||
return await self._handleSpeechTeams(request)
|
||||
|
|
@@ -171,14 +177,27 @@ class AiService:
|
|||
else:
|
||||
response = await self.aiObjects.callWithTextContext(request)
|
||||
finally:
|
||||
# Clear callback after call completes
|
||||
self.aiObjects.billingCallback = None
|
||||
|
||||
# Store workflow stats for analytics
|
||||
self._storeAiCallStats(response, request)
|
||||
|
||||
return response
|
||||
|
||||
async def callAiStream(self, request: AiCallRequest):
|
||||
"""Streaming variant of callAi. Yields str deltas during generation, then final AiCallResponse."""
|
||||
await self.ensureAiObjectsInitialized()
|
||||
self._preflightBillingCheck()
|
||||
await self._checkBillingBeforeAiCall()
|
||||
|
||||
effectiveProviders = self._calculateEffectiveProviders()
|
||||
if effectiveProviders and request.options:
|
||||
request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders})
|
||||
|
||||
self.aiObjects.billingCallback = self._createBillingCallback()
|
||||
try:
|
||||
async for chunk in self.aiObjects.callWithTextContextStream(request):
|
||||
yield chunk
|
||||
finally:
|
||||
self.aiObjects.billingCallback = None
|
||||
|
||||
# =========================================================================
|
||||
# SPEECH_TEAMS: Dedicated handler for Teams Meeting AI analysis
|
||||
# Bypasses standard model selection. Uses a fixed fast model.
|
||||
|
|
@@ -295,9 +314,6 @@ class AiService:
|
|||
except Exception as e:
|
||||
logger.error(f"BILLING: Failed to record billing for SPEECH_TEAMS: {e}")
|
||||
|
||||
# Store stats
|
||||
self._storeAiCallStats(response, request)
|
||||
|
||||
logger.info(f"SPEECH_TEAMS call completed: model={model.name}, time={processingTime:.2f}s, cost={priceCHF:.4f} CHF")
|
||||
return response
|
||||
|
||||
|
|
@@ -644,12 +660,12 @@ detectedIntent-Werte:
|
|||
billingService = getBillingService(user, mandateId, featureInstanceId, featureCode)
|
||||
|
||||
def _billingCallback(response) -> None:
|
||||
"""Record billing for a single AI model call."""
|
||||
"""Record billing transaction with full AI call metadata."""
|
||||
if not response or getattr(response, 'errorCount', 0) > 0:
|
||||
return
|
||||
|
||||
priceCHF = getattr(response, 'priceCHF', 0.0)
|
||||
if not priceCHF or priceCHF <= 0:
|
||||
basePriceCHF = getattr(response, 'priceCHF', 0.0)
|
||||
if not basePriceCHF or basePriceCHF <= 0:
|
||||
return
|
||||
|
||||
provider = getattr(response, 'provider', None) or 'unknown'
|
||||
|
|
@@ -657,20 +673,24 @@ detectedIntent-Werte:
|
|||
|
||||
try:
|
||||
billingService.recordUsage(
|
||||
priceCHF=priceCHF,
|
||||
priceCHF=basePriceCHF,
|
||||
workflowId=workflowId,
|
||||
aicoreProvider=provider,
|
||||
aicoreModel=modelName,
|
||||
description=f"AI: {modelName}"
|
||||
description=f"AI: {modelName}",
|
||||
processingTime=getattr(response, 'processingTime', None),
|
||||
bytesSent=getattr(response, 'bytesSent', None),
|
||||
bytesReceived=getattr(response, 'bytesReceived', None),
|
||||
errorCount=getattr(response, 'errorCount', None)
|
||||
)
|
||||
logger.debug(
|
||||
f"Billed model call: {priceCHF:.4f} CHF, "
|
||||
f"Billed model call: {basePriceCHF:.4f} CHF, "
|
||||
f"provider={provider}, model={modelName}, mandate={mandateId}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"BILLING: Failed to record transaction! "
|
||||
f"Cost={priceCHF:.4f} CHF, user={user.id}, mandate={mandateId}, "
|
||||
f"Cost={basePriceCHF:.4f} CHF, user={user.id}, mandate={mandateId}, "
|
||||
f"provider={provider}, model={modelName}, error={e}"
|
||||
)
|
||||
|
||||
|
|
@@ -723,40 +743,6 @@ detectedIntent-Werte:
|
|||
logger.warning(f"Error calculating effective providers: {e}")
|
||||
return None
|
||||
|
||||
def _storeAiCallStats(self, response, request: AiCallRequest) -> None:
|
||||
"""Store workflow stats after an AI call.
|
||||
|
||||
This method stores the AI call statistics (cost, processing time, bytes)
|
||||
to the workflow stats collection for tracking and billing purposes.
|
||||
|
||||
Args:
|
||||
response: AiCallResponse with cost/timing data
|
||||
request: Original AiCallRequest for context
|
||||
"""
|
||||
try:
|
||||
# Skip if no workflow context
|
||||
workflow = getattr(self.services, 'workflow', None)
|
||||
if not workflow or not hasattr(workflow, 'id') or not workflow.id:
|
||||
logger.debug("No workflow context - skipping stats storage")
|
||||
return
|
||||
|
||||
# Skip if response is an error
|
||||
if not response or getattr(response, 'errorCount', 0) > 0:
|
||||
logger.debug("Error response - skipping stats storage")
|
||||
return
|
||||
|
||||
# Determine process name from operation type
|
||||
opType = getattr(request.options, 'operationType', 'unknown') if request.options else 'unknown'
|
||||
process = f"ai.call.{opType}"
|
||||
|
||||
# Store the stat
|
||||
self.services.chat.storeWorkflowStat(workflow, response, process)
|
||||
logger.debug(f"Stored AI call stat: {process}, cost={getattr(response, 'priceCHF', 0):.4f} CHF")
|
||||
|
||||
except Exception as e:
|
||||
# Log but don't fail - stats storage is not critical
|
||||
logger.debug(f"Could not store AI call stat: {str(e)}")
|
||||
|
||||
async def ensureAiObjectsInitialized(self):
|
||||
"""Ensure aiObjects is initialized and submodules are ready."""
|
||||
if self.aiObjects is None:
|
||||
|
|
@@ -766,17 +752,17 @@ detectedIntent-Werte:
|
|||
self._initializeSubmodules()
|
||||
|
||||
@classmethod
|
||||
async def create(cls, legacy_services) -> "AiService":
|
||||
"""Create AiService from legacy Services hub. For backward compatibility with tests."""
|
||||
async def create(cls, servicesHub) -> "AiService":
|
||||
"""Create AiService from a ServiceHub instance."""
|
||||
from modules.serviceCenter import getService
|
||||
from modules.serviceCenter.context import ServiceCenterContext
|
||||
ctx = ServiceCenterContext(
|
||||
user=legacy_services.user,
|
||||
mandate_id=legacy_services.mandateId,
|
||||
feature_instance_id=legacy_services.featureInstanceId,
|
||||
workflow=getattr(legacy_services, "workflow", None),
|
||||
user=servicesHub.user,
|
||||
mandate_id=servicesHub.mandateId,
|
||||
feature_instance_id=servicesHub.featureInstanceId,
|
||||
workflow=getattr(servicesHub, "workflow", None),
|
||||
)
|
||||
return getService("ai", ctx, legacy_hub=legacy_services)
|
||||
return getService("ai", ctx)
|
||||
|
||||
# Helper methods
|
||||
|
||||
|
|
|
|||
|
|
@@ -125,10 +125,11 @@ class AiCallLooper:
|
|||
logger.error(errorMsg)
|
||||
raise ValueError(errorMsg)
|
||||
|
||||
maxIterations = 50 # Prevent infinite loops
|
||||
maxIterations = 10
|
||||
iteration = 0
|
||||
allSections = [] # Accumulate all sections across iterations
|
||||
lastRawResponse = None # Store last raw JSON response for continuation
|
||||
result = ""
|
||||
allSections = []
|
||||
lastRawResponse = None
|
||||
|
||||
# JSON Base Iteration System:
|
||||
# - jsonBase: the merged JSON string (replaces accumulatedDirectJson array)
|
||||
|
|
|
|||
|
|
@@ -261,35 +261,34 @@ class ContentExtractor:
|
|||
|
||||
# Check if it's standardized JSON format (has "documents" or "sections")
|
||||
if document.mimeType == "application/json":
|
||||
try:
|
||||
docBytes = self.services.interfaceDbComponent.getFileData(document.fileId)
|
||||
if docBytes:
|
||||
docBytes = self.services.interfaceDbComponent.getFileData(document.fileId)
|
||||
if docBytes:
|
||||
try:
|
||||
docData = docBytes.decode('utf-8')
|
||||
jsonData = json.loads(docData)
|
||||
except (json.JSONDecodeError, UnicodeDecodeError) as e:
|
||||
logger.warning(f"Could not parse JSON document {document.fileName}: {str(e)}")
|
||||
jsonData = None
|
||||
|
||||
if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData):
|
||||
logger.info(f"Document is already in standardized JSON format, using as reference")
|
||||
# Create reference ContentPart for structured JSON
|
||||
contentPart = ContentPart(
|
||||
id=f"ref_{document.id}",
|
||||
label=f"Reference: {document.fileName}",
|
||||
typeGroup="structure",
|
||||
mimeType="application/json",
|
||||
data=docData,
|
||||
metadata={
|
||||
"contentFormat": "reference",
|
||||
"documentId": document.id,
|
||||
"documentReference": f"docItem:{document.id}:{document.fileName}",
|
||||
"skipExtraction": True,
|
||||
"intent": "reference"
|
||||
}
|
||||
)
|
||||
allContentParts.append(contentPart)
|
||||
logger.info(f"✅ Using JSON document directly without extraction")
|
||||
continue # Skip normal extraction for this document
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not parse JSON document {document.fileName}, will extract normally: {str(e)}")
|
||||
# Continue with normal extraction
|
||||
if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData):
|
||||
logger.info(f"Document is already in standardized JSON format, using as reference")
|
||||
contentPart = ContentPart(
|
||||
id=f"ref_{document.id}",
|
||||
label=f"Reference: {document.fileName}",
|
||||
typeGroup="structure",
|
||||
mimeType="application/json",
|
||||
data=docData,
|
||||
metadata={
|
||||
"contentFormat": "reference",
|
||||
"documentId": document.id,
|
||||
"documentReference": f"docItem:{document.id}:{document.fileName}",
|
||||
"skipExtraction": True,
|
||||
"intent": "reference"
|
||||
}
|
||||
)
|
||||
allContentParts.append(contentPart)
|
||||
logger.info(f"✅ Using JSON document directly without extraction")
|
||||
continue
|
||||
|
||||
# Normal extraction path
|
||||
intent = getIntentForDocument(document.id, documentIntents)
|
||||
|
|
|
|||
|
|
@@ -230,9 +230,12 @@ class DocumentIntentAnalyzer:
|
|||
else:
|
||||
logger.debug(f"JSON document {document.id} has no documentData (actionType={actionType})")
|
||||
|
||||
return None
|
||||
except (json.JSONDecodeError, UnicodeDecodeError) as e:
|
||||
logger.debug(f"Error parsing document {document.fileName}: {str(e)}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.debug(f"Error resolving pre-extracted document {document.fileName}: {str(e)}")
|
||||
logger.error(f"Error resolving pre-extracted document {document.fileName}: {str(e)}")
|
||||
return None
|
||||
|
||||
def _buildIntentAnalysisPrompt(
|
||||
|
|
|
|||
|
|
@@ -330,17 +330,7 @@ class JsonMergeLogger:
|
|||
except Exception as e:
|
||||
logger.error(f"Failed to write merge log file: {e}")
|
||||
else:
|
||||
# No log file set - write individual file (fallback)
|
||||
currentFileDir = os.path.dirname(os.path.abspath(__file__))
|
||||
logDir = currentFileDir
|
||||
os.makedirs(logDir, exist_ok=True)
|
||||
logFilePath = os.path.join(logDir, f"{mergeId}.txt")
|
||||
try:
|
||||
with open(logFilePath, 'w', encoding='utf-8') as f:
|
||||
f.write(logContent)
|
||||
logger.info(f"JSON merge log written to: {logFilePath}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to write merge log file: {e}")
|
||||
logger.debug(f"JSON merge {mergeId} completed ({len(logContent)} chars log). Use initializeLogFile() to persist merge logs.")
|
||||
|
||||
# Clear buffer for next merge
|
||||
JsonMergeLogger._logBuffer = []
|
||||
|
|
|
|||
|
|
@@ -25,7 +25,7 @@ class StructureFiller:
|
|||
"""Handles filling document structure with content."""
|
||||
|
||||
# Default concurrency limit for parallel generation (chapters/sections)
|
||||
DEFAULT_MAX_CONCURRENT_GENERATION = 16
|
||||
DEFAULT_MAX_CONCURRENT_GENERATION = 5
|
||||
|
||||
def __init__(self, services, aiService):
|
||||
"""Initialize StructureFiller with service center and AI service access."""
|
||||
|
|
@@ -568,11 +568,16 @@ class StructureFiller:
|
|||
all_sections_list: List[Dict[str, Any]],
|
||||
language: str,
|
||||
outputFormat: str = "txt",
|
||||
calculateOverallProgress: callable = None
|
||||
calculateOverallProgress: callable = None,
|
||||
preExtractedText: Optional[str] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Process a single section and return its elements.
|
||||
Used for parallel processing of sections within a chapter.
|
||||
|
||||
When preExtractedText is provided, the section uses the pre-extracted
|
||||
content directly in its prompt instead of sending raw content parts
|
||||
through the heavy extraction pipeline (avoids chunking + N*M AI calls).
|
||||
"""
|
||||
sectionId = section.get("id")
|
||||
sectionTitle = section.get("title", sectionId)
|
||||
|
|
@@ -600,6 +605,149 @@ class StructureFiller:
|
|||
|
||||
elements = []
|
||||
|
||||
# --- Fast path: use pre-extracted text instead of raw content parts ---
|
||||
if preExtractedText and useAiCall and generationHint:
|
||||
logger.info(
|
||||
f"Section {sectionId}: Using pre-extracted text "
|
||||
f"({len(preExtractedText):,} chars) - lightweight AI path"
|
||||
)
|
||||
|
||||
for partId in contentPartIds:
|
||||
part = self._findContentPartById(partId, contentParts)
|
||||
if not part:
|
||||
continue
|
||||
cf = contentFormats.get(partId, part.metadata.get("contentFormat"))
|
||||
if cf == "reference":
|
||||
elements.append({
|
||||
"type": "reference",
|
||||
"documentReference": part.metadata.get("documentReference"),
|
||||
"label": part.metadata.get("usageHint", part.label)
|
||||
})
|
||||
elif cf == "object":
|
||||
if part.typeGroup == "image" and part.data:
|
||||
caption = (
|
||||
section.get("caption")
|
||||
or section.get("metadata", {}).get("caption")
|
||||
or part.metadata.get("caption", "")
|
||||
)
|
||||
elements.append({
|
||||
"type": "image",
|
||||
"content": {
|
||||
"base64Data": part.data,
|
||||
"altText": part.metadata.get("usageHint", part.label),
|
||||
"caption": caption
|
||||
},
|
||||
"caption": caption
|
||||
})
|
||||
|
||||
generationPrompt, templateStructure = self._buildSectionGenerationPrompt(
|
||||
section=section,
|
||||
contentParts=[],
|
||||
userPrompt=userPrompt,
|
||||
generationHint=generationHint,
|
||||
allSections=all_sections_list,
|
||||
sectionIndex=sectionIndex,
|
||||
isAggregation=False,
|
||||
language=language,
|
||||
outputFormat=outputFormat,
|
||||
preExtractedText=preExtractedText
|
||||
)
|
||||
|
||||
sectionOperationId = f"{fillOperationId}_section_{sectionId}"
|
||||
self.services.chat.progressLogStart(
|
||||
sectionOperationId,
|
||||
"Section Generation (Pre-extracted)",
|
||||
f"Section {sectionIndex + 1}/{totalSections}",
|
||||
f"{sectionTitle} (pre-extracted)",
|
||||
parentOperationId=chapterOperationId
|
||||
)
|
||||
|
||||
try:
|
||||
self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation")
|
||||
|
||||
operationType = OperationTypeEnum.DATA_ANALYSE
|
||||
options = AiCallOptions(
|
||||
operationType=operationType,
|
||||
priority=PriorityEnum.BALANCED,
|
||||
processingMode=ProcessingModeEnum.DETAILED
|
||||
)
|
||||
|
||||
checkWorkflowStopped(self.services)
|
||||
aiResponseJson = await self.aiService.callAiWithLooping(
|
||||
prompt=generationPrompt,
|
||||
options=options,
|
||||
debugPrefix=f"{chapterId}_section_{sectionId}",
|
||||
promptBuilder=self.buildSectionPromptWithContinuation,
|
||||
promptArgs={
|
||||
"section": section,
|
||||
"contentParts": [],
|
||||
"userPrompt": userPrompt,
|
||||
"generationHint": generationHint,
|
||||
"allSections": all_sections_list,
|
||||
"sectionIndex": sectionIndex,
|
||||
"isAggregation": False,
|
||||
"templateStructure": templateStructure,
|
||||
"basePrompt": generationPrompt,
|
||||
"language": language
|
||||
},
|
||||
operationId=sectionOperationId,
|
||||
userPrompt=userPrompt,
|
||||
contentParts=None,
|
||||
useCaseId="section_content"
|
||||
)
|
||||
|
||||
try:
|
||||
from modules.shared.jsonUtils import tryParseJson, repairBrokenJson
|
||||
if isinstance(aiResponseJson, str) and ("---" in aiResponseJson or aiResponseJson.count("```json") > 1):
|
||||
generatedElements = self._extractAndMergeMultipleJsonBlocks(aiResponseJson, contentType, sectionId)
|
||||
else:
|
||||
parsedResponse, parseError, cleanedStr = tryParseJson(aiResponseJson)
|
||||
if parsedResponse is None:
|
||||
logger.warning(f"Section {sectionId}: tryParseJson failed, attempting repair")
|
||||
repairedStr = repairBrokenJson(aiResponseJson)
|
||||
parsedResponse, parseError2, _ = tryParseJson(repairedStr)
|
||||
|
||||
if parsedResponse and isinstance(parsedResponse, dict):
|
||||
generatedElements = parsedResponse.get("elements", [])
|
||||
elif parsedResponse and isinstance(parsedResponse, list):
|
||||
generatedElements = parsedResponse
|
||||
else:
|
||||
generatedElements = []
|
||||
except Exception as parseErr:
|
||||
logger.error(f"Section {sectionId}: JSON parse error: {parseErr}")
|
||||
generatedElements = []
|
||||
|
||||
self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content")
|
||||
|
||||
class _AiResponse:
|
||||
def __init__(self, content):
|
||||
self.content = content
|
||||
|
||||
responseElements = await self._processAiResponseForSection(
|
||||
aiResponse=_AiResponse(aiResponseJson),
|
||||
contentType=contentType,
|
||||
operationType=operationType,
|
||||
sectionId=sectionId,
|
||||
generationHint=generationHint,
|
||||
generatedElements=generatedElements,
|
||||
section=section
|
||||
)
|
||||
elements.extend(responseElements)
|
||||
self.services.chat.progressLogFinish(sectionOperationId, True)
|
||||
|
||||
except Exception as e:
|
||||
self.services.chat.progressLogFinish(sectionOperationId, False)
|
||||
logger.error(f"Error in pre-extracted section {sectionId}: {e}")
|
||||
elements.append({
|
||||
"type": "error",
|
||||
"message": f"Error processing section {sectionId}: {str(e)}",
|
||||
"sectionId": sectionId
|
||||
})
|
||||
|
||||
return elements
|
||||
|
||||
# --- Standard path: process content parts directly ---
|
||||
|
||||
# Check whether aggregation is needed
|
||||
needsAggregation = self._needsAggregation(
|
||||
contentType=contentType,
|
||||
|
|
@@ -1507,6 +1655,156 @@ class StructureFiller:
|
|||
|
||||
return elements
|
||||
|
||||
async def _preExtractSharedContent(
|
||||
self,
|
||||
contentParts: List[ContentPart],
|
||||
allSectionTasks: List[Dict[str, Any]],
|
||||
userPrompt: str,
|
||||
parentOperationId: str
|
||||
) -> Dict[str, str]:
|
||||
"""
|
||||
Pre-extract content from large/shared content parts ONCE before parallel
|
||||
section filling. Returns dict mapping sectionId -> pre-extracted text.
|
||||
|
||||
Extracts a comprehensive plain-text summary per content part, then gives
|
||||
ALL sections referencing that part the SAME summary. Each section's own
|
||||
generationHint focuses the AI on the relevant aspect during generation.
|
||||
|
||||
This eliminates the N*M AI call explosion where N sections each independently
|
||||
chunk and process the same M-byte content part through the extraction pipeline.
|
||||
"""
|
||||
SIZE_THRESHOLD = 100_000
|
||||
MIN_SHARED_SECTIONS = 2
|
||||
|
||||
partToSections: Dict[str, List[Dict[str, Any]]] = {}
|
||||
for task in allSectionTasks:
|
||||
section = task["section"]
|
||||
for partId in section.get("contentPartIds", []):
|
||||
if partId not in partToSections:
|
||||
partToSections[partId] = []
|
||||
partToSections[partId].append(section)
|
||||
|
||||
if not partToSections:
|
||||
return {}
|
||||
|
||||
preExtractedCache: Dict[str, str] = {}
|
||||
|
||||
for partId, sections in partToSections.items():
|
||||
part = self._findContentPartById(partId, contentParts)
|
||||
if not part:
|
||||
continue
|
||||
|
||||
contentFormat = part.metadata.get("contentFormat", "unknown")
|
||||
if contentFormat != "extracted":
|
||||
continue
|
||||
|
||||
if part.typeGroup in ("image", "binary"):
|
||||
continue
|
||||
if part.mimeType and (
|
||||
part.mimeType.startswith("image/")
|
||||
or part.mimeType.startswith("video/")
|
||||
or part.mimeType.startswith("audio/")
|
||||
):
|
||||
continue
|
||||
|
||||
partSize = len(part.data) if part.data else 0
|
||||
numSections = len(sections)
|
||||
|
||||
if numSections < MIN_SHARED_SECTIONS and partSize < SIZE_THRESHOLD:
|
||||
continue
|
||||
|
||||
fileName = part.metadata.get("originalFileName", partId)
|
||||
logger.info(
|
||||
f"Pre-extracting content part {partId} "
|
||||
f"({partSize:,} bytes, referenced by {numSections} sections)"
|
||||
)
|
||||
|
||||
topicLines = []
|
||||
for section in sections:
|
||||
hint = (
|
||||
section.get("generationHint")
|
||||
or section.get("generation_hint")
|
||||
or section.get("title", "")
|
||||
)
|
||||
topicLines.append(f"- {hint}")
|
||||
topicsText = "\n".join(topicLines)
|
||||
|
||||
extractionPrompt = (
|
||||
"# TASK: Extract key information from this document\n\n"
|
||||
"Extract ALL relevant information from the provided content as "
|
||||
"plain text. The extracted content will be used to generate a report "
|
||||
"covering the topics listed below.\n\n"
|
||||
f"## User Request\n{userPrompt}\n\n"
|
||||
f"## Report topics that need data\n{topicsText}\n\n"
|
||||
"## Instructions\n"
|
||||
"- Extract key facts, data points, timestamps, error messages, "
|
||||
"statistics, and specific findings\n"
|
||||
"- Organize by theme but output as PLAIN TEXT (not JSON)\n"
|
||||
"- Be comprehensive but concise - include specific data, "
|
||||
"skip generic filler\n"
|
||||
"- Include concrete examples with exact values from the source\n"
|
||||
"- Do NOT add commentary or analysis - just extract the raw data\n"
|
||||
)
|
||||
|
||||
try:
|
||||
self.services.chat.progressLogUpdate(
|
||||
parentOperationId, 0.05,
|
||||
f"Pre-extracting content from {fileName} ({partSize:,} bytes)..."
|
||||
)
|
||||
|
||||
def _preExtractionProgress(chunkProgress, message):
|
||||
mapped = 0.05 + chunkProgress * 0.05
|
||||
self.services.chat.progressLogUpdate(
|
||||
parentOperationId, mapped,
|
||||
f"Pre-extraction: {message}"
|
||||
)
|
||||
|
||||
request = AiCallRequest(
|
||||
prompt=extractionPrompt,
|
||||
contentParts=[part],
|
||||
options=AiCallOptions(
|
||||
operationType=OperationTypeEnum.DATA_ANALYSE,
|
||||
priority=PriorityEnum.BALANCED,
|
||||
processingMode=ProcessingModeEnum.DETAILED
|
||||
)
|
||||
)
|
||||
|
||||
checkWorkflowStopped(self.services)
|
||||
response = await self.aiService.callAi(request, progressCallback=_preExtractionProgress)
|
||||
responseText = response.content if hasattr(response, "content") else str(response)
|
||||
|
||||
if responseText and len(responseText.strip()) > 50:
|
||||
for section in sections:
|
||||
sId = section.get("id", "unknown")
|
||||
preExtractedCache[sId] = responseText
|
||||
logger.info(
|
||||
f"Pre-extraction of {partId} successful: "
|
||||
f"{len(responseText):,} chars summary for {numSections} sections"
|
||||
)
|
||||
self.services.chat.progressLogUpdate(
|
||||
parentOperationId, 0.10,
|
||||
f"Pre-extraction complete ({len(responseText):,} chars). Starting section generation..."
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"Pre-extraction of {partId} returned empty/short response "
|
||||
f"({len(responseText) if responseText else 0} chars), "
|
||||
"sections will fall back to direct extraction"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Pre-extraction of {partId} failed: {e}. "
|
||||
"Sections will fall back to direct extraction."
|
||||
)
|
||||
|
||||
if preExtractedCache:
|
||||
logger.info(
|
||||
f"Pre-extraction complete: {len(preExtractedCache)} sections "
|
||||
"have pre-extracted content (will use lightweight AI path)"
|
||||
)
|
||||
|
||||
return preExtractedCache
|
||||
|
||||
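# Illustrative call-count comparison (not part of this commit), assuming 3 large
# shared content parts referenced by 12 sections:
#   without pre-extraction: each section re-processes every part through the
#       extraction pipeline -> roughly 12 x 3 = 36 heavy extraction passes
#       plus 12 generation calls
#   with pre-extraction:    3 extraction calls (one per part) + 12 lightweight
#       generation calls on the shared summaries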
async def _fillChapterSections(
|
||||
self,
|
||||
chapterStructure: Dict[str, Any],
|
||||
|
|
@@ -1564,27 +1862,42 @@ class StructureFiller:
|
|||
"docFormat": docFormat # Include output format
|
||||
})
|
||||
|
||||
MAX_TOTAL_SECTIONS = 35
|
||||
if totalSections > MAX_TOTAL_SECTIONS:
|
||||
logger.warning(
|
||||
f"Structure has {totalSections} sections (limit {MAX_TOTAL_SECTIONS}). "
|
||||
"Truncating to stay within budget."
|
||||
)
|
||||
allSectionTasks = allSectionTasks[:MAX_TOTAL_SECTIONS]
|
||||
totalSections = len(allSectionTasks)
|
||||
|
||||
preExtractedCache = await self._preExtractSharedContent(
|
||||
contentParts, allSectionTasks, userPrompt, fillOperationId
|
||||
)
|
||||
|
||||
logger.info(f"Starting FULLY PARALLEL section generation: {totalSections} sections across {totalChapters} chapters")
|
||||
|
||||
# Create task wrapper for each section with progress tracking
|
||||
async def processSectionWithSemaphore(taskInfo):
|
||||
checkWorkflowStopped(self.services)
|
||||
sectionId = taskInfo["section"].get("id", "unknown")
|
||||
async with sectionSemaphore:
|
||||
result = await self._processSingleSection(
|
||||
section=taskInfo["section"],
|
||||
sectionIndex=taskInfo["sectionIndex"],
|
||||
totalSections=taskInfo["chapterSectionCount"],
|
||||
chapterIndex=0, # Not used for sequential logic anymore
|
||||
chapterIndex=0,
|
||||
totalChapters=totalChapters,
|
||||
chapterId=taskInfo["chapterId"],
|
||||
chapterOperationId=fillOperationId, # Use fillOperationId as parent (no chapter-level ops in parallel mode)
|
||||
chapterOperationId=fillOperationId,
|
||||
fillOperationId=fillOperationId,
|
||||
contentParts=contentParts,
|
||||
userPrompt=userPrompt,
|
||||
all_sections_list=all_sections_list,
|
||||
language=taskInfo["docLanguage"],
|
||||
outputFormat=taskInfo.get("docFormat", "txt"), # Pass output format
|
||||
calculateOverallProgress=lambda *args: completedSections[0] / totalSections if totalSections > 0 else 1.0
|
||||
outputFormat=taskInfo.get("docFormat", "txt"),
|
||||
calculateOverallProgress=lambda *args: completedSections[0] / totalSections if totalSections > 0 else 1.0,
|
||||
preExtractedText=preExtractedCache.get(sectionId)
|
||||
)
|
||||
|
||||
# Update progress after each section completes
|
||||
|
|
@@ -1810,6 +2123,7 @@ GENERATION HINT: {generationHint}
|
|||
- Each section should serve a clear purpose with meaningful data
|
||||
- If no relevant data exists for a topic, do NOT create a section for it
|
||||
- Prefer ONE comprehensive section over multiple sparse sections
|
||||
- HARD LIMIT: Maximum 5 sections per chapter. Combine related subtopics into single sections to stay within this limit.
|
||||
|
||||
**CRITICAL**: The chapter's generationHint above describes what content this chapter should generate. If the generationHint references documents/images/data, then EACH section that generates content for this chapter MUST assign the relevant ContentParts from AVAILABLE CONTENT PARTS below.
|
||||
|
||||
|
|
@@ -1893,7 +2207,8 @@ Return only valid JSON. Do not include any explanatory text outside the JSON.
|
|||
sectionIndex: Optional[int] = None,
|
||||
isAggregation: bool = False,
|
||||
language: str = "en",
|
||||
outputFormat: str = "txt"
|
||||
outputFormat: str = "txt",
|
||||
preExtractedText: Optional[str] = None
|
||||
) -> tuple[str, str]:
|
||||
"""Baue Prompt für Section-Generierung mit vollständigem Kontext."""
|
||||
# Filter out None values
|
||||
|
|
@@ -2057,7 +2372,7 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles,
|
|||
5. For table: Extract all rows from the context. Return {{"headers": [...], "rows": []}} only if no data exists.
|
||||
6. Format based on content_type ({effectiveContentType}).
|
||||
7. No HTML/styling: Plain text only, no markup.
|
||||
8. CONTINUE UNTIL COMPLETE: Extract ALL data from the provided context. Do NOT stop early because you think the response might be too long. Do NOT truncate or abbreviate. Do not impose artificial limits on yourself.
|
||||
8. Focus on the MOST RELEVANT information for this section's topic. Extract key facts, data, and findings. Omit redundant, repetitive, or tangential content.
|
||||
|
||||
|
||||
## OUTPUT FORMAT
|
||||
|
|
@ -2083,6 +2398,62 @@ Output requirements:
|
|||
{userPrompt}
|
||||
```
|
||||
|
||||
## CONTEXT
|
||||
{contextText if contextText else ""}
|
||||
"""
|
||||
elif preExtractedText:
|
||||
prompt = f"""# TASK: Generate Section Content from Pre-Extracted Data
|
||||
|
||||
LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
|
||||
|
||||
## SECTION METADATA
|
||||
- Section ID: {sectionId}
|
||||
- Content Type: {effectiveContentType}
|
||||
- Generation Hint: {generationHint}{formatNoteAggr}
|
||||
|
||||
## CONTENT EFFICIENCY PRINCIPLES
|
||||
- Generate COMPACT content: Focus on essential facts only
|
||||
- AVOID verbose text, filler phrases, or redundant explanations
|
||||
- Be CONCISE and direct - every word should add value
|
||||
- NO introductory phrases like "This section describes..." or "Here we present..."
|
||||
- Minimize output size for efficient processing
|
||||
|
||||
## PRE-EXTRACTED CONTENT FOR THIS SECTION
|
||||
```
|
||||
{preExtractedText}
|
||||
```
|
||||
|
||||
## INSTRUCTIONS
|
||||
1. Use ONLY the pre-extracted content above. Never invent or generate data not present in it.
|
||||
2. If the pre-extracted content is empty, return empty structures.
|
||||
3. Format based on content_type ({effectiveContentType}).
|
||||
4. Return only valid JSON with "elements" array.
|
||||
5. No HTML/styling: Plain text only, no markup.
|
||||
6. Focus on the MOST RELEVANT information. Be concise.
|
||||
|
||||
## OUTPUT FORMAT
|
||||
Return a JSON object with this structure:
|
||||
|
||||
{{
|
||||
"elements": [
|
||||
{{
|
||||
"type": "{effectiveContentType}",
|
||||
"content": {contentStructureExample}
|
||||
}}
|
||||
]
|
||||
}}
|
||||
|
||||
Output requirements:
|
||||
- "content" must be an object (never a string)
|
||||
- Return only valid JSON - no text before, no text after, no comments, no explanations, no markdown code fences
|
||||
- Start with {{ and end with }} - return ONLY the JSON object itself
|
||||
- No invented data: Return empty structures if pre-extracted content is empty
|
||||
|
||||
## USER REQUEST
|
||||
```
|
||||
{userPrompt}
|
||||
```
|
||||
|
||||
## CONTEXT
|
||||
{contextText if contextText else ""}
|
||||
"""
|
||||
|
|
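Both prompt variants demand a bare JSON object with an "elements" array whose "content" entries are objects, and they forbid markdown fences around the response. A small validation sketch of that contract (the sample payload is invented for illustration):

import json

def parseElementsResponse(raw: str) -> list:
    # The prompt requires pure JSON with no surrounding text or code fences
    payload = json.loads(raw)
    elements = payload.get("elements", [])
    for element in elements:
        if not isinstance(element.get("content"), dict):
            raise ValueError("'content' must be an object, not a string")
    return elements

sample = '{"elements": [{"type": "table", "content": {"headers": ["Year"], "rows": [["2024"]]}}]}'
print(parseElementsResponse(sample))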
@ -2117,7 +2488,7 @@ LANGUAGE: Generate all content in {language.upper()} language. All text, titles,
|
|||
3. Format based on content_type ({effectiveContentType}).
|
||||
4. Return only valid JSON with "elements" array.
|
||||
5. No HTML/styling: Plain text only, no markup.
|
||||
6. CONTINUE UNTIL COMPLETE: Extract ALL data from the provided context. Do NOT stop early because you think the response might be too long. Do NOT truncate or abbreviate. Do not impose artificial limits on yourself.
|
||||
6. Focus on the MOST RELEVANT information for this section's topic. Extract key facts, data, and findings. Omit redundant, repetitive, or tangential content.
|
||||
|
||||
## OUTPUT FORMAT
|
||||
Return a JSON object with this structure:
|
||||
|
|
|
|||
|
|
@ -430,6 +430,7 @@ Then chapters that generate those generic content types MUST assign the relevant
|
|||
## CHAPTER STRUCTURE REQUIREMENTS
|
||||
- Generate chapters based on USER REQUEST - analyze what structure the user wants
|
||||
- Create ONLY the minimum chapters needed to cover the user's request - avoid over-structuring
|
||||
- HARD LIMIT: Maximum 7 chapters per document. If the topic can be covered in fewer, prefer fewer. Combine related topics into single chapters rather than creating many small ones.
|
||||
- IMPORTANT: Each chapter MUST have ALL these fields:
|
||||
- id: Unique identifier (e.g., "chapter_1")
|
||||
- level: Heading level (1, 2, 3, etc.)
|
||||
|
|
|
|||
|
|
@ -205,36 +205,20 @@ class BillingService:
|
|||
workflowId: str = None,
|
||||
aicoreProvider: str = None,
|
||||
aicoreModel: str = None,
|
||||
description: str = None
|
||||
description: str = None,
|
||||
processingTime: float = None,
|
||||
bytesSent: int = None,
|
||||
bytesReceived: int = None,
|
||||
errorCount: int = None
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Record AI usage cost as a billing transaction.
|
||||
|
||||
This method:
|
||||
1. Applies the pricing markup
|
||||
2. Creates a DEBIT transaction
|
||||
3. Updates the account balance
|
||||
|
||||
Args:
|
||||
priceCHF: Base price from AI model (before markup)
|
||||
workflowId: Optional workflow ID
|
||||
aicoreProvider: AICore provider name (e.g., 'anthropic', 'openai')
|
||||
aicoreModel: AICore model name (e.g., 'claude-4-sonnet', 'gpt-4o')
|
||||
description: Optional description
|
||||
|
||||
Returns:
|
||||
Created transaction dict or None if not recorded
|
||||
"""
|
||||
"""Record AI usage cost as a billing transaction with markup applied."""
|
||||
if priceCHF <= 0:
|
||||
return None
|
||||
|
||||
# Apply markup
|
||||
finalPrice = self.calculatePriceWithMarkup(priceCHF)
|
||||
|
||||
if finalPrice <= 0:
|
||||
return None
|
||||
|
||||
# Build description
|
||||
if not description:
|
||||
description = f"AI Usage: {aicoreModel or aicoreProvider or 'unknown'}"
|
||||
|
||||
|
|
@ -247,9 +231,17 @@ class BillingService:
|
|||
featureCode=self.featureCode,
|
||||
aicoreProvider=aicoreProvider,
|
||||
aicoreModel=aicoreModel,
|
||||
description=description
|
||||
description=description,
|
||||
processingTime=processingTime,
|
||||
bytesSent=bytesSent,
|
||||
bytesReceived=bytesReceived,
|
||||
errorCount=errorCount
|
||||
)
|
||||
|
||||
def getWorkflowCost(self, workflowId: str) -> float:
|
||||
"""Get total cost for a workflow from billing transactions."""
|
||||
return self._billingInterface.getWorkflowCost(workflowId)
|
||||
|
||||
# =========================================================================
|
||||
# Provider Permission Check (via RBAC)
|
||||
# =========================================================================
|
||||
|
|
|
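recordAiUsageCost books the DEBIT only after calculatePriceWithMarkup has been applied; the markup rule itself is not part of this diff. A plausible percentage-based sketch, where both the 20% figure and the rounding are assumptions made purely for illustration:

def calculatePriceWithMarkup(priceCHF: float, markupPercent: float = 20.0) -> float:
    # Hypothetical rule: base price plus a fixed percentage, rounded to 4 decimals
    if priceCHF <= 0:
        return 0.0
    return round(priceCHF * (1 + markupPercent / 100.0), 4)

print(calculatePriceWithMarkup(0.0125))  # 0.015 under the assumed 20% markup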
|||
|
|
@ -4,7 +4,7 @@
|
|||
import logging
|
||||
from typing import Dict, Any, List, Optional, Callable
|
||||
from modules.datamodels.datamodelUam import User, UserConnection
|
||||
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage, ChatStat, ChatLog
|
||||
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage, ChatLog
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||
from modules.shared.progressLogger import ProgressLogger
|
||||
|
||||
|
|
@ -411,23 +411,159 @@ class ChatService:
|
|||
return None
|
||||
|
||||
def getFileInfo(self, fileId: str) -> Dict[str, Any]:
|
||||
"""Get file information"""
|
||||
file_item = self.interfaceDbComponent.getFile(fileId)
|
||||
if file_item:
|
||||
"""Get file information including new fields (tags, folderId, description, status)."""
|
||||
fileItem = self.interfaceDbComponent.getFile(fileId)
|
||||
if fileItem:
|
||||
return {
|
||||
"id": file_item.id,
|
||||
"fileName": file_item.fileName,
|
||||
"size": file_item.fileSize,
|
||||
"mimeType": file_item.mimeType,
|
||||
"fileHash": file_item.fileHash,
|
||||
"creationDate": file_item.creationDate
|
||||
"id": fileItem.id,
|
||||
"fileName": fileItem.fileName,
|
||||
"size": fileItem.fileSize,
|
||||
"mimeType": fileItem.mimeType,
|
||||
"fileHash": fileItem.fileHash,
|
||||
"creationDate": fileItem.creationDate,
|
||||
"tags": getattr(fileItem, "tags", None),
|
||||
"folderId": getattr(fileItem, "folderId", None),
|
||||
"description": getattr(fileItem, "description", None),
|
||||
"status": getattr(fileItem, "status", None),
|
||||
}
|
||||
return None
|
||||
|
||||
def getFileData(self, fileId: str) -> bytes:
|
||||
"""Get file data by ID"""
|
||||
"""Get file data by ID."""
|
||||
return self.interfaceDbComponent.getFileData(fileId)
|
||||
|
||||
def getFileContent(self, fileId: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get file content as text or base64 via FilePreview."""
|
||||
preview = self.interfaceDbComponent.getFileContent(fileId)
|
||||
if preview:
|
||||
return preview.toDictWithBase64Encoding()
|
||||
return None
|
||||
|
||||
def listFiles(
|
||||
self,
|
||||
folderId: str = None,
|
||||
tags: List[str] = None,
|
||||
search: str = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""List files for the current user with optional filters.
|
||||
|
||||
Args:
|
||||
folderId: Filter by folder (None = root / all).
|
||||
tags: Filter by tags (any match).
|
||||
search: Search in fileName and description.
|
||||
|
||||
Returns:
|
||||
List of file info dicts.
|
||||
"""
|
||||
allFiles = self.interfaceDbComponent.getAllFiles()
|
||||
results = []
|
||||
for fileItem in allFiles:
|
||||
if folderId is not None:
|
||||
itemFolderId = getattr(fileItem, "folderId", None)
|
||||
if itemFolderId != folderId:
|
||||
continue
|
||||
|
||||
if tags:
|
||||
itemTags = getattr(fileItem, "tags", None) or []
|
||||
if not any(t in itemTags for t in tags):
|
||||
continue
|
||||
|
||||
if search:
|
||||
searchLower = search.lower()
|
||||
nameMatch = searchLower in (fileItem.fileName or "").lower()
|
||||
descMatch = searchLower in (getattr(fileItem, "description", None) or "").lower()
|
||||
if not nameMatch and not descMatch:
|
||||
continue
|
||||
|
||||
results.append({
|
||||
"id": fileItem.id,
|
||||
"fileName": fileItem.fileName,
|
||||
"mimeType": fileItem.mimeType,
|
||||
"fileSize": fileItem.fileSize,
|
||||
"creationDate": fileItem.creationDate,
|
||||
"tags": getattr(fileItem, "tags", None),
|
||||
"folderId": getattr(fileItem, "folderId", None),
|
||||
"description": getattr(fileItem, "description", None),
|
||||
"status": getattr(fileItem, "status", None),
|
||||
})
|
||||
return results
|
||||
|
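A short usage sketch for the new listing filters; chatService stands in for an already constructed ChatService, and the IDs and tags are placeholders:

folderFiles = chatService.listFiles(folderId="folder_123")
taggedFiles = chatService.listFiles(tags=["invoice", "2024"])   # any matching tag is enough
foundFiles = chatService.listFiles(search="quarterly report")   # matches fileName or description

for fileInfo in foundFiles:
    print(fileInfo["fileName"], fileInfo.get("status"))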
||||
def listFolders(self, parentId: str = None) -> List[Dict[str, Any]]:
|
||||
"""List file folders for the current user.
|
||||
|
||||
Args:
|
||||
parentId: Parent folder ID (None = root folders).
|
||||
|
||||
Returns:
|
||||
List of folder dicts.
|
||||
"""
|
||||
from modules.datamodels.datamodelFileFolder import FileFolder
|
||||
recordFilter = {"_createdBy": self.user.id if self.user else ""}
|
||||
if parentId is not None:
|
||||
recordFilter["parentId"] = parentId
|
||||
else:
|
||||
recordFilter["parentId"] = None
|
||||
return self.interfaceDbComponent.db.getRecordset(FileFolder, recordFilter=recordFilter)
|
||||
|
||||
def createFolder(self, name: str, parentId: str = None) -> Dict[str, Any]:
|
||||
"""Create a new file folder."""
|
||||
from modules.datamodels.datamodelFileFolder import FileFolder
|
||||
folder = FileFolder(name=name, parentId=parentId)
|
||||
return self.interfaceDbComponent.db.recordCreate(FileFolder, folder)
|
||||
|
||||
# ---- DataSource CRUD ----
|
||||
|
||||
def createDataSource(
|
||||
self, connectionId: str, sourceType: str, path: str, label: str,
|
||||
featureInstanceId: str = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Create a new external data source reference."""
|
||||
from modules.datamodels.datamodelDataSource import DataSource
|
||||
ds = DataSource(
|
||||
connectionId=connectionId,
|
||||
sourceType=sourceType,
|
||||
path=path,
|
||||
label=label,
|
||||
featureInstanceId=featureInstanceId or self._context.feature_instance_id or "",
|
||||
mandateId=self._context.mandate_id or "",
|
||||
userId=self.user.id if self.user else "",
|
||||
)
|
||||
return self.interfaceDbComponent.db.recordCreate(DataSource, ds)
|
||||
|
||||
def listDataSources(self, featureInstanceId: str = None) -> List[Dict[str, Any]]:
|
||||
"""List data sources, optionally filtered by feature instance."""
|
||||
from modules.datamodels.datamodelDataSource import DataSource
|
||||
recordFilter = {}
|
||||
if featureInstanceId:
|
||||
recordFilter["featureInstanceId"] = featureInstanceId
|
||||
return self.interfaceDbComponent.db.getRecordset(DataSource, recordFilter=recordFilter)
|
||||
|
||||
def getDataSource(self, dataSourceId: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get a single data source by ID."""
|
||||
from modules.datamodels.datamodelDataSource import DataSource
|
||||
results = self.interfaceDbComponent.db.getRecordset(DataSource, recordFilter={"id": dataSourceId})
|
||||
return results[0] if results else None
|
||||
|
||||
def deleteDataSource(self, dataSourceId: str) -> bool:
|
||||
"""Delete a data source."""
|
||||
from modules.datamodels.datamodelDataSource import DataSource
|
||||
try:
|
||||
self.interfaceDbComponent.db.recordDelete(DataSource, dataSourceId)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete DataSource {dataSourceId}: {e}")
|
||||
return False
|
||||
|
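The DataSource methods above compose into a simple lifecycle. A sketch assuming an existing UserConnection ID; all identifiers are placeholders, and the value returned by recordCreate is assumed to be a dict containing an "id" key:

dataSource = chatService.createDataSource(
    connectionId="conn_abc",
    sourceType="sharepoint",
    path="/sites/finance/Shared Documents",
    label="Finance documents",
)
print(chatService.listDataSources())
chatService.deleteDataSource(dataSource["id"])  # assumes the created record comes back with an "id"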
||||
def getUserConnections(self) -> List[Dict[str, Any]]:
|
||||
"""Get all UserConnections for the current user."""
|
||||
try:
|
||||
if self.interfaceDbApp and self.user:
|
||||
connections = self.interfaceDbApp.getUserConnections(self.user.id)
|
||||
return [c.model_dump() if hasattr(c, "model_dump") else c for c in (connections or [])]
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting user connections: {e}")
|
||||
return []
|
||||
|
||||
def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
|
||||
"""
|
||||
Diagnose document access issues and provide recovery information.
|
||||
|
|
@ -688,35 +824,6 @@ class ChatService:
|
|||
workflow.logs.append(chatLog)
|
||||
return chatLog
|
||||
|
||||
def storeWorkflowStat(self, workflow: Any, aiResponse: Any, process: str) -> ChatStat:
|
||||
"""Persist workflow-level ChatStat from AiCallResponse and append to workflow stats list.
|
||||
|
||||
Billing is handled at the AI call source (interfaceAiObjects._callWithModel)
|
||||
via billingCallback - not here. This method only handles workflow stats.
|
||||
"""
|
||||
try:
|
||||
statData = {
|
||||
"workflowId": workflow.id,
|
||||
"process": process,
|
||||
"engine": aiResponse.modelName,
|
||||
"priceCHF": aiResponse.priceCHF,
|
||||
"processingTime": aiResponse.processingTime,
|
||||
"bytesSent": aiResponse.bytesSent,
|
||||
"bytesReceived": aiResponse.bytesReceived,
|
||||
"errorCount": aiResponse.errorCount
|
||||
}
|
||||
|
||||
stat = self.interfaceDbChat.createStat(statData)
|
||||
|
||||
if not hasattr(workflow, 'stats') or workflow.stats is None:
|
||||
workflow.stats = []
|
||||
workflow.stats.append(stat)
|
||||
|
||||
return stat
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to store workflow stat: {e}")
|
||||
raise
|
||||
|
||||
def updateMessage(self, messageId: str, messageData: Dict[str, Any]):
|
||||
"""Update message by delegating to the chat interface"""
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -2,90 +2,147 @@
|
|||
# All rights reserved.
|
||||
from typing import Any, Dict, List
|
||||
import json
|
||||
import logging
|
||||
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from ..subRegistry import Chunker
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class StructureChunker(Chunker):
|
||||
def chunk(self, part: ContentPart, options: Dict[str, Any]) -> list[Dict[str, Any]]:
|
||||
maxBytes = int(options.get("structureChunkSize", 40000))
|
||||
data = part.data or ""
|
||||
# best-effort: try JSON list/object bucketing; else fallback to line-based
|
||||
chunks: List[Dict[str, Any]] = []
|
||||
|
||||
try:
|
||||
obj = json.loads(data)
|
||||
def emit(bucket: Any):
|
||||
text = json.dumps(bucket, ensure_ascii=False)
|
||||
chunks.append({"data": text, "size": len(text.encode('utf-8')), "order": len(chunks)})
|
||||
if isinstance(obj, list):
|
||||
bucket: list[Any] = []
|
||||
size = 0
|
||||
for item in obj:
|
||||
text = json.dumps(item, ensure_ascii=False)
|
||||
s = len(text.encode('utf-8'))
|
||||
if size + s > maxBytes and bucket:
|
||||
emit(bucket)
|
||||
bucket = [item]
|
||||
size = s
|
||||
else:
|
||||
bucket.append(item)
|
||||
size += s
|
||||
if bucket:
|
||||
emit(bucket)
|
||||
else:
|
||||
# JSON object (dict) - check if it fits
|
||||
text = json.dumps(obj, ensure_ascii=False)
|
||||
textSize = len(text.encode('utf-8'))
|
||||
if textSize <= maxBytes:
|
||||
emit(obj)
|
||||
else:
|
||||
# Object too large - try to split by keys if possible
|
||||
# For large objects, we need to chunk by character boundaries
|
||||
# since we can't split JSON objects arbitrarily
|
||||
if isinstance(obj, dict) and len(obj) > 1:
|
||||
# Try to split object into multiple chunks by keys
|
||||
# This preserves JSON structure better than line-based chunking
|
||||
currentChunk: Dict[str, Any] = {}
|
||||
currentSize = 2 # Start with "{}" overhead
|
||||
for key, value in obj.items():
|
||||
itemText = json.dumps({key: value}, ensure_ascii=False)
|
||||
itemSize = len(itemText.encode('utf-8'))
|
||||
# Account for comma and spacing between items
|
||||
if currentChunk:
|
||||
itemSize += 2 # ", " separator
|
||||
self._chunkValue(obj, maxBytes, chunks)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
self._chunkByLines(data, maxBytes, chunks)
|
||||
|
||||
if currentSize + itemSize > maxBytes and currentChunk:
|
||||
# Current chunk is full, emit it
|
||||
emit(currentChunk)
|
||||
currentChunk = {key: value}
|
||||
currentSize = len(itemText.encode('utf-8'))
|
||||
else:
|
||||
currentChunk[key] = value
|
||||
currentSize += itemSize
|
||||
|
||||
# Emit remaining chunk
|
||||
if currentChunk:
|
||||
emit(currentChunk)
|
||||
else:
|
||||
# Single large value or can't split - fallback to line chunking
|
||||
raise ValueError("too large")
|
||||
except Exception:
|
||||
current: List[str] = []
|
||||
size = 0
|
||||
for line in data.split('\n'):
|
||||
s = len(line.encode('utf-8')) + 1
|
||||
if size + s > maxBytes and current:
|
||||
text = '\n'.join(current)
|
||||
chunks.append({"data": text, "size": len(text.encode('utf-8')), "order": len(chunks)})
|
||||
current = [line]
|
||||
size = s
|
||||
else:
|
||||
current.append(line)
|
||||
size += s
|
||||
if current:
|
||||
text = '\n'.join(current)
|
||||
chunks.append({"data": text, "size": len(text.encode('utf-8')), "order": len(chunks)})
|
||||
return chunks
|
||||
|
||||
def _chunkValue(self, obj: Any, maxBytes: int, chunks: List[Dict[str, Any]]):
|
||||
"""Recursively chunk a JSON value (list or dict) into pieces <= maxBytes."""
|
||||
text = json.dumps(obj, ensure_ascii=False)
|
||||
if len(text.encode('utf-8')) <= maxBytes:
|
||||
self._emit(obj, chunks)
|
||||
return
|
||||
|
||||
if isinstance(obj, list):
|
||||
self._chunkList(obj, maxBytes, chunks)
|
||||
elif isinstance(obj, dict):
|
||||
self._chunkDict(obj, maxBytes, chunks)
|
||||
else:
|
||||
self._chunkByLines(text, maxBytes, chunks)
|
||||
|
||||
def _chunkList(self, items: list, maxBytes: int, chunks: List[Dict[str, Any]]):
|
||||
"""Split a JSON array into sub-arrays that each fit within maxBytes."""
|
||||
bucket: list = []
|
||||
bucketSize = 2 # "[]" overhead
|
||||
|
||||
for item in items:
|
||||
itemText = json.dumps(item, ensure_ascii=False)
|
||||
itemSize = len(itemText.encode('utf-8'))
|
||||
separator = 2 if bucket else 0 # ", "
|
||||
|
||||
if bucketSize + itemSize + separator > maxBytes and bucket:
|
||||
self._emit(bucket, chunks)
|
||||
bucket = []
|
||||
bucketSize = 2
|
||||
separator = 0
|
||||
|
||||
if itemSize + 2 > maxBytes:
|
||||
if bucket:
|
||||
self._emit(bucket, chunks)
|
||||
bucket = []
|
||||
bucketSize = 2
|
||||
self._chunkValue(item, maxBytes, chunks)
|
||||
else:
|
||||
bucket.append(item)
|
||||
bucketSize += itemSize + separator
|
||||
|
||||
if bucket:
|
||||
self._emit(bucket, chunks)
|
||||
|
||||
def _chunkDict(self, obj: dict, maxBytes: int, chunks: List[Dict[str, Any]]):
|
||||
"""Split a JSON object by keys. If a single key's value exceeds maxBytes, recurse into it."""
|
||||
if len(obj) <= 1:
|
||||
key, value = next(iter(obj.items()))
|
||||
if isinstance(value, (list, dict)):
|
||||
self._chunkSingleKeyValue(key, value, maxBytes, chunks)
|
||||
else:
|
||||
text = json.dumps(obj, ensure_ascii=False)
|
||||
self._chunkByLines(text, maxBytes, chunks)
|
||||
return
|
||||
|
||||
currentChunk: Dict[str, Any] = {}
|
||||
currentSize = 2 # "{}" overhead
|
||||
|
||||
for key, value in obj.items():
|
||||
itemText = json.dumps({key: value}, ensure_ascii=False)
|
||||
itemSize = len(itemText.encode('utf-8'))
|
||||
separator = 2 if currentChunk else 0
|
||||
|
||||
if currentSize + itemSize + separator > maxBytes and currentChunk:
|
||||
self._emit(currentChunk, chunks)
|
||||
currentChunk = {}
|
||||
currentSize = 2
|
||||
separator = 0
|
||||
|
||||
if itemSize + 2 > maxBytes:
|
||||
if currentChunk:
|
||||
self._emit(currentChunk, chunks)
|
||||
currentChunk = {}
|
||||
currentSize = 2
|
||||
if isinstance(value, (list, dict)):
|
||||
self._chunkSingleKeyValue(key, value, maxBytes, chunks)
|
||||
else:
|
||||
self._chunkByLines(itemText, maxBytes, chunks)
|
||||
else:
|
||||
currentChunk[key] = value
|
||||
currentSize += itemSize + separator
|
||||
|
||||
if currentChunk:
|
||||
self._emit(currentChunk, chunks)
|
||||
|
||||
def _chunkSingleKeyValue(self, key: str, value: Any, maxBytes: int, chunks: List[Dict[str, Any]]):
|
||||
"""Handle a single dict key whose value is too large. Wraps sub-chunks back in {key: subChunk}."""
|
||||
subChunks: List[Dict[str, Any]] = []
|
||||
self._chunkValue(value, maxBytes, subChunks)
|
||||
|
||||
for sub in subChunks:
|
||||
subData = json.loads(sub["data"])
|
||||
wrapped = {key: subData}
|
||||
wrappedText = json.dumps(wrapped, ensure_ascii=False)
|
||||
wrappedSize = len(wrappedText.encode('utf-8'))
|
||||
if wrappedSize <= maxBytes:
|
||||
self._emit(wrapped, chunks)
|
||||
else:
|
||||
self._chunkByLines(wrappedText, maxBytes, chunks)
|
||||
|
||||
def _emit(self, bucket: Any, chunks: List[Dict[str, Any]]):
|
||||
text = json.dumps(bucket, ensure_ascii=False)
|
||||
chunks.append({"data": text, "size": len(text.encode('utf-8')), "order": len(chunks)})
|
||||
|
||||
def _chunkByLines(self, data: str, maxBytes: int, chunks: List[Dict[str, Any]]):
|
||||
"""Line-based fallback for content that cannot be split structurally."""
|
||||
current: List[str] = []
|
||||
size = 0
|
||||
for line in data.split('\n'):
|
||||
s = len(line.encode('utf-8')) + 1
|
||||
if size + s > maxBytes and current:
|
||||
text = '\n'.join(current)
|
||||
chunks.append({"data": text, "size": len(text.encode('utf-8')), "order": len(chunks)})
|
||||
current = [line]
|
||||
size = s
|
||||
else:
|
||||
current.append(line)
|
||||
size += s
|
||||
if current:
|
||||
text = '\n'.join(current)
|
||||
chunks.append({"data": text, "size": len(text.encode('utf-8')), "order": len(chunks)})
|
||||
|
||||
|
||||
|
|
|
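A minimal sketch of driving the refactored chunker, with a stand-in for ContentPart so the example runs on its own; the 200-byte budget is deliberately tiny to force splitting:

import json

class FakePart:
    # Hypothetical substitute for modules.datamodels.datamodelExtraction.ContentPart
    def __init__(self, data):
        self.data = data

bigList = [{"row": i, "value": "x" * 20} for i in range(50)]
part = FakePart(json.dumps(bigList))

chunker = StructureChunker()
chunks = chunker.chunk(part, {"structureChunkSize": 200})
print(len(chunks), [c["size"] for c in chunks[:3]])  # each chunk stays within the byte budget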
|||
|
|
@ -0,0 +1,175 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Audio extractor for common audio formats.
|
||||
|
||||
Extracts metadata (duration, bitrate, sample rate, channels) and produces
|
||||
an `audiostream` ContentPart. For files under 10 MB the base64 audio data
|
||||
is included; larger files only get metadata.
|
||||
|
||||
Optional dependency: mutagen (for rich metadata).
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
import base64
|
||||
import logging
|
||||
import struct
|
||||
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from ..subUtils import makeId
|
||||
from ..subRegistry import Extractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_AUDIO_MIME_TYPES = [
|
||||
"audio/mpeg",
|
||||
"audio/mp3",
|
||||
"audio/wav",
|
||||
"audio/x-wav",
|
||||
"audio/ogg",
|
||||
"audio/flac",
|
||||
"audio/x-flac",
|
||||
"audio/mp4",
|
||||
"audio/x-m4a",
|
||||
"audio/aac",
|
||||
"audio/webm",
|
||||
]
|
||||
_AUDIO_EXTENSIONS = [".mp3", ".wav", ".ogg", ".flac", ".m4a", ".aac", ".wma", ".webm"]
|
||||
|
||||
_MAX_INLINE_SIZE = 10 * 1024 * 1024 # 10 MB
|
||||
|
||||
|
||||
class AudioExtractor(Extractor):
|
||||
"""Extractor for audio files.
|
||||
|
||||
Produces:
|
||||
- 1 text ContentPart with metadata summary
|
||||
- 1 audiostream ContentPart (base64 data included only if < 10 MB)
|
||||
"""
|
||||
|
||||
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
|
||||
if mimeType in _AUDIO_MIME_TYPES:
|
||||
return True
|
||||
lower = (fileName or "").lower()
|
||||
return any(lower.endswith(ext) for ext in _AUDIO_EXTENSIONS)
|
||||
|
||||
def getSupportedExtensions(self) -> list[str]:
|
||||
return list(_AUDIO_EXTENSIONS)
|
||||
|
||||
def getSupportedMimeTypes(self) -> list[str]:
|
||||
return list(_AUDIO_MIME_TYPES)
|
||||
|
||||
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
|
||||
fileName = context.get("fileName", "audio")
|
||||
mimeType = context.get("mimeType") or "audio/mpeg"
|
||||
fileSize = len(fileBytes)
|
||||
|
||||
rootId = makeId()
|
||||
parts: List[ContentPart] = []
|
||||
|
||||
meta = _extractMetadata(fileBytes, fileName)
|
||||
meta["size"] = fileSize
|
||||
meta["fileName"] = fileName
|
||||
meta["mimeType"] = mimeType
|
||||
|
||||
metaLines = [f"Audio file: {fileName}"]
|
||||
if meta.get("duration"):
|
||||
mins = int(meta["duration"] // 60)
|
||||
secs = int(meta["duration"] % 60)
|
||||
metaLines.append(f"Duration: {mins}:{secs:02d}")
|
||||
if meta.get("bitrate"):
|
||||
metaLines.append(f"Bitrate: {meta['bitrate']} kbps")
|
||||
if meta.get("sampleRate"):
|
||||
metaLines.append(f"Sample rate: {meta['sampleRate']} Hz")
|
||||
if meta.get("channels"):
|
||||
metaLines.append(f"Channels: {meta['channels']}")
|
||||
if meta.get("title") or meta.get("artist") or meta.get("album"):
|
||||
metaLines.append(f"Title: {meta.get('title', 'N/A')}")
|
||||
metaLines.append(f"Artist: {meta.get('artist', 'N/A')}")
|
||||
metaLines.append(f"Album: {meta.get('album', 'N/A')}")
|
||||
metaLines.append(f"Size: {fileSize:,} bytes")
|
||||
|
||||
parts.append(ContentPart(
|
||||
id=rootId, parentId=None, label="metadata",
|
||||
typeGroup="text", mimeType="text/plain",
|
||||
data="\n".join(metaLines), metadata=meta,
|
||||
))
|
||||
|
||||
audioData = ""
|
||||
if fileSize <= _MAX_INLINE_SIZE:
|
||||
audioData = base64.b64encode(fileBytes).decode("utf-8")
|
||||
|
||||
parts.append(ContentPart(
|
||||
id=makeId(), parentId=rootId, label="audiostream",
|
||||
typeGroup="audiostream", mimeType=mimeType,
|
||||
data=audioData, metadata={"size": fileSize, "inlined": fileSize <= _MAX_INLINE_SIZE},
|
||||
))
|
||||
|
||||
return parts
|
||||
|
||||
|
||||
def _extractMetadata(fileBytes: bytes, fileName: str) -> Dict[str, Any]:
|
||||
"""Extract audio metadata using mutagen (optional) with stdlib fallback."""
|
||||
meta: Dict[str, Any] = {}
|
||||
|
||||
try:
|
||||
import mutagen
|
||||
import io
|
||||
audio = mutagen.File(io.BytesIO(fileBytes))
|
||||
if audio is not None:
|
||||
if audio.info:
|
||||
meta["duration"] = getattr(audio.info, "length", None)
|
||||
meta["bitrate"] = getattr(audio.info, "bitrate", None)
|
||||
if meta["bitrate"]:
|
||||
meta["bitrate"] = meta["bitrate"] // 1000
|
||||
meta["sampleRate"] = getattr(audio.info, "sample_rate", None)
|
||||
meta["channels"] = getattr(audio.info, "channels", None)
|
||||
|
||||
tags = audio.tags
|
||||
if tags:
|
||||
meta["title"] = _getTag(tags, ["TIT2", "title", "\xa9nam"])
|
||||
meta["artist"] = _getTag(tags, ["TPE1", "artist", "\xa9ART"])
|
||||
meta["album"] = _getTag(tags, ["TALB", "album", "\xa9alb"])
|
||||
|
||||
return {k: v for k, v in meta.items() if v is not None}
|
||||
except ImportError:
|
||||
logger.debug("mutagen not installed -- using basic metadata extraction")
|
||||
except Exception as e:
|
||||
logger.debug(f"mutagen metadata extraction failed: {e}")
|
||||
|
||||
lower = fileName.lower()
|
||||
if lower.endswith(".wav"):
|
||||
meta.update(_parseWavHeader(fileBytes))
|
||||
|
||||
return {k: v for k, v in meta.items() if v is not None}
|
||||
|
||||
|
||||
def _getTag(tags, keys: list) -> Any:
|
||||
"""Try multiple tag keys and return the first found value."""
|
||||
for key in keys:
|
||||
val = tags.get(key)
|
||||
if val is not None:
|
||||
return str(val) if not isinstance(val, str) else val
|
||||
return None
|
||||
|
||||
|
||||
def _parseWavHeader(fileBytes: bytes) -> Dict[str, Any]:
|
||||
"""Minimal WAV header parser for basic metadata."""
|
||||
meta: Dict[str, Any] = {}
|
||||
if len(fileBytes) < 44:
|
||||
return meta
|
||||
try:
|
||||
if fileBytes[:4] != b"RIFF" or fileBytes[8:12] != b"WAVE":
|
||||
return meta
|
||||
channels = struct.unpack_from("<H", fileBytes, 22)[0]
|
||||
sampleRate = struct.unpack_from("<I", fileBytes, 24)[0]
|
||||
bitsPerSample = struct.unpack_from("<H", fileBytes, 34)[0]
|
||||
dataSize = struct.unpack_from("<I", fileBytes, 40)[0]
|
||||
|
||||
meta["channels"] = channels
|
||||
meta["sampleRate"] = sampleRate
|
||||
meta["bitrate"] = (sampleRate * channels * bitsPerSample) // 1000
|
||||
if sampleRate and channels and bitsPerSample:
|
||||
meta["duration"] = dataSize / (sampleRate * channels * (bitsPerSample / 8))
|
||||
except Exception:
|
||||
pass
|
||||
return meta
|
||||
|
|
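Usage sketch for the audio extractor; the WAV file path is hypothetical and only behaviour shown in this diff is relied upon:

with open("sample.wav", "rb") as f:   # hypothetical local file
    audioBytes = f.read()

extractor = AudioExtractor()
if extractor.detect("sample.wav", "audio/wav", audioBytes[:16]):
    parts = extractor.extract(audioBytes, {"fileName": "sample.wav", "mimeType": "audio/wav"})
    print(parts[0].data)                 # text metadata summary
    print(parts[1].metadata["inlined"])  # True only if the file is 10 MB or smaller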
@ -0,0 +1,339 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Container extractor for ZIP, TAR, GZ, and 7Z archives.
|
||||
|
||||
Recursively unpacks containers and delegates each contained file to the
|
||||
appropriate type-specific extractor via the ExtractorRegistry.
|
||||
|
||||
Safety limits:
|
||||
- MAX_TOTAL_EXTRACTED_SIZE: 500 MB
|
||||
- MAX_FILE_COUNT: 10000
|
||||
- maxDepth: 5
|
||||
- Symlinks blocked
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
import io
|
||||
import logging
|
||||
import mimetypes
|
||||
import zipfile
|
||||
import tarfile
|
||||
|
||||
from ..subUtils import makeId
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from modules.datamodels.datamodelContent import ContainerLimitError, ContentContextRef
|
||||
from ..subRegistry import Extractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_TOTAL_EXTRACTED_SIZE = 500 * 1024 * 1024 # 500 MB
|
||||
MAX_FILE_COUNT = 10000
|
||||
MAX_DEPTH = 5
|
||||
|
||||
_CONTAINER_MIME_TYPES = [
|
||||
"application/zip",
|
||||
"application/x-zip-compressed",
|
||||
"application/x-tar",
|
||||
"application/gzip",
|
||||
"application/x-gzip",
|
||||
"application/x-7z-compressed",
|
||||
]
|
||||
_CONTAINER_EXTENSIONS = [".zip", ".tar", ".gz", ".tar.gz", ".tgz", ".7z"]
|
||||
|
||||
|
||||
def _detectMimeType(fileName: str) -> str:
|
||||
"""Detect MIME type from file name."""
|
||||
guessed, _ = mimetypes.guess_type(fileName)
|
||||
return guessed or "application/octet-stream"
|
||||
|
||||
|
||||
def _isSymlink(info) -> bool:
|
||||
"""Check if a tar member is a symlink."""
|
||||
if hasattr(info, "issym") and callable(info.issym):
|
||||
return info.issym() or info.islnk()
|
||||
return False
|
||||
|
||||
|
||||
class ContainerExtractor(Extractor):
|
||||
"""Extractor for archive containers (ZIP, TAR, GZ, 7Z).
|
||||
|
||||
Recursively resolves nested containers and produces a flat list of
|
||||
ContentPart entries -- one per contained file -- with containerPath metadata.
|
||||
"""
|
||||
|
||||
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
|
||||
if mimeType in _CONTAINER_MIME_TYPES:
|
||||
return True
|
||||
lower = (fileName or "").lower()
|
||||
return any(lower.endswith(ext) for ext in _CONTAINER_EXTENSIONS)
|
||||
|
||||
def getSupportedExtensions(self) -> list[str]:
|
||||
return list(_CONTAINER_EXTENSIONS)
|
||||
|
||||
def getSupportedMimeTypes(self) -> list[str]:
|
||||
return list(_CONTAINER_MIME_TYPES)
|
||||
|
||||
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
|
||||
"""Extract by recursively unpacking the container."""
|
||||
fileName = context.get("fileName", "archive")
|
||||
mimeType = context.get("mimeType", "application/octet-stream")
|
||||
|
||||
rootId = makeId()
|
||||
parts: List[ContentPart] = [
|
||||
ContentPart(
|
||||
id=rootId,
|
||||
parentId=None,
|
||||
label=fileName,
|
||||
typeGroup="container",
|
||||
mimeType=mimeType,
|
||||
data="",
|
||||
metadata={"size": len(fileBytes), "containerType": "archive"},
|
||||
)
|
||||
]
|
||||
|
||||
state = {"totalSize": 0, "fileCount": 0}
|
||||
try:
|
||||
childParts = _resolveContainerRecursive(
|
||||
fileBytes, mimeType, fileName, rootId, "", 0, state
|
||||
)
|
||||
parts.extend(childParts)
|
||||
except ContainerLimitError as e:
|
||||
logger.warning(f"Container limit reached for {fileName}: {e}")
|
||||
parts.append(ContentPart(
|
||||
id=makeId(),
|
||||
parentId=rootId,
|
||||
label="limit_exceeded",
|
||||
typeGroup="text",
|
||||
mimeType="text/plain",
|
||||
data=str(e),
|
||||
metadata={"warning": "Container extraction limit exceeded"},
|
||||
))
|
||||
|
||||
return parts
|
||||
|
||||
|
||||
def _resolveContainerRecursive(
|
||||
containerBytes: bytes,
|
||||
containerMime: str,
|
||||
containerName: str,
|
||||
parentId: str,
|
||||
containerPath: str,
|
||||
depth: int,
|
||||
state: Dict[str, int],
|
||||
) -> List[ContentPart]:
|
||||
"""Recursively unpack containers. No AI calls."""
|
||||
if depth > MAX_DEPTH:
|
||||
raise ContainerLimitError(f"Max nesting depth {MAX_DEPTH} exceeded")
|
||||
|
||||
parts: List[ContentPart] = []
|
||||
|
||||
if containerMime in ("application/zip", "application/x-zip-compressed") or containerName.lower().endswith(".zip"):
|
||||
parts.extend(_extractZip(containerBytes, parentId, containerPath, depth, state))
|
||||
elif containerMime in ("application/x-tar",) or containerName.lower().endswith(".tar"):
|
||||
parts.extend(_extractTar(containerBytes, parentId, containerPath, depth, state, compressed=False))
|
||||
elif containerMime in ("application/gzip", "application/x-gzip") or containerName.lower().endswith((".gz", ".tgz", ".tar.gz")):
|
||||
parts.extend(_extractTar(containerBytes, parentId, containerPath, depth, state, compressed=True))
|
||||
elif containerName.lower().endswith(".7z"):
|
||||
parts.extend(_extract7z(containerBytes, parentId, containerPath, depth, state))
|
||||
else:
|
||||
logger.warning(f"Unknown container format: {containerMime} ({containerName})")
|
||||
|
||||
return parts
|
||||
|
||||
|
||||
def _addFilePart(
|
||||
data: bytes,
|
||||
fileName: str,
|
||||
parentId: str,
|
||||
containerPath: str,
|
||||
state: Dict[str, int],
|
||||
) -> List[ContentPart]:
|
||||
"""Extract a file via its type-specific Extractor and return ContentParts."""
|
||||
state["totalSize"] += len(data)
|
||||
state["fileCount"] += 1
|
||||
|
||||
if state["totalSize"] > MAX_TOTAL_EXTRACTED_SIZE:
|
||||
raise ContainerLimitError(f"Total extracted size exceeds {MAX_TOTAL_EXTRACTED_SIZE // (1024 * 1024)} MB")
|
||||
if state["fileCount"] > MAX_FILE_COUNT:
|
||||
raise ContainerLimitError(f"File count exceeds {MAX_FILE_COUNT}")
|
||||
|
||||
entryPath = f"{containerPath}/{fileName}" if containerPath else fileName
|
||||
detectedMime = _detectMimeType(fileName)
|
||||
|
||||
from ..subRegistry import ExtractorRegistry
|
||||
registry = ExtractorRegistry()
|
||||
extractor = registry.resolve(detectedMime, fileName)
|
||||
|
||||
if extractor and not isinstance(extractor, ContainerExtractor):
|
||||
try:
|
||||
childParts = extractor.extract(data, {"fileName": fileName, "mimeType": detectedMime})
|
||||
for part in childParts:
|
||||
part.parentId = parentId
|
||||
if not part.metadata:
|
||||
part.metadata = {}
|
||||
part.metadata["containerPath"] = entryPath
|
||||
return childParts
|
||||
except Exception as e:
|
||||
logger.warning(f"Type-extractor failed for {fileName} in container: {e}")
|
||||
|
||||
import base64
|
||||
encodedData = base64.b64encode(data).decode("utf-8") if data else ""
|
||||
|
||||
return [ContentPart(
|
||||
id=makeId(),
|
||||
parentId=parentId,
|
||||
label=fileName,
|
||||
typeGroup="binary",
|
||||
mimeType=detectedMime,
|
||||
data=encodedData,
|
||||
metadata={
|
||||
"size": len(data),
|
||||
"containerPath": entryPath,
|
||||
"contextRef": ContentContextRef(
|
||||
containerPath=entryPath,
|
||||
location="file",
|
||||
).model_dump(),
|
||||
},
|
||||
)]
|
||||
|
||||
|
||||
def _isNestedContainer(fileName: str, mimeType: str) -> bool:
|
||||
lower = fileName.lower()
|
||||
return any(lower.endswith(ext) for ext in _CONTAINER_EXTENSIONS) or mimeType in _CONTAINER_MIME_TYPES
|
||||
|
||||
|
||||
def _extractZip(
|
||||
data: bytes, parentId: str, containerPath: str, depth: int, state: Dict[str, int]
|
||||
) -> List[ContentPart]:
|
||||
parts: List[ContentPart] = []
|
||||
try:
|
||||
with zipfile.ZipFile(io.BytesIO(data)) as zf:
|
||||
for info in zf.infolist():
|
||||
if info.is_dir():
|
||||
continue
|
||||
if info.file_size == 0:
|
||||
continue
|
||||
|
||||
entryPath = f"{containerPath}/{info.filename}" if containerPath else info.filename
|
||||
entryMime = _detectMimeType(info.filename)
|
||||
entryData = zf.read(info.filename)
|
||||
|
||||
if _isNestedContainer(info.filename, entryMime):
|
||||
nestedId = makeId()
|
||||
parts.append(ContentPart(
|
||||
id=nestedId,
|
||||
parentId=parentId,
|
||||
label=info.filename,
|
||||
typeGroup="container",
|
||||
mimeType=entryMime,
|
||||
data="",
|
||||
metadata={"size": len(entryData), "containerPath": entryPath},
|
||||
))
|
||||
nested = _resolveContainerRecursive(
|
||||
entryData, entryMime, info.filename, nestedId, entryPath, depth + 1, state
|
||||
)
|
||||
parts.extend(nested)
|
||||
else:
|
||||
parts.extend(_addFilePart(entryData, info.filename, parentId, containerPath, state))
|
||||
except zipfile.BadZipFile as e:
|
||||
logger.error(f"Invalid ZIP file: {e}")
|
||||
parts.append(ContentPart(
|
||||
id=makeId(), parentId=parentId, label="error",
|
||||
typeGroup="text", mimeType="text/plain",
|
||||
data=f"Invalid ZIP archive: {e}", metadata={"error": True},
|
||||
))
|
||||
return parts
|
||||
|
||||
|
||||
def _extractTar(
|
||||
data: bytes, parentId: str, containerPath: str, depth: int, state: Dict[str, int],
|
||||
compressed: bool = False,
|
||||
) -> List[ContentPart]:
|
||||
parts: List[ContentPart] = []
|
||||
mode = "r:gz" if compressed else "r:"
|
||||
try:
|
||||
with tarfile.open(fileobj=io.BytesIO(data), mode=mode) as tf:
|
||||
for member in tf.getmembers():
|
||||
if member.isdir():
|
||||
continue
|
||||
if _isSymlink(member):
|
||||
logger.warning(f"Skipping symlink in TAR: {member.name}")
|
||||
continue
|
||||
if member.size == 0:
|
||||
continue
|
||||
|
||||
entryPath = f"{containerPath}/{member.name}" if containerPath else member.name
|
||||
entryMime = _detectMimeType(member.name)
|
||||
fobj = tf.extractfile(member)
|
||||
if fobj is None:
|
||||
continue
|
||||
entryData = fobj.read()
|
||||
|
||||
if _isNestedContainer(member.name, entryMime):
|
||||
nestedId = makeId()
|
||||
parts.append(ContentPart(
|
||||
id=nestedId, parentId=parentId, label=member.name,
|
||||
typeGroup="container", mimeType=entryMime, data="",
|
||||
metadata={"size": len(entryData), "containerPath": entryPath},
|
||||
))
|
||||
nested = _resolveContainerRecursive(
|
||||
entryData, entryMime, member.name, nestedId, entryPath, depth + 1, state
|
||||
)
|
||||
parts.extend(nested)
|
||||
else:
|
||||
parts.extend(_addFilePart(entryData, member.name, parentId, containerPath, state))
|
||||
except tarfile.TarError as e:
|
||||
logger.error(f"Invalid TAR file: {e}")
|
||||
parts.append(ContentPart(
|
||||
id=makeId(), parentId=parentId, label="error",
|
||||
typeGroup="text", mimeType="text/plain",
|
||||
data=f"Invalid TAR archive: {e}", metadata={"error": True},
|
||||
))
|
||||
return parts
|
||||
|
||||
|
||||
def _extract7z(
|
||||
data: bytes, parentId: str, containerPath: str, depth: int, state: Dict[str, int]
|
||||
) -> List[ContentPart]:
|
||||
"""Extract 7z archive. Requires py7zr (optional dependency)."""
|
||||
parts: List[ContentPart] = []
|
||||
try:
|
||||
import py7zr
|
||||
with py7zr.SevenZipFile(io.BytesIO(data), mode="r") as szf:
|
||||
allFiles = szf.readall()
|
||||
for fileName, bio in allFiles.items():
|
||||
entryData = bio.read() if hasattr(bio, "read") else bytes(bio)
|
||||
if not entryData:
|
||||
continue
|
||||
|
||||
entryPath = f"{containerPath}/{fileName}" if containerPath else fileName
|
||||
entryMime = _detectMimeType(fileName)
|
||||
|
||||
if _isNestedContainer(fileName, entryMime):
|
||||
nestedId = makeId()
|
||||
parts.append(ContentPart(
|
||||
id=nestedId, parentId=parentId, label=fileName,
|
||||
typeGroup="container", mimeType=entryMime, data="",
|
||||
metadata={"size": len(entryData), "containerPath": entryPath},
|
||||
))
|
||||
nested = _resolveContainerRecursive(
|
||||
entryData, entryMime, fileName, nestedId, entryPath, depth + 1, state
|
||||
)
|
||||
parts.extend(nested)
|
||||
else:
|
||||
parts.extend(_addFilePart(entryData, fileName, parentId, containerPath, state))
|
||||
except ImportError:
|
||||
logger.warning("py7zr not installed -- 7z files will be treated as binary")
|
||||
parts.append(ContentPart(
|
||||
id=makeId(), parentId=parentId, label="unsupported",
|
||||
typeGroup="text", mimeType="text/plain",
|
||||
data="7z extraction requires py7zr package", metadata={"warning": True},
|
||||
))
|
||||
except Exception as e:
|
||||
logger.error(f"Invalid 7z file: {e}")
|
||||
parts.append(ContentPart(
|
||||
id=makeId(), parentId=parentId, label="error",
|
||||
typeGroup="text", mimeType="text/plain",
|
||||
data=f"Invalid 7z archive: {e}", metadata={"error": True},
|
||||
))
|
||||
return parts
|
||||
|
|
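The container extractor can be exercised end to end with an in-memory ZIP, which keeps the example self-contained; the archive contents are made up:

import io
import zipfile

buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as zf:
    zf.writestr("notes/readme.txt", "hello from inside the archive")
zipBytes = buf.getvalue()

extractor = ContainerExtractor()
parts = extractor.extract(zipBytes, {"fileName": "bundle.zip", "mimeType": "application/zip"})
for part in parts:
    # root container part first, then one part per contained file with containerPath metadata
    print(part.typeGroup, part.label, part.metadata.get("containerPath"))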
@ -74,19 +74,33 @@ class DocxExtractor(Extractor):
|
|||
with io.BytesIO(fileBytes) as buf:
|
||||
d = docx.Document(buf)
|
||||
# paragraphs
|
||||
fileName = context.get("fileName", "document.docx")
|
||||
headingIndex = 0
|
||||
currentSection = "body"
|
||||
for i, para in enumerate(d.paragraphs):
|
||||
text = para.text or ""
|
||||
if text.strip():
|
||||
parts.append(ContentPart(
|
||||
id=makeId(),
|
||||
parentId=rootId,
|
||||
label=f"p_{i+1}",
|
||||
typeGroup="text",
|
||||
mimeType="text/plain",
|
||||
data=text,
|
||||
metadata={"size": len(text.encode('utf-8'))}
|
||||
))
|
||||
# tables → CSV rows
|
||||
if not text.strip():
|
||||
continue
|
||||
styleName = (para.style.name or "").lower() if para.style else ""
|
||||
if "heading" in styleName:
|
||||
headingIndex += 1
|
||||
currentSection = f"heading:{headingIndex}"
|
||||
parts.append(ContentPart(
|
||||
id=makeId(),
|
||||
parentId=rootId,
|
||||
label=f"p_{i+1}",
|
||||
typeGroup="text",
|
||||
mimeType="text/plain",
|
||||
data=text,
|
||||
metadata={
|
||||
"size": len(text.encode('utf-8')),
|
||||
"contextRef": {
|
||||
"containerPath": fileName,
|
||||
"location": f"paragraph:{i+1}",
|
||||
"sectionId": currentSection,
|
||||
},
|
||||
}
|
||||
))
|
||||
for ti, table in enumerate(d.tables):
|
||||
rows: list[str] = []
|
||||
for row in table.rows:
|
||||
|
|
@ -101,7 +115,14 @@ class DocxExtractor(Extractor):
|
|||
typeGroup="table",
|
||||
mimeType="text/csv",
|
||||
data=csvData,
|
||||
metadata={"size": len(csvData.encode('utf-8'))}
|
||||
metadata={
|
||||
"size": len(csvData.encode('utf-8')),
|
||||
"contextRef": {
|
||||
"containerPath": fileName,
|
||||
"location": f"table:{ti+1}",
|
||||
"sectionId": currentSection,
|
||||
},
|
||||
}
|
||||
))
|
||||
|
||||
return parts
|
||||
|
|
|
|||
|
|
@ -0,0 +1,230 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Email extractor for EML and MSG files.
|
||||
|
||||
Parses email headers, body (text/html), and attachments.
|
||||
Attachments are delegated to the ExtractorRegistry for type-specific processing.
|
||||
|
||||
Optional dependency: extract-msg (for .msg files).
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
import email
|
||||
import email.policy
|
||||
import email.utils
|
||||
import io
|
||||
import logging
|
||||
import mimetypes
|
||||
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from ..subUtils import makeId
|
||||
from ..subRegistry import Extractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_EMAIL_MIME_TYPES = [
|
||||
"message/rfc822",
|
||||
"application/vnd.ms-outlook",
|
||||
]
|
||||
_EMAIL_EXTENSIONS = [".eml", ".msg"]
|
||||
|
||||
|
||||
class EmailExtractor(Extractor):
|
||||
"""Extractor for email files (EML, MSG).
|
||||
|
||||
Produces:
|
||||
- 1 text ContentPart with header metadata (From, To, Subject, Date)
|
||||
- 1 text ContentPart per body part (plain text / HTML)
|
||||
- Delegated ContentParts for each attachment via ExtractorRegistry
|
||||
"""
|
||||
|
||||
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
|
||||
if mimeType in _EMAIL_MIME_TYPES:
|
||||
return True
|
||||
lower = (fileName or "").lower()
|
||||
return any(lower.endswith(ext) for ext in _EMAIL_EXTENSIONS)
|
||||
|
||||
def getSupportedExtensions(self) -> list[str]:
|
||||
return list(_EMAIL_EXTENSIONS)
|
||||
|
||||
def getSupportedMimeTypes(self) -> list[str]:
|
||||
return list(_EMAIL_MIME_TYPES)
|
||||
|
||||
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
|
||||
fileName = context.get("fileName", "email")
|
||||
lower = (fileName or "").lower()
|
||||
|
||||
if lower.endswith(".msg"):
|
||||
return self._extractMsg(fileBytes, fileName)
|
||||
return self._extractEml(fileBytes, fileName)
|
||||
|
||||
def _extractEml(self, fileBytes: bytes, fileName: str) -> List[ContentPart]:
|
||||
"""Parse standard EML (RFC 822) using stdlib email."""
|
||||
rootId = makeId()
|
||||
parts: List[ContentPart] = []
|
||||
|
||||
try:
|
||||
msg = email.message_from_bytes(fileBytes, policy=email.policy.default)
|
||||
except Exception as e:
|
||||
logger.error(f"EmailExtractor: failed to parse EML: {e}")
|
||||
return [ContentPart(
|
||||
id=rootId, parentId=None, label=fileName,
|
||||
typeGroup="text", mimeType="text/plain",
|
||||
data=f"Failed to parse email: {e}", metadata={"error": True},
|
||||
)]
|
||||
|
||||
headerText = _buildHeaderText(msg)
|
||||
parts.append(ContentPart(
|
||||
id=rootId, parentId=None, label="headers",
|
||||
typeGroup="text", mimeType="text/plain",
|
||||
data=headerText, metadata={"emailPart": "headers"},
|
||||
))
|
||||
|
||||
for part in msg.walk():
|
||||
contentType = part.get_content_type()
|
||||
disposition = str(part.get("Content-Disposition", ""))
|
||||
|
||||
if part.is_multipart():
|
||||
continue
|
||||
|
||||
if "attachment" in disposition:
|
||||
attachName = part.get_filename() or "attachment"
|
||||
attachData = part.get_payload(decode=True)
|
||||
if attachData:
|
||||
parts.extend(_delegateAttachment(attachData, attachName, rootId))
|
||||
continue
|
||||
|
||||
if contentType == "text/plain":
|
||||
body = part.get_content()
|
||||
if body:
|
||||
parts.append(ContentPart(
|
||||
id=makeId(), parentId=rootId, label="body_text",
|
||||
typeGroup="text", mimeType="text/plain",
|
||||
data=str(body), metadata={"emailPart": "body"},
|
||||
))
|
||||
elif contentType == "text/html":
|
||||
body = part.get_content()
|
||||
if body:
|
||||
parts.append(ContentPart(
|
||||
id=makeId(), parentId=rootId, label="body_html",
|
||||
typeGroup="text", mimeType="text/html",
|
||||
data=str(body), metadata={"emailPart": "body_html"},
|
||||
))
|
||||
|
||||
return parts
|
||||
|
||||
def _extractMsg(self, fileBytes: bytes, fileName: str) -> List[ContentPart]:
|
||||
"""Parse Outlook MSG files using extract-msg (optional)."""
|
||||
rootId = makeId()
|
||||
parts: List[ContentPart] = []
|
||||
|
||||
try:
|
||||
import extract_msg
|
||||
except ImportError:
|
||||
logger.warning("extract-msg not installed -- MSG files will be treated as binary")
|
||||
return [ContentPart(
|
||||
id=rootId, parentId=None, label=fileName,
|
||||
typeGroup="text", mimeType="text/plain",
|
||||
data="MSG extraction requires the extract-msg package.",
|
||||
metadata={"warning": True},
|
||||
)]
|
||||
|
||||
try:
|
||||
msgFile = extract_msg.Message(io.BytesIO(fileBytes))
|
||||
except Exception as e:
|
||||
logger.error(f"EmailExtractor: failed to parse MSG: {e}")
|
||||
return [ContentPart(
|
||||
id=rootId, parentId=None, label=fileName,
|
||||
typeGroup="text", mimeType="text/plain",
|
||||
data=f"Failed to parse MSG: {e}", metadata={"error": True},
|
||||
)]
|
||||
|
||||
headerLines = []
|
||||
if msgFile.sender:
|
||||
headerLines.append(f"From: {msgFile.sender}")
|
||||
if msgFile.to:
|
||||
headerLines.append(f"To: {msgFile.to}")
|
||||
if getattr(msgFile, "cc", None):
|
||||
headerLines.append(f"Cc: {msgFile.cc}")
|
||||
if msgFile.subject:
|
||||
headerLines.append(f"Subject: {msgFile.subject}")
|
||||
if msgFile.date:
|
||||
headerLines.append(f"Date: {msgFile.date}")
|
||||
|
||||
parts.append(ContentPart(
|
||||
id=rootId, parentId=None, label="headers",
|
||||
typeGroup="text", mimeType="text/plain",
|
||||
data="\n".join(headerLines), metadata={"emailPart": "headers"},
|
||||
))
|
||||
|
||||
body = msgFile.body
|
||||
if body:
|
||||
parts.append(ContentPart(
|
||||
id=makeId(), parentId=rootId, label="body_text",
|
||||
typeGroup="text", mimeType="text/plain",
|
||||
data=body, metadata={"emailPart": "body"},
|
||||
))
|
||||
|
||||
htmlBody = getattr(msgFile, "htmlBody", None)
|
||||
if htmlBody:
|
||||
if isinstance(htmlBody, bytes):
|
||||
htmlBody = htmlBody.decode("utf-8", errors="replace")
|
||||
parts.append(ContentPart(
|
||||
id=makeId(), parentId=rootId, label="body_html",
|
||||
typeGroup="text", mimeType="text/html",
|
||||
data=htmlBody, metadata={"emailPart": "body_html"},
|
||||
))
|
||||
|
||||
for attachment in (msgFile.attachments or []):
|
||||
attachName = getattr(attachment, "longFilename", None) or getattr(attachment, "shortFilename", None) or "attachment"
|
||||
attachData = getattr(attachment, "data", None)
|
||||
if attachData:
|
||||
parts.extend(_delegateAttachment(attachData, attachName, rootId))
|
||||
|
||||
try:
|
||||
msgFile.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return parts
|
||||
|
||||
|
||||
def _buildHeaderText(msg) -> str:
|
||||
"""Build a readable text summary of key email headers."""
|
||||
lines = []
|
||||
for header in ("From", "To", "Cc", "Subject", "Date", "Message-ID"):
|
||||
value = msg.get(header)
|
||||
if value:
|
||||
lines.append(f"{header}: {value}")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _delegateAttachment(attachData: bytes, attachName: str, parentId: str) -> List[ContentPart]:
|
||||
"""Delegate an attachment to the appropriate type-specific extractor."""
|
||||
guessedMime, _ = mimetypes.guess_type(attachName)
|
||||
detectedMime = guessedMime or "application/octet-stream"
|
||||
|
||||
from ..subRegistry import ExtractorRegistry
|
||||
registry = ExtractorRegistry()
|
||||
extractor = registry.resolve(detectedMime, attachName)
|
||||
|
||||
if extractor and not isinstance(extractor, EmailExtractor):
|
||||
try:
|
||||
childParts = extractor.extract(attachData, {"fileName": attachName, "mimeType": detectedMime})
|
||||
for part in childParts:
|
||||
part.parentId = parentId
|
||||
if not part.metadata:
|
||||
part.metadata = {}
|
||||
part.metadata["emailAttachment"] = attachName
|
||||
return childParts
|
||||
except Exception as e:
|
||||
logger.warning(f"Extractor failed for email attachment {attachName}: {e}")
|
||||
|
||||
import base64
|
||||
encodedData = base64.b64encode(attachData).decode("utf-8") if attachData else ""
|
||||
return [ContentPart(
|
||||
id=makeId(), parentId=parentId, label=attachName,
|
||||
typeGroup="binary", mimeType=detectedMime,
|
||||
data=encodedData,
|
||||
metadata={"size": len(attachData), "emailAttachment": attachName},
|
||||
)]
|
||||
|
|
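A sketch for the EML path that builds a tiny message with the standard library so nothing outside this diff is needed; addresses, subject, and attachment contents are invented:

from email.message import EmailMessage

msg = EmailMessage()
msg["From"] = "alice@example.com"
msg["To"] = "bob@example.com"
msg["Subject"] = "Quarterly figures"
msg.set_content("Numbers attached.")
msg.add_attachment(b"a,b\n1,2\n", maintype="application", subtype="octet-stream", filename="figures.csv")

extractor = EmailExtractor()
parts = extractor.extract(msg.as_bytes(), {"fileName": "mail.eml"})
for part in parts:
    print(part.label, part.metadata)   # headers, body_text, then delegated attachment parts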
@ -0,0 +1,184 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Folder extractor -- treats a local folder reference as a container.
|
||||
|
||||
Not registered in the MIME-based ExtractorRegistry (folders have no MIME type).
|
||||
Instead, called directly by agent tools (browseContainer) when handling folder references.
|
||||
|
||||
Applies the same safety limits as ContainerExtractor.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
import logging
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
|
||||
from ..subUtils import makeId
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from modules.datamodels.datamodelContent import ContainerLimitError, ContentContextRef
|
||||
from ..subRegistry import Extractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_TOTAL_EXTRACTED_SIZE = 500 * 1024 * 1024
|
||||
MAX_FILE_COUNT = 10000
|
||||
MAX_DEPTH = 5
|
||||
|
||||
|
||||
class FolderExtractor(Extractor):
|
||||
"""Extracts contents from a local folder path.
|
||||
|
||||
Unlike other extractors, this does not receive fileBytes. Instead it
|
||||
receives a folder path via context["folderPath"] and walks the directory.
|
||||
"""
|
||||
|
||||
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
|
||||
return False
|
||||
|
||||
def getSupportedExtensions(self) -> list[str]:
|
||||
return []
|
||||
|
||||
def getSupportedMimeTypes(self) -> list[str]:
|
||||
return []
|
||||
|
||||
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
|
||||
"""Extract folder contents.
|
||||
|
||||
context must contain:
|
||||
folderPath: str -- absolute path to the folder
|
||||
"""
|
||||
folderPath = context.get("folderPath", "")
|
||||
if not folderPath:
|
||||
return []
|
||||
|
||||
folder = Path(folderPath)
|
||||
if not folder.is_dir():
|
||||
logger.error(f"FolderExtractor: not a directory: {folderPath}")
|
||||
return []
|
||||
|
||||
rootId = makeId()
|
||||
parts: List[ContentPart] = [
|
||||
ContentPart(
|
||||
id=rootId,
|
||||
parentId=None,
|
||||
label=folder.name or "folder",
|
||||
typeGroup="container",
|
||||
mimeType="inode/directory",
|
||||
data="",
|
||||
metadata={"folderPath": str(folder), "containerType": "folder"},
|
||||
)
|
||||
]
|
||||
|
||||
state = {"totalSize": 0, "fileCount": 0}
|
||||
try:
|
||||
_walkFolder(folder, rootId, "", 0, state, parts)
|
||||
except ContainerLimitError as e:
|
||||
logger.warning(f"Folder extraction limit reached: {e}")
|
||||
parts.append(ContentPart(
|
||||
id=makeId(),
|
||||
parentId=rootId,
|
||||
label="limit_exceeded",
|
||||
typeGroup="text",
|
||||
mimeType="text/plain",
|
||||
data=str(e),
|
||||
metadata={"warning": "Folder extraction limit exceeded"},
|
||||
))
|
||||
|
||||
return parts
|
||||
|
||||
|
||||
def _walkFolder(
|
||||
folder: Path,
|
||||
parentId: str,
|
||||
containerPath: str,
|
||||
depth: int,
|
||||
state: Dict[str, int],
|
||||
parts: List[ContentPart],
|
||||
) -> None:
|
||||
if depth > MAX_DEPTH:
|
||||
raise ContainerLimitError(f"Max folder depth {MAX_DEPTH} exceeded")
|
||||
|
||||
try:
|
||||
entries = sorted(folder.iterdir(), key=lambda p: (not p.is_dir(), p.name.lower()))
|
||||
except PermissionError:
|
||||
logger.warning(f"Permission denied: {folder}")
|
||||
return
|
||||
|
||||
for entry in entries:
|
||||
if entry.is_symlink():
|
||||
logger.debug(f"Skipping symlink: {entry}")
|
||||
continue
|
||||
|
||||
entryPath = f"{containerPath}/{entry.name}" if containerPath else entry.name
|
||||
|
||||
if entry.is_dir():
|
||||
folderId = makeId()
|
||||
parts.append(ContentPart(
|
||||
id=folderId,
|
||||
parentId=parentId,
|
||||
label=entry.name,
|
||||
typeGroup="container",
|
||||
mimeType="inode/directory",
|
||||
data="",
|
||||
metadata={"containerPath": entryPath, "containerType": "folder"},
|
||||
))
|
||||
_walkFolder(entry, folderId, entryPath, depth + 1, state, parts)
|
||||
|
||||
elif entry.is_file():
|
||||
try:
|
||||
fileSize = entry.stat().st_size
|
||||
except OSError:
|
||||
continue
|
||||
|
||||
state["totalSize"] += fileSize
|
||||
state["fileCount"] += 1
|
||||
|
||||
if state["totalSize"] > MAX_TOTAL_EXTRACTED_SIZE:
|
||||
raise ContainerLimitError(f"Total extracted size exceeds {MAX_TOTAL_EXTRACTED_SIZE // (1024 * 1024)} MB")
|
||||
if state["fileCount"] > MAX_FILE_COUNT:
|
||||
raise ContainerLimitError(f"File count exceeds {MAX_FILE_COUNT}")
|
||||
|
||||
guessedMime, _ = mimetypes.guess_type(entry.name)
|
||||
detectedMime = guessedMime or "application/octet-stream"
|
||||
|
||||
from ..subRegistry import ExtractorRegistry
|
||||
registry = ExtractorRegistry()
|
||||
extractor = registry.resolve(detectedMime, entry.name)
|
||||
|
||||
if extractor and not isinstance(extractor, FolderExtractor):
|
||||
try:
|
||||
fileData = entry.read_bytes()
|
||||
childParts = extractor.extract(fileData, {"fileName": entry.name, "mimeType": detectedMime})
|
||||
for part in childParts:
|
||||
part.parentId = parentId
|
||||
if not part.metadata:
|
||||
part.metadata = {}
|
||||
part.metadata["containerPath"] = entryPath
|
||||
parts.extend(childParts)
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.warning(f"Type-extractor failed for {entry.name}: {e}")
|
||||
|
||||
import base64
|
||||
try:
|
||||
fileData = entry.read_bytes()
|
||||
encodedData = base64.b64encode(fileData).decode("utf-8")
|
||||
except Exception:
|
||||
encodedData = ""
|
||||
|
||||
parts.append(ContentPart(
|
||||
id=makeId(),
|
||||
parentId=parentId,
|
||||
label=entry.name,
|
||||
typeGroup="binary",
|
||||
mimeType=detectedMime,
|
||||
data=encodedData,
|
||||
metadata={
|
||||
"size": fileSize,
|
||||
"containerPath": entryPath,
|
||||
"contextRef": ContentContextRef(
|
||||
containerPath=entryPath,
|
||||
location="file",
|
||||
).model_dump(),
|
||||
},
|
||||
))
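A minimal usage sketch for the folder flow above; the instantiation and the sample path are illustrative assumptions, not taken from the repository's call sites:

# Hypothetical illustration: FolderExtractor is driven by context["folderPath"],
# so the fileBytes argument stays empty.
extractor = FolderExtractor()
parts = extractor.extract(b"", {"folderPath": "/data/uploads/customer_dossier"})
root = parts[0]  # the container part (typeGroup="container")
topLevel = [p for p in parts if p.parentId == root.id]
print(root.metadata["folderPath"], len(topLevel))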
|
||||
|
|
@@ -89,7 +89,15 @@ class PdfExtractor(Extractor):
|
|||
typeGroup="text",
|
||||
mimeType="text/plain",
|
||||
data=text,
|
||||
metadata={"pages": 1, "pageIndex": i, "size": len(text.encode('utf-8'))}
|
||||
metadata={
|
||||
"pages": 1, "pageIndex": i,
|
||||
"size": len(text.encode('utf-8')),
|
||||
"contextRef": {
|
||||
"containerPath": context.get("fileName", "document.pdf"),
|
||||
"location": f"page:{i+1}",
|
||||
"pageIndex": i,
|
||||
},
|
||||
}
|
||||
))
|
||||
except Exception:
|
||||
continue
|
||||
|
|
@@ -114,7 +122,15 @@ class PdfExtractor(Extractor):
|
|||
typeGroup="text",
|
||||
mimeType="text/plain",
|
||||
data=text,
|
||||
metadata={"pages": 1, "pageIndex": i, "size": len(text.encode('utf-8'))}
|
||||
metadata={
|
||||
"pages": 1, "pageIndex": i,
|
||||
"size": len(text.encode('utf-8')),
|
||||
"contextRef": {
|
||||
"containerPath": context.get("fileName", "document.pdf"),
|
||||
"location": f"page:{i+1}",
|
||||
"pageIndex": i,
|
||||
},
|
||||
}
|
||||
))
|
||||
except Exception:
|
||||
continue
|
||||
|
|
@@ -143,7 +159,14 @@ class PdfExtractor(Extractor):
|
|||
typeGroup="image",
|
||||
mimeType=f"image/{ext}",
|
||||
data=base64.b64encode(imgBytes).decode("utf-8"),
|
||||
metadata={"pageIndex": i, "size": len(imgBytes)}
|
||||
metadata={
|
||||
"pageIndex": i, "size": len(imgBytes),
|
||||
"contextRef": {
|
||||
"containerPath": context.get("fileName", "document.pdf"),
|
||||
"location": f"page:{i+1}/image:{j}",
|
||||
"pageIndex": i,
|
||||
},
|
||||
}
|
||||
))
|
||||
except Exception:
|
||||
continue
|
||||
|
|
|
|||
|
|
@@ -119,17 +119,22 @@ class PptxExtractor(Extractor):
|
|||
image_bytes = image.blob
|
||||
image_b64 = base64.b64encode(image_bytes).decode('utf-8')
|
||||
|
||||
# Create image part
|
||||
fileName = context.get("fileName", "presentation.pptx")
|
||||
image_part = ContentPart(
|
||||
id=f"slide_{slide_index}_image_{len(parts)}",
|
||||
label=f"Slide {slide_index} Image",
|
||||
typeGroup="image",
|
||||
mimeType="image/png", # Default to PNG
|
||||
mimeType="image/png",
|
||||
data=image_b64,
|
||||
metadata={
|
||||
"slide_number": slide_index,
|
||||
"shape_type": "image",
|
||||
"extracted_from": "powerpoint"
|
||||
"extracted_from": "powerpoint",
|
||||
"contextRef": {
|
||||
"containerPath": fileName,
|
||||
"location": f"slide:{slide_index}/image",
|
||||
"slideIndex": slide_index - 1,
|
||||
},
|
||||
}
|
||||
)
|
||||
parts.append(image_part)
|
||||
|
|
@@ -140,6 +145,7 @@ class PptxExtractor(Extractor):
|
|||
if slide_content:
|
||||
slide_text = f"# Slide {slide_index}\n\n" + "\n\n".join(slide_content)
|
||||
|
||||
fileName = context.get("fileName", "presentation.pptx")
|
||||
slide_part = ContentPart(
|
||||
id=f"slide_{slide_index}",
|
||||
label=f"Slide {slide_index} Content",
|
||||
|
|
@@ -150,7 +156,12 @@ class PptxExtractor(Extractor):
|
|||
"slide_number": slide_index,
|
||||
"content_type": "slide",
|
||||
"extracted_from": "powerpoint",
|
||||
"text_length": len(slide_text)
|
||||
"text_length": len(slide_text),
|
||||
"contextRef": {
|
||||
"containerPath": fileName,
|
||||
"location": f"slide:{slide_index}",
|
||||
"slideIndex": slide_index - 1,
|
||||
},
|
||||
}
|
||||
)
|
||||
parts.append(slide_part)
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,208 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Video extractor for common video formats.
|
||||
|
||||
Extracts metadata (duration, resolution, codec, bitrate) and produces
|
||||
a `videostream` ContentPart. Video data is never base64-encoded due to size.
|
||||
|
||||
Optional dependency: mutagen (for rich metadata from MP4/WebM containers).
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
import logging
|
||||
import struct
|
||||
|
||||
from modules.datamodels.datamodelExtraction import ContentPart
|
||||
from ..subUtils import makeId
|
||||
from ..subRegistry import Extractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_VIDEO_MIME_TYPES = [
|
||||
"video/mp4",
|
||||
"video/webm",
|
||||
"video/x-msvideo",
|
||||
"video/avi",
|
||||
"video/quicktime",
|
||||
"video/x-matroska",
|
||||
"video/x-ms-wmv",
|
||||
"video/mpeg",
|
||||
"video/ogg",
|
||||
]
|
||||
_VIDEO_EXTENSIONS = [".mp4", ".webm", ".avi", ".mov", ".mkv", ".wmv", ".mpeg", ".mpg", ".ogv"]
|
||||
|
||||
|
||||
class VideoExtractor(Extractor):
|
||||
"""Extractor for video files.
|
||||
|
||||
Produces:
|
||||
- 1 text ContentPart with metadata summary
|
||||
- 1 videostream ContentPart (no inline data -- too large)
|
||||
"""
|
||||
|
||||
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
|
||||
if mimeType in _VIDEO_MIME_TYPES:
|
||||
return True
|
||||
lower = (fileName or "").lower()
|
||||
return any(lower.endswith(ext) for ext in _VIDEO_EXTENSIONS)
|
||||
|
||||
def getSupportedExtensions(self) -> list[str]:
|
||||
return list(_VIDEO_EXTENSIONS)
|
||||
|
||||
def getSupportedMimeTypes(self) -> list[str]:
|
||||
return list(_VIDEO_MIME_TYPES)
|
||||
|
||||
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
|
||||
fileName = context.get("fileName", "video")
|
||||
mimeType = context.get("mimeType") or "video/mp4"
|
||||
fileSize = len(fileBytes)
|
||||
|
||||
rootId = makeId()
|
||||
parts: List[ContentPart] = []
|
||||
|
||||
meta = _extractMetadata(fileBytes, fileName)
|
||||
meta["size"] = fileSize
|
||||
meta["fileName"] = fileName
|
||||
meta["mimeType"] = mimeType
|
||||
|
||||
metaLines = [f"Video file: {fileName}"]
|
||||
if meta.get("duration"):
|
||||
mins = int(meta["duration"] // 60)
|
||||
secs = int(meta["duration"] % 60)
|
||||
metaLines.append(f"Duration: {mins}:{secs:02d}")
|
||||
if meta.get("width") and meta.get("height"):
|
||||
metaLines.append(f"Resolution: {meta['width']}x{meta['height']}")
|
||||
if meta.get("codec"):
|
||||
metaLines.append(f"Codec: {meta['codec']}")
|
||||
if meta.get("bitrate"):
|
||||
metaLines.append(f"Bitrate: {meta['bitrate']} kbps")
|
||||
if meta.get("fps"):
|
||||
metaLines.append(f"FPS: {meta['fps']}")
|
||||
metaLines.append(f"Size: {fileSize:,} bytes")
|
||||
|
||||
parts.append(ContentPart(
|
||||
id=rootId, parentId=None, label="metadata",
|
||||
typeGroup="text", mimeType="text/plain",
|
||||
data="\n".join(metaLines), metadata=meta,
|
||||
))
|
||||
|
||||
parts.append(ContentPart(
|
||||
id=makeId(), parentId=rootId, label="videostream",
|
||||
typeGroup="videostream", mimeType=mimeType,
|
||||
data="", metadata={"size": fileSize, "inlined": False},
|
||||
))
|
||||
|
||||
return parts
|
||||
|
||||
|
||||
def _extractMetadata(fileBytes: bytes, fileName: str) -> Dict[str, Any]:
|
||||
"""Extract video metadata using mutagen (optional) with basic fallback."""
|
||||
meta: Dict[str, Any] = {}
|
||||
|
||||
try:
|
||||
import mutagen
|
||||
import io
|
||||
mediaFile = mutagen.File(io.BytesIO(fileBytes))
|
||||
if mediaFile is not None and mediaFile.info:
|
||||
meta["duration"] = getattr(mediaFile.info, "length", None)
|
||||
meta["bitrate"] = getattr(mediaFile.info, "bitrate", None)
|
||||
if meta["bitrate"]:
|
||||
meta["bitrate"] = meta["bitrate"] // 1000
|
||||
|
||||
if hasattr(mediaFile.info, "video"):
|
||||
for stream in (mediaFile.info.video if isinstance(mediaFile.info.video, list) else [mediaFile.info.video]):
|
||||
if hasattr(stream, "width"):
|
||||
meta["width"] = stream.width
|
||||
if hasattr(stream, "height"):
|
||||
meta["height"] = stream.height
|
||||
if hasattr(stream, "codec"):
|
||||
meta["codec"] = stream.codec
|
||||
|
||||
width = getattr(mediaFile.info, "width", None)
|
||||
height = getattr(mediaFile.info, "height", None)
|
||||
if width and height:
|
||||
meta["width"] = width
|
||||
meta["height"] = height
|
||||
|
||||
fps = getattr(mediaFile.info, "fps", None)
|
||||
if fps:
|
||||
meta["fps"] = round(fps, 2)
|
||||
|
||||
codec = getattr(mediaFile.info, "codec", None)
|
||||
if codec:
|
||||
meta["codec"] = codec
|
||||
|
||||
return {k: v for k, v in meta.items() if v is not None}
|
||||
except ImportError:
|
||||
logger.debug("mutagen not installed -- using basic video metadata extraction")
|
||||
except Exception as e:
|
||||
logger.debug(f"mutagen video metadata extraction failed: {e}")
|
||||
|
||||
lower = fileName.lower()
|
||||
if lower.endswith(".mp4"):
|
||||
meta.update(_parseMp4Header(fileBytes))
|
||||
elif lower.endswith(".avi"):
|
||||
meta.update(_parseAviHeader(fileBytes))
|
||||
|
||||
return {k: v for k, v in meta.items() if v is not None}
|
||||
|
||||
|
||||
def _parseMp4Header(fileBytes: bytes) -> Dict[str, Any]:
|
||||
"""Minimal MP4 moov/mvhd parser for duration and timescale."""
|
||||
meta: Dict[str, Any] = {}
|
||||
try:
|
||||
pos = 0
|
||||
while pos < len(fileBytes) - 8:
|
||||
boxSize = struct.unpack_from(">I", fileBytes, pos)[0]
|
||||
boxType = fileBytes[pos + 4:pos + 8]
|
||||
if boxSize < 8:
|
||||
break
|
||||
if boxType == b"moov":
|
||||
meta.update(_parseMoovBox(fileBytes[pos + 8:pos + boxSize]))
|
||||
break
|
||||
pos += boxSize
|
||||
except Exception:
|
||||
pass
|
||||
return meta
|
||||
|
||||
|
||||
def _parseMoovBox(data: bytes) -> Dict[str, Any]:
|
||||
"""Parse moov box to find mvhd with duration."""
|
||||
meta: Dict[str, Any] = {}
|
||||
pos = 0
|
||||
while pos < len(data) - 8:
|
||||
try:
|
||||
boxSize = struct.unpack_from(">I", data, pos)[0]
|
||||
boxType = data[pos + 4:pos + 8]
|
||||
if boxSize < 8:
|
||||
break
|
||||
if boxType == b"mvhd":
|
||||
version = data[pos + 8]
|
||||
if version == 0 and pos + 28 < len(data):
|
||||
timeScale = struct.unpack_from(">I", data, pos + 20)[0]
|
||||
duration = struct.unpack_from(">I", data, pos + 24)[0]
|
||||
if timeScale > 0:
|
||||
meta["duration"] = duration / timeScale
|
||||
break
|
||||
pos += boxSize
|
||||
except Exception:
|
||||
break
|
||||
return meta
|
||||
|
||||
|
||||
def _parseAviHeader(fileBytes: bytes) -> Dict[str, Any]:
|
||||
"""Minimal AVI header parser for resolution."""
|
||||
meta: Dict[str, Any] = {}
|
||||
if len(fileBytes) < 72:
|
||||
return meta
|
||||
try:
|
||||
if fileBytes[:4] != b"RIFF" or fileBytes[8:12] != b"AVI ":
|
||||
return meta
|
||||
width = struct.unpack_from("<I", fileBytes, 64)[0]
|
||||
height = struct.unpack_from("<I", fileBytes, 68)[0]
|
||||
if 0 < width < 100000 and 0 < height < 100000:
|
||||
meta["width"] = width
|
||||
meta["height"] = height
|
||||
except Exception:
|
||||
pass
|
||||
return meta
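The metadata helpers above degrade gracefully: mutagen when installed, otherwise the minimal MP4/AVI header parsers. A hedged sketch of the resulting shape (videoBytes and the values shown are illustrative only):

# Hypothetical call; videoBytes would come from the upload pipeline.
meta = _extractMetadata(videoBytes, "clip.mp4")
# A possible result, depending on what the container exposes:
# {"duration": 12.48, "width": 1920, "height": 1080, "bitrate": 4200}
# Keys whose values are None are filtered out before returning.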
|
||||
|
|
@@ -99,6 +99,7 @@ class XlsxExtractor(Extractor):
|
|||
cells.append(f'"{escaped_value}"')
|
||||
lines.append(",".join(cells))
|
||||
csvData = "\n".join(lines)
|
||||
fileName = context.get("fileName", "spreadsheet.xlsx")
|
||||
parts.append(ContentPart(
|
||||
id=makeId(),
|
||||
parentId=rootId,
|
||||
|
|
@@ -106,7 +107,15 @@ class XlsxExtractor(Extractor):
|
|||
typeGroup="table",
|
||||
mimeType="text/csv",
|
||||
data=csvData,
|
||||
metadata={"sheet": sheetName, "size": len(csvData.encode('utf-8'))}
|
||||
metadata={
|
||||
"sheet": sheetName,
|
||||
"size": len(csvData.encode('utf-8')),
|
||||
"contextRef": {
|
||||
"containerPath": fileName,
|
||||
"location": f"sheet:{sheetName}",
|
||||
"sheetName": sheetName,
|
||||
},
|
||||
}
|
||||
))
|
||||
|
||||
return parts
|
||||
|
|
|
|||
|
|
@@ -243,11 +243,7 @@ class ExtractionService:
|
|||
errorCount=0
|
||||
)
|
||||
|
||||
self._get_service("chat").storeWorkflowStat(
|
||||
self._context.workflow,
|
||||
aiResponse,
|
||||
f"extraction.process.{doc.mimeType}"
|
||||
)
|
||||
# Cost is recorded via billingCallback in _callWithModel
|
||||
|
||||
# Write extraction results to debug file
|
||||
try:
|
||||
|
|
@@ -1230,15 +1226,52 @@ class ExtractionService:
|
|||
logger.info(f"Chunking {contentPart.typeGroup} part: contentSize={contentSize} bytes, textChunkSize={textChunkSize} bytes, structureChunkSize={structureChunkSize} bytes")
|
||||
chunks = chunker.chunk(contentPart, chunkingOptions)
|
||||
logger.info(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part (contentSize={contentSize} bytes)")
|
||||
if chunks:
|
||||
for i, chunk in enumerate(chunks):
|
||||
chunkSize = len(chunk.get('data', '').encode('utf-8')) if chunk.get('data') else 0
|
||||
logger.info(f" Chunk {i+1}/{len(chunks)}: {chunkSize} bytes")
|
||||
return chunks
|
||||
|
||||
# Post-chunking validation: force line-based split on any chunk still exceeding target
|
||||
validatedChunks = []
|
||||
for i, chunk in enumerate(chunks):
|
||||
chunkData = chunk.get('data', '')
|
||||
chunkSize = len(chunkData.encode('utf-8')) if chunkData else 0
|
||||
if chunkSize > availableContentBytes and chunkData:
|
||||
logger.warning(f" Chunk {i+1}/{len(chunks)}: {chunkSize} bytes exceeds target {availableContentBytes} bytes, force-splitting by lines")
|
||||
subChunks = self._forceLineSplit(chunkData, availableContentBytes, len(validatedChunks))
|
||||
validatedChunks.extend(subChunks)
|
||||
else:
|
||||
chunk["order"] = len(validatedChunks)
|
||||
validatedChunks.append(chunk)
|
||||
|
||||
if len(validatedChunks) != len(chunks):
|
||||
logger.info(f"Post-chunking validation: {len(chunks)} -> {len(validatedChunks)} chunks after force-splitting oversized chunks")
|
||||
|
||||
for i, chunk in enumerate(validatedChunks):
|
||||
chunkSize = len(chunk.get('data', '').encode('utf-8')) if chunk.get('data') else 0
|
||||
logger.info(f" Chunk {i+1}/{len(validatedChunks)}: {chunkSize} bytes")
|
||||
|
||||
return validatedChunks
|
||||
except Exception as e:
|
||||
logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}")
|
||||
return []
|
||||
|
||||
def _forceLineSplit(self, data: str, maxBytes: int, startOrder: int) -> List[Dict[str, Any]]:
|
||||
"""Line-based safety-net split for chunks that still exceed maxBytes after structured chunking."""
|
||||
chunks: List[Dict[str, Any]] = []
|
||||
current: List[str] = []
|
||||
size = 0
|
||||
for line in data.split('\n'):
|
||||
s = len(line.encode('utf-8')) + 1
|
||||
if size + s > maxBytes and current:
|
||||
text = '\n'.join(current)
|
||||
chunks.append({"data": text, "size": len(text.encode('utf-8')), "order": startOrder + len(chunks)})
|
||||
current = [line]
|
||||
size = s
|
||||
else:
|
||||
current.append(line)
|
||||
size += s
|
||||
if current:
|
||||
text = '\n'.join(current)
|
||||
chunks.append({"data": text, "size": len(text.encode('utf-8')), "order": startOrder + len(chunks)})
|
||||
return chunks
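Because the splitter only breaks at newline boundaries, a single oversized line still yields an oversized chunk; the sketch below covers the normal case (the service variable is a placeholder for an ExtractionService instance):

# Illustrative data: 1000 short lines, split against a 4 KiB target.
data = "\n".join(f"row {i}," + "x" * 80 for i in range(1000))
subChunks = service._forceLineSplit(data, maxBytes=4096, startOrder=0)
assert all(c["size"] <= 4096 or "\n" not in c["data"] for c in subChunks)
assert [c["order"] for c in subChunks] == list(range(len(subChunks)))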
|
||||
|
||||
async def processContentPartWithFallback(self, contentPart, prompt: str, options, failoverModelList, aiObjects, progressCallback=None) -> AiCallResponse:
|
||||
"""Process a single content part with model-aware chunking and fallback.
|
||||
|
||||
|
|
@@ -1386,73 +1419,210 @@ class ExtractionService:
|
|||
logger.warning(f"⚠️ Content part ({contentTokens:.0f} tokens est.) exceeds available space ({availableContentBytes/TOKEN_SAFETY_FACTOR:.0f} tokens est.), chunking required")
|
||||
|
||||
# If either condition fails, chunk the content
|
||||
# CRITICAL: IMAGE_GENERATE operations should NOT use chunking - they generate images from prompts, not process content chunks
|
||||
# CRITICAL: IMAGE_GENERATE operations should NOT use chunking
|
||||
if (totalTokens > maxTotalTokens or partSize > availableContentBytes) and options.operationType != OperationTypeEnum.IMAGE_GENERATE:
|
||||
# Part too large or total exceeds limit - chunk it (but not for image generation)
|
||||
chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt)
|
||||
if not chunks:
|
||||
raise ValueError(f"Failed to chunk content part for model {model.name}")
|
||||
|
||||
logger.info(f"Starting to process {len(chunks)} chunks with model {model.name}")
|
||||
# Parallel chunk processing with per-chunk failover
|
||||
remainingModels = failoverModelList[attempt:]
|
||||
allChunkResults, allResponses = await self._processChunksParallel(
|
||||
chunks, prompt, options, remainingModels, aiObjects, progressCallback
|
||||
)
|
||||
|
||||
if progressCallback:
|
||||
progressCallback(0.0, f"Starting to process {len(chunks)} chunks")
|
||||
if not allResponses:
|
||||
raise ValueError("All chunks failed for content part")
|
||||
|
||||
chunkResults = []
|
||||
for idx, chunk in enumerate(chunks):
|
||||
chunkNum = idx + 1
|
||||
chunkData = chunk.get('data', '')
|
||||
logger.info(f"Processing chunk {chunkNum}/{len(chunks)} with model {model.name}")
|
||||
mergedContent = self.mergePartResults(allResponses, options, [contentPart])
|
||||
|
||||
if progressCallback:
|
||||
progressCallback(chunkNum / len(chunks), f"Processing chunk {chunkNum}/{len(chunks)}")
|
||||
# Stitch pass: reconcile cross-chunk artifacts when multiple chunks were processed
|
||||
if len(allResponses) > 1:
|
||||
mergedContent = await self._stitchChunkResults(
|
||||
mergedContent, len(allResponses), prompt, options, aiObjects
|
||||
)
|
||||
|
||||
try:
|
||||
chunkResponse = await aiObjects._callWithModel(model, prompt, chunkData, options)
|
||||
chunkResults.append(chunkResponse)
|
||||
except Exception as chunkError:
|
||||
logger.error(f"Error processing chunk {chunkNum}/{len(chunks)}: {str(chunkError)}")
|
||||
# Continue with other chunks even if one fails
|
||||
continue
|
||||
|
||||
# Merge chunk results
|
||||
if not chunkResults:
|
||||
raise ValueError(f"All chunks failed for content part")
|
||||
|
||||
# Pass original contentPart to preserve typeGroup for all chunks (one-to-many: 1 part -> N chunks)
|
||||
mergedContent = self.mergePartResults(chunkResults, options, [contentPart])
|
||||
return AiCallResponse(
|
||||
content=mergedContent,
|
||||
modelName=model.name,
|
||||
provider=model.connectorType,
|
||||
priceCHF=sum(r.priceCHF for r in chunkResults),
|
||||
processingTime=sum(r.processingTime for r in chunkResults),
|
||||
bytesSent=sum(r.bytesSent for r in chunkResults),
|
||||
bytesReceived=sum(r.bytesReceived for r in chunkResults),
|
||||
errorCount=sum(r.errorCount for r in chunkResults)
|
||||
priceCHF=sum(r.priceCHF for r in allResponses),
|
||||
processingTime=sum(r.processingTime for r in allResponses),
|
||||
bytesSent=sum(r.bytesSent for r in allResponses),
|
||||
bytesReceived=sum(r.bytesReceived for r in allResponses),
|
||||
errorCount=sum(r.errorCount for r in allResponses)
|
||||
)
|
||||
else:
|
||||
# Part fits - call AI directly via aiObjects interface
|
||||
logger.info(f"✅ Content part fits within model limits, processing directly")
|
||||
# Part fits - call AI directly
|
||||
logger.info(f"Content part fits within model limits, processing directly")
|
||||
response = await aiObjects._callWithModel(model, prompt, contentPart.data, options)
|
||||
logger.info(f"✅ Content part processed successfully with model: {model.name}")
|
||||
logger.info(f"Content part processed successfully with model: {model.name}")
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
lastError = e
|
||||
error_msg = str(e) or type(e).__name__
|
||||
logger.warning(f"❌ Model {model.name} failed for content part: {error_msg}", exc_info=True)
|
||||
logger.warning(f"Model {model.name} failed for content part: {error_msg}", exc_info=True)
|
||||
|
||||
if attempt < len(failoverModelList) - 1:
|
||||
logger.info(f"🔄 Trying next failover model...")
|
||||
logger.info(f"Trying next failover model...")
|
||||
continue
|
||||
else:
|
||||
logger.error(f"💥 All {len(failoverModelList)} models failed for content part")
|
||||
logger.error(f"All {len(failoverModelList)} models failed for content part")
|
||||
break
|
||||
|
||||
# All models failed
|
||||
return self._createErrorResponse(f"All models failed: {str(lastError)}", 0, 0)
|
||||
|
||||
async def _processChunksParallel(
|
||||
self,
|
||||
chunks: List[Dict[str, Any]],
|
||||
prompt: str,
|
||||
options,
|
||||
failoverModels: list,
|
||||
aiObjects,
|
||||
progressCallback=None,
|
||||
maxRetries: int = 3
|
||||
) -> tuple:
|
||||
"""Process chunks in parallel. On failure, re-chunk only the failed chunks for the next model.
|
||||
|
||||
Returns (orderedResponses, allResponses) where orderedResponses is a list of
AiCallResponse objects sorted by original chunk order and allResponses is the flat list of all successful responses.
|
||||
"""
|
||||
if not failoverModels:
|
||||
return {}, []
|
||||
|
||||
pendingChunks = [(chunk.get("order", i), chunk) for i, chunk in enumerate(chunks)]
|
||||
completedResults: Dict[float, AiCallResponse] = {}
|
||||
allResponses: List[AiCallResponse] = []
|
||||
retryCount = 0
|
||||
modelIdx = 0
|
||||
currentModel = failoverModels[modelIdx]
|
||||
|
||||
maxConcurrent = 3
|
||||
semaphore = asyncio.Semaphore(maxConcurrent)
|
||||
|
||||
logger.info(f"Starting parallel chunk processing: {len(pendingChunks)} chunks with model {currentModel.name}")
|
||||
|
||||
while pendingChunks and retryCount <= maxRetries and currentModel:
|
||||
modelForRound = currentModel
|
||||
totalInRound = len(pendingChunks)
|
||||
completedInRound = [0]
|
||||
|
||||
async def _processOneChunk(order: float, chunkData: str, model=modelForRound):
|
||||
async with semaphore:
|
||||
result = await aiObjects._callWithModel(model, prompt, chunkData, options)
|
||||
completedInRound[0] += 1
|
||||
if progressCallback:
|
||||
progressCallback(completedInRound[0] / totalInRound, f"Chunk {completedInRound[0]}/{totalInRound} completed")
|
||||
return result
|
||||
|
||||
tasks = {}
|
||||
for order, chunk in pendingChunks:
|
||||
chunkData = chunk.get('data', '')
|
||||
tasks[order] = asyncio.create_task(_processOneChunk(order, chunkData))
|
||||
|
||||
if progressCallback:
|
||||
progressCallback(0.0, f"Processing {len(tasks)} chunks in parallel with {currentModel.name}")
|
||||
|
||||
results = await asyncio.gather(*tasks.values(), return_exceptions=True)
|
||||
|
||||
failedChunks = []
|
||||
for (order, chunk), result in zip(pendingChunks, results):
|
||||
if isinstance(result, Exception):
|
||||
logger.warning(f"Chunk order={order} failed with {currentModel.name}: {result}")
|
||||
failedChunks.append((order, chunk))
|
||||
else:
|
||||
completedResults[order] = result
|
||||
allResponses.append(result)
|
||||
|
||||
logger.info(f"Round {retryCount}: {len(pendingChunks) - len(failedChunks)}/{len(pendingChunks)} chunks succeeded with {currentModel.name}")
|
||||
|
||||
if not failedChunks:
|
||||
break
|
||||
|
||||
retryCount += 1
|
||||
modelIdx += 1
|
||||
if modelIdx >= len(failoverModels):
|
||||
logger.error(f"No more failover models available, {len(failedChunks)} chunks remain failed")
|
||||
break
|
||||
|
||||
currentModel = failoverModels[modelIdx]
|
||||
logger.info(f"Failover: re-chunking {len(failedChunks)} failed chunks for model {currentModel.name}")
|
||||
|
||||
newPending = []
|
||||
for order, failedChunk in failedChunks:
|
||||
reChunked = await self._reChunkForModel(failedChunk, currentModel, prompt, options)
|
||||
for i, subChunk in enumerate(reChunked):
|
||||
subOrder = order + i * 0.001
|
||||
newPending.append((subOrder, subChunk))
|
||||
|
||||
pendingChunks = newPending
|
||||
|
||||
orderedResponses = [completedResults[k] for k in sorted(completedResults.keys())]
|
||||
return orderedResponses, allResponses
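One subtle point in the loop above: re-chunked sub-chunks get fractional orders (order + i * 0.001), so after sorting completedResults their results slot back into the position of the chunk they replaced. A toy illustration, not the service's real data:

# Chunk 2 failed on the first model and was split into three sub-chunks for the failover model.
orders = [0, 1, 2.000, 2.001, 2.002, 3]
completed = {o: f"resp-{o}" for o in orders}
ordered = [completed[k] for k in sorted(completed)]
# Document order is preserved: chunk 0, 1, the three pieces of 2, then 3.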
|
||||
|
||||
async def _reChunkForModel(self, chunk: Dict[str, Any], model, prompt: str, options) -> List[Dict[str, Any]]:
|
||||
"""Re-chunk a single failed chunk according to the new model's context limits."""
|
||||
chunkData = chunk.get('data', '')
|
||||
tempPart = ContentPart(
|
||||
id=f"rechunk_{uuid.uuid4().hex[:8]}",
|
||||
label="re-chunk",
|
||||
typeGroup="structure" if chunkData.strip().startswith(('{', '[')) else "text",
|
||||
mimeType="application/json" if chunkData.strip().startswith(('{', '[')) else "text/plain",
|
||||
data=chunkData
|
||||
)
|
||||
reChunked = await self.chunkContentPartForAi(tempPart, model, options, prompt)
|
||||
if not reChunked:
|
||||
return [chunk]
|
||||
return reChunked
|
||||
|
||||
async def _stitchChunkResults(
|
||||
self,
|
||||
mergedContent: str,
|
||||
chunkCount: int,
|
||||
originalPrompt: str,
|
||||
options,
|
||||
aiObjects
|
||||
) -> str:
|
||||
"""Reconcile cross-chunk artifacts in merged content.
|
||||
|
||||
Only called when chunkCount > 1. Delegates to aiObjects.callWithTextContext
|
||||
which handles model selection, failover, and billing.
|
||||
"""
|
||||
mergedSize = len(mergedContent.encode('utf-8')) if mergedContent else 0
|
||||
|
||||
stitchPrompt = (
|
||||
"The following content was assembled from multiple independently processed "
|
||||
f"chunks ({chunkCount} chunks) of the same document. "
|
||||
"Review and fix ONLY these issues, preserving all content:\n"
|
||||
"1. Cross-references that point to content from other chunks\n"
|
||||
"2. Duplicate text at chunk boundaries (remove duplicates)\n"
|
||||
"3. Sentences or paragraphs split mid-thought (reconnect them)\n"
|
||||
"4. Inconsistent terminology for the same entity\n\n"
|
||||
"Do NOT add, remove, or rephrase content beyond these fixes. "
|
||||
"Return the corrected content in the same format.\n\n"
|
||||
f"Original processing instruction (truncated): {originalPrompt[:500]}"
|
||||
)
|
||||
|
||||
try:
|
||||
logger.info(f"Running stitch pass on {mergedSize} bytes")
|
||||
request = AiCallRequest(
|
||||
prompt=stitchPrompt,
|
||||
context=mergedContent,
|
||||
options=AiCallOptions(operationType=OperationTypeEnum.DATA_ANALYSE)
|
||||
)
|
||||
response = await aiObjects.callWithTextContext(request)
|
||||
if hasattr(response, 'errorCount') and response.errorCount > 0:
|
||||
logger.warning(f"Stitch pass returned error: {response.content[:200] if response.content else 'empty'}")
|
||||
return mergedContent
|
||||
resultSize = len(response.content.encode('utf-8')) if response.content else 0
|
||||
logger.info(f"Stitch pass completed: {mergedSize} -> {resultSize} bytes")
|
||||
return response.content
|
||||
except Exception as e:
|
||||
logger.warning(f"Stitch pass failed (non-fatal), returning unstitched content: {e}")
|
||||
return mergedContent
|
||||
|
||||
def _createErrorResponse(self, errorMsg: str, inputBytes: int, outputBytes: int) -> AiCallResponse:
|
||||
"""Create an error response."""
|
||||
return AiCallResponse(
|
||||
|
|
@@ -1521,9 +1691,18 @@ class ExtractionService:
|
|||
progressCallback(0.1 + (partIndex / totalParts) * 0.8, f"Processing {partLabel} ({partType}) - {partIndex+1}/{totalParts}")
|
||||
|
||||
try:
|
||||
# Process the part
|
||||
partProgressCb = None
|
||||
if progressCallback:
|
||||
partStart = 0.1 + (partIndex / totalParts) * 0.8
|
||||
partRange = 0.8 / totalParts
|
||||
def _makePartProgressCb(start, rangeSize):
|
||||
def _cb(chunkProgress, message):
|
||||
progressCallback(start + chunkProgress * rangeSize, message)
|
||||
return _cb
|
||||
partProgressCb = _makePartProgressCb(partStart, partRange)
|
||||
|
||||
partResult = await self.processContentPartWithFallback(
|
||||
contentPart, prompt, options, failoverModelList, aiObjects, None # Don't pass progressCallback to avoid double logging
|
||||
contentPart, prompt, options, failoverModelList, aiObjects, partProgressCb
|
||||
)
|
||||
|
||||
# Write debug files for generation phase (section content generation)
|
||||
|
|
|
|||
|
|
@@ -191,9 +191,11 @@ class ChunkerRegistry:
|
|||
self.register("table", TableChunker())
|
||||
self.register("structure", StructureChunker())
|
||||
self.register("image", ImageChunker())
|
||||
# Use text chunker for container and binary content
|
||||
# Use text chunker for container, binary, and media stream content
|
||||
self.register("container", TextChunker())
|
||||
self.register("binary", TextChunker())
|
||||
self.register("audiostream", TextChunker())
|
||||
self.register("videostream", TextChunker())
|
||||
except Exception as e:
|
||||
logger.error(f"ChunkerRegistry: Failed to register chunkers: {str(e)}")
|
||||
import traceback
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,3 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""serviceKnowledge: 3-tier RAG Knowledge Store with semantic search."""
|
||||
|
|
@@ -0,0 +1,531 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Knowledge service: 3-tier RAG with indexing, semantic search, and context building."""
|
||||
|
||||
import logging
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
from modules.datamodels.datamodelKnowledge import (
|
||||
FileContentIndex, ContentChunk, WorkflowMemory,
|
||||
)
|
||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||||
from modules.shared.timeUtils import getUtcTimestamp
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_CHUNK_SIZE = 512
|
||||
DEFAULT_CONTEXT_BUDGET = 8000
|
||||
|
||||
|
||||
class KnowledgeService:
|
||||
"""Service for Knowledge Store operations: indexing, retrieval, and context building."""
|
||||
|
||||
def __init__(self, context, get_service: Callable[[str], Any]):
|
||||
self._context = context
|
||||
self._getService = get_service
|
||||
self._knowledgeDb = getKnowledgeInterface(context.user)
|
||||
|
||||
# =========================================================================
|
||||
# Embedding helper
|
||||
# =========================================================================
|
||||
|
||||
async def _embed(self, texts: List[str]) -> List[List[float]]:
|
||||
"""Embed texts via the AI interface's generic embedding method."""
|
||||
aiService = self._getService("ai")
|
||||
await aiService.ensureAiObjectsInitialized()
|
||||
aiObjects = aiService.aiObjects
|
||||
if aiObjects is None:
|
||||
logger.warning("Embedding skipped: aiObjects not available")
|
||||
return []
|
||||
response = await aiObjects.callEmbedding(texts)
|
||||
if response.errorCount > 0:
|
||||
logger.error(f"Embedding failed: {response.content}")
|
||||
return []
|
||||
return (response.metadata or {}).get("embeddings", [])
|
||||
|
||||
async def _embedSingle(self, text: str) -> List[float]:
|
||||
"""Embed a single text. Returns empty list on failure."""
|
||||
results = await self._embed([text])
|
||||
return results[0] if results else []
|
||||
|
||||
# =========================================================================
|
||||
# File Indexing (called after extraction, before embedding)
|
||||
# =========================================================================
|
||||
|
||||
async def indexFile(
|
||||
self,
|
||||
fileId: str,
|
||||
fileName: str,
|
||||
mimeType: str,
|
||||
userId: str,
|
||||
featureInstanceId: str = "",
|
||||
mandateId: str = "",
|
||||
contentObjects: List[Dict[str, Any]] = None,
|
||||
structure: Dict[str, Any] = None,
|
||||
containerPath: str = None,
|
||||
) -> FileContentIndex:
|
||||
"""Index a file's content objects and create embeddings for text chunks.
|
||||
|
||||
This is the main entry point after non-AI extraction has produced content objects.
|
||||
|
||||
Args:
|
||||
fileId: The file ID.
|
||||
fileName: Original file name.
|
||||
mimeType: MIME type.
|
||||
userId: Owner user.
|
||||
featureInstanceId: Feature instance scope.
|
||||
mandateId: Mandate scope.
|
||||
contentObjects: List of extracted content objects, each with keys:
|
||||
contentType (str), data (str), contextRef (dict), contentObjectId (str).
|
||||
structure: Structural overview of the file.
|
||||
containerPath: Path within container if applicable.
|
||||
|
||||
Returns:
|
||||
The created FileContentIndex.
|
||||
"""
|
||||
contentObjects = contentObjects or []
|
||||
|
||||
# 1. Create FileContentIndex
|
||||
index = FileContentIndex(
|
||||
id=fileId,
|
||||
userId=userId,
|
||||
featureInstanceId=featureInstanceId,
|
||||
mandateId=mandateId,
|
||||
fileName=fileName,
|
||||
mimeType=mimeType,
|
||||
containerPath=containerPath,
|
||||
totalObjects=len(contentObjects),
|
||||
totalSize=sum(len(obj.get("data", "").encode("utf-8")) for obj in contentObjects),
|
||||
structure=structure or {},
|
||||
objectSummary=[
|
||||
{
|
||||
"id": obj.get("contentObjectId", ""),
|
||||
"type": obj.get("contentType", "other"),
|
||||
"size": len(obj.get("data", "").encode("utf-8")),
|
||||
"ref": obj.get("contextRef", {}),
|
||||
}
|
||||
for obj in contentObjects
|
||||
],
|
||||
status="extracted",
|
||||
)
|
||||
self._knowledgeDb.upsertFileContentIndex(index)
|
||||
|
||||
# 2. Chunk text content objects and create embeddings
|
||||
textObjects = [o for o in contentObjects if o.get("contentType") == "text"]
|
||||
if textObjects:
|
||||
self._knowledgeDb.updateFileStatus(fileId, "embedding")
|
||||
chunks = _chunkForEmbedding(textObjects, chunkSize=DEFAULT_CHUNK_SIZE)
|
||||
texts = [c["data"] for c in chunks]
|
||||
|
||||
embeddings = await self._embed(texts) if texts else []
|
||||
|
||||
for i, chunk in enumerate(chunks):
|
||||
embedding = embeddings[i] if i < len(embeddings) else None
|
||||
contentChunk = ContentChunk(
|
||||
contentObjectId=chunk["contentObjectId"],
|
||||
fileId=fileId,
|
||||
userId=userId,
|
||||
featureInstanceId=featureInstanceId,
|
||||
contentType="text",
|
||||
data=chunk["data"],
|
||||
contextRef=chunk["contextRef"],
|
||||
embedding=embedding,
|
||||
)
|
||||
self._knowledgeDb.upsertContentChunk(contentChunk)
|
||||
|
||||
# 3. Store non-text content objects (images, etc.) without embedding
|
||||
nonTextObjects = [o for o in contentObjects if o.get("contentType") != "text"]
|
||||
for obj in nonTextObjects:
|
||||
contentChunk = ContentChunk(
|
||||
contentObjectId=obj.get("contentObjectId", ""),
|
||||
fileId=fileId,
|
||||
userId=userId,
|
||||
featureInstanceId=featureInstanceId,
|
||||
contentType=obj.get("contentType", "other"),
|
||||
data=obj.get("data", ""),
|
||||
contextRef=obj.get("contextRef", {}),
|
||||
embedding=None,
|
||||
)
|
||||
self._knowledgeDb.upsertContentChunk(contentChunk)
|
||||
|
||||
self._knowledgeDb.updateFileStatus(fileId, "indexed")
|
||||
index.status = "indexed"
|
||||
logger.info(f"Indexed file {fileId} ({fileName}): {len(contentObjects)} objects, {len(textObjects)} text chunks")
|
||||
return index
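A sketch of how a caller might hand extraction output to indexFile; the contentObjects shape follows the docstring above, while the service variable and IDs are placeholders:

index = await knowledgeService.indexFile(
    fileId="file-123",
    fileName="report.pdf",
    mimeType="application/pdf",
    userId="user-1",
    featureInstanceId="fi-1",
    contentObjects=[
        {
            "contentObjectId": "co-0",
            "contentType": "text",
            "data": "Quarterly revenue grew by 8 percent.",
            "contextRef": {"containerPath": "report.pdf", "location": "page:1", "pageIndex": 0},
        },
    ],
    structure={"pages": 1, "sections": []},
)
assert index.status == "indexed"  # text objects were chunked and embedded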
|
||||
|
||||
# =========================================================================
|
||||
# RAG Context Building (3-tier search)
|
||||
# =========================================================================
|
||||
|
||||
async def buildAgentContext(
|
||||
self,
|
||||
currentPrompt: str,
|
||||
workflowId: str,
|
||||
userId: str,
|
||||
featureInstanceId: str = "",
|
||||
mandateId: str = "",
|
||||
contextBudget: int = DEFAULT_CONTEXT_BUDGET,
|
||||
) -> str:
|
||||
"""Build RAG context for an agent round by searching all 3 layers.
|
||||
|
||||
Args:
|
||||
currentPrompt: The current user prompt to find relevant context for.
|
||||
workflowId: Current workflow ID.
|
||||
userId: Current user.
|
||||
featureInstanceId: Feature instance scope.
|
||||
mandateId: Mandate scope.
|
||||
contextBudget: Maximum characters for the context string.
|
||||
|
||||
Returns:
|
||||
Formatted context string for injection into the agent's system prompt.
|
||||
"""
|
||||
queryVector = await self._embedSingle(currentPrompt)
|
||||
if not queryVector:
|
||||
return ""
|
||||
|
||||
builder = _ContextBuilder(budget=contextBudget)
|
||||
|
||||
# Layer 1: Instance Layer (user's own documents, highest priority)
|
||||
instanceChunks = self._knowledgeDb.semanticSearch(
|
||||
queryVector=queryVector,
|
||||
userId=userId,
|
||||
featureInstanceId=featureInstanceId,
|
||||
limit=15,
|
||||
minScore=0.65,
|
||||
)
|
||||
if instanceChunks:
|
||||
builder.add(priority=1, label="Relevant Documents", items=instanceChunks)
|
||||
|
||||
# Layer 2: Workflow Layer (current workflow entities & memory)
|
||||
entities = self._knowledgeDb.getWorkflowEntities(workflowId)
|
||||
if entities:
|
||||
builder.add(priority=2, label="Workflow Context", items=entities, isKeyValue=True)
|
||||
|
||||
# Layer 3: Shared Layer (mandate-wide shared documents)
|
||||
sharedChunks = self._knowledgeDb.semanticSearch(
|
||||
queryVector=queryVector,
|
||||
mandateId=mandateId,
|
||||
isShared=True,
|
||||
limit=10,
|
||||
minScore=0.7,
|
||||
)
|
||||
if sharedChunks:
|
||||
builder.add(priority=3, label="Shared Knowledge", items=sharedChunks)
|
||||
|
||||
return builder.build()
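The three layers are queried in priority order and trimmed to the character budget by _ContextBuilder. A hedged usage sketch (identifiers other than buildAgentContext are placeholders):

ragContext = await knowledgeService.buildAgentContext(
    currentPrompt="What were the Q3 figures for the Basel mandate?",
    workflowId="wf-42",
    userId="user-1",
    featureInstanceId="fi-1",
    mandateId="mandate-7",
    contextBudget=6000,
)
# An empty string means embedding failed or nothing relevant was found.
systemPrompt = basePrompt + ("\n\n" + ragContext if ragContext else "")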
|
||||
|
||||
# =========================================================================
|
||||
# Workflow Memory
|
||||
# =========================================================================
|
||||
|
||||
async def storeEntity(
|
||||
self,
|
||||
workflowId: str,
|
||||
userId: str,
|
||||
featureInstanceId: str,
|
||||
key: str,
|
||||
value: str,
|
||||
source: str = "extraction",
|
||||
) -> WorkflowMemory:
|
||||
"""Store a key-value entity in workflow memory with optional embedding."""
|
||||
embedding = await self._embedSingle(f"{key}: {value}")
|
||||
memory = WorkflowMemory(
|
||||
workflowId=workflowId,
|
||||
userId=userId,
|
||||
featureInstanceId=featureInstanceId,
|
||||
key=key,
|
||||
value=value,
|
||||
source=source,
|
||||
embedding=embedding if embedding else None,
|
||||
)
|
||||
self._knowledgeDb.upsertWorkflowMemory(memory)
|
||||
return memory
|
||||
|
||||
def getEntities(self, workflowId: str) -> List[Dict[str, Any]]:
|
||||
"""Get all entities for a workflow."""
|
||||
return self._knowledgeDb.getWorkflowEntities(workflowId)
|
||||
|
||||
# =========================================================================
|
||||
# File Status
|
||||
# =========================================================================
|
||||
|
||||
def getFileStatus(self, fileId: str) -> Optional[str]:
|
||||
"""Get the indexing status of a file."""
|
||||
index = self._knowledgeDb.getFileContentIndex(fileId)
|
||||
return index.get("status") if index else None
|
||||
|
||||
def isFileIndexed(self, fileId: str) -> bool:
|
||||
"""Check if a file has been fully indexed."""
|
||||
return self.getFileStatus(fileId) == "indexed"
|
||||
|
||||
# =========================================================================
|
||||
# On-Demand Extraction (Smart Document Handling)
|
||||
# =========================================================================
|
||||
|
||||
async def readSection(self, fileId: str, sectionId: str) -> List[Dict[str, Any]]:
|
||||
"""Read content objects for a specific section. Uses cache if available.
|
||||
|
||||
Args:
|
||||
fileId: Source file ID.
|
||||
sectionId: Section identifier from the FileContentIndex structure.
|
||||
|
||||
Returns:
|
||||
List of content object dicts with data and contextRef.
|
||||
"""
|
||||
cached = self._knowledgeDb.getContentChunks(fileId)
|
||||
sectionChunks = [
|
||||
c for c in (cached or [])
|
||||
if (c.get("contextRef", {}).get("sectionId") == sectionId)
|
||||
]
|
||||
if sectionChunks:
|
||||
return sectionChunks
|
||||
|
||||
index = self._knowledgeDb.getFileContentIndex(fileId)
|
||||
if not index:
|
||||
return []
|
||||
|
||||
structure = index.get("structure", {}) if isinstance(index, dict) else getattr(index, "structure", {})
|
||||
sections = structure.get("sections", [])
|
||||
section = next((s for s in sections if s.get("id") == sectionId), None)
|
||||
if not section:
|
||||
return []
|
||||
|
||||
startPage = section.get("startPage", 0)
|
||||
endPage = section.get("endPage", startPage)
|
||||
|
||||
return await self._extractPagesOnDemand(fileId, startPage, endPage, sectionId)
|
||||
|
||||
async def readContentObjects(
|
||||
self, fileId: str, filter: Dict[str, Any] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Read content objects with optional filters (pageIndex, contentType, sectionId).
|
||||
|
||||
Args:
|
||||
fileId: Source file ID.
|
||||
filter: Optional dict with keys pageIndex (list[int]), contentType (str), sectionId (str).
|
||||
|
||||
Returns:
|
||||
Filtered list of content chunk dicts.
|
||||
"""
|
||||
filter = filter or {}
|
||||
chunks = self._knowledgeDb.getContentChunks(fileId) or []
|
||||
|
||||
if "pageIndex" in filter:
|
||||
targetPages = filter["pageIndex"]
|
||||
if isinstance(targetPages, int):
|
||||
targetPages = [targetPages]
|
||||
chunks = [
|
||||
c for c in chunks
|
||||
if c.get("contextRef", {}).get("pageIndex") in targetPages
|
||||
]
|
||||
|
||||
if "contentType" in filter:
|
||||
chunks = [c for c in chunks if c.get("contentType") == filter["contentType"]]
|
||||
|
||||
if "sectionId" in filter:
|
||||
chunks = [
|
||||
c for c in chunks
|
||||
if c.get("contextRef", {}).get("sectionId") == filter["sectionId"]
|
||||
]
|
||||
|
||||
return chunks
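Filter keys compose with AND semantics, each narrowing the cached chunk list. A sketch (fileId and the stored chunks are assumed to exist):

# Text chunks from the second and third pages of an indexed file.
chunks = await knowledgeService.readContentObjects(
    "file-123",
    filter={"pageIndex": [1, 2], "contentType": "text"},
)
for c in chunks:
    print(c["contextRef"]["location"], len(c["data"]))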
|
||||
|
||||
async def extractContainerItem(
|
||||
self, fileId: str, containerPath: str
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""On-demand extraction of a specific item within a container.
|
||||
|
||||
If the item is already indexed, returns existing data.
|
||||
Otherwise triggers extraction and indexing.
|
||||
|
||||
Args:
|
||||
fileId: The container file ID.
|
||||
containerPath: Path within the container (e.g. "folder/report.pdf").
|
||||
|
||||
Returns:
|
||||
FileContentIndex dict for the extracted item, or None.
|
||||
"""
|
||||
existing = self._knowledgeDb.getFileContentIndex(fileId)
|
||||
if existing:
|
||||
existingPath = existing.get("containerPath") if isinstance(existing, dict) else getattr(existing, "containerPath", None)
|
||||
if existingPath == containerPath:
|
||||
return existing
|
||||
|
||||
logger.info(f"On-demand extraction for {containerPath} in file {fileId}")
|
||||
return None
|
||||
|
||||
async def _extractPagesOnDemand(
|
||||
self, fileId: str, startPage: int, endPage: int, sectionId: str
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Extract specific pages from a file and cache in knowledge store."""
|
||||
try:
|
||||
chatService = self._getService("chat")
|
||||
fileContent = chatService.getFileContent(fileId)
|
||||
if not fileContent:
|
||||
return []
|
||||
|
||||
fileData = fileContent.get("data", b"")
|
||||
mimeType = fileContent.get("mimeType", "")
|
||||
fileName = fileContent.get("fileName", "")
|
||||
|
||||
if isinstance(fileData, str):
|
||||
import base64
|
||||
fileData = base64.b64decode(fileData)
|
||||
|
||||
if mimeType != "application/pdf":
|
||||
return []
|
||||
|
||||
try:
|
||||
import fitz
|
||||
except ImportError:
|
||||
return []
|
||||
|
||||
doc = fitz.open(stream=fileData, filetype="pdf")
|
||||
results = []
|
||||
|
||||
for pageIdx in range(startPage, min(endPage + 1, len(doc))):
|
||||
page = doc[pageIdx]
|
||||
text = page.get_text() or ""
|
||||
if not text.strip():
|
||||
continue
|
||||
|
||||
chunk = ContentChunk(
|
||||
contentObjectId=f"page-{pageIdx}",
|
||||
fileId=fileId,
|
||||
userId=self._context.user.id if self._context.user else "",
|
||||
featureInstanceId=self._context.feature_instance_id or "",
|
||||
contentType="text",
|
||||
data=text,
|
||||
contextRef={
|
||||
"containerPath": fileName,
|
||||
"location": f"page:{pageIdx+1}",
|
||||
"pageIndex": pageIdx,
|
||||
"sectionId": sectionId,
|
||||
},
|
||||
)
|
||||
|
||||
embedding = await self._embedSingle(text[:2000])
|
||||
if embedding:
|
||||
chunk.embedding = embedding
|
||||
|
||||
self._knowledgeDb.upsertContentChunk(chunk)
|
||||
results.append(chunk.model_dump())
|
||||
|
||||
doc.close()
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"On-demand page extraction failed: {e}")
|
||||
return []
|
||||
|
||||
def getFileContentIndex(self, fileId: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get the FileContentIndex for a file."""
|
||||
return self._knowledgeDb.getFileContentIndex(fileId)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Internal helpers
|
||||
# =============================================================================
|
||||
|
||||
def _chunkForEmbedding(
|
||||
textObjects: List[Dict[str, Any]], chunkSize: int = 512
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Split text content objects into chunks suitable for embedding.
|
||||
|
||||
Each chunk preserves the contextRef from its source object.
|
||||
Long texts are split at sentence boundaries where possible.
|
||||
"""
|
||||
chunks = []
|
||||
for obj in textObjects:
|
||||
text = obj.get("data", "")
|
||||
contentObjectId = obj.get("contentObjectId", "")
|
||||
contextRef = obj.get("contextRef", {})
|
||||
|
||||
if len(text) <= chunkSize:
|
||||
chunks.append({
|
||||
"data": text,
|
||||
"contentObjectId": contentObjectId,
|
||||
"contextRef": contextRef,
|
||||
})
|
||||
continue
|
||||
|
||||
# Split at sentence boundaries
|
||||
sentences = text.replace("\n", " ").split(". ")
|
||||
currentChunk = ""
|
||||
for sentence in sentences:
|
||||
candidate = f"{currentChunk}. {sentence}" if currentChunk else sentence
|
||||
if len(candidate) > chunkSize and currentChunk:
|
||||
chunks.append({
|
||||
"data": currentChunk.strip(),
|
||||
"contentObjectId": contentObjectId,
|
||||
"contextRef": contextRef,
|
||||
})
|
||||
currentChunk = sentence
|
||||
else:
|
||||
currentChunk = candidate
|
||||
|
||||
if currentChunk.strip():
|
||||
chunks.append({
|
||||
"data": currentChunk.strip(),
|
||||
"contentObjectId": contentObjectId,
|
||||
"contextRef": contextRef,
|
||||
})
|
||||
|
||||
return chunks
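Since the split happens on ". " boundaries, a chunk can slightly exceed chunkSize when one sentence alone is longer than the limit; every chunk inherits the contextRef of its source object. Illustrative only:

longText = ". ".join(f"Sentence number {i} about the document" for i in range(200))
chunks = _chunkForEmbedding(
    [{"data": longText, "contentObjectId": "co-0", "contextRef": {"pageIndex": 0}}],
    chunkSize=512,
)
assert all(c["contextRef"] == {"pageIndex": 0} for c in chunks)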
|
||||
|
||||
|
||||
class _ContextBuilder:
|
||||
"""Assembles RAG context from multiple sources respecting a character budget."""
|
||||
|
||||
def __init__(self, budget: int):
|
||||
self._budget = budget
|
||||
self._sections: List[Dict[str, Any]] = []
|
||||
|
||||
def add(
|
||||
self,
|
||||
priority: int,
|
||||
label: str,
|
||||
items: List[Dict[str, Any]],
|
||||
isKeyValue: bool = False,
|
||||
):
|
||||
self._sections.append({
|
||||
"priority": priority,
|
||||
"label": label,
|
||||
"items": items,
|
||||
"isKeyValue": isKeyValue,
|
||||
})
|
||||
|
||||
def build(self) -> str:
|
||||
self._sections.sort(key=lambda s: s["priority"])
|
||||
parts = []
|
||||
remaining = self._budget
|
||||
|
||||
for section in self._sections:
|
||||
if remaining <= 0:
|
||||
break
|
||||
|
||||
header = f"### {section['label']}\n"
|
||||
sectionText = header
|
||||
remaining -= len(header)
|
||||
|
||||
for item in section["items"]:
|
||||
if remaining <= 0:
|
||||
break
|
||||
|
||||
if section["isKeyValue"]:
|
||||
line = f"- {item.get('key', '')}: {item.get('value', '')}\n"
|
||||
else:
|
||||
data = item.get("data", "")
|
||||
ref = item.get("contextRef", {})
|
||||
score = item.get("_score", "")
|
||||
refStr = f" [{ref}]" if ref else ""
|
||||
line = f"{data}{refStr}\n"
|
||||
|
||||
if len(line) <= remaining:
|
||||
sectionText += line
|
||||
remaining -= len(line)
|
||||
|
||||
parts.append(sectionText)
|
||||
|
||||
return "\n".join(parts).strip()
|
||||
427
modules/serviceCenter/services/serviceKnowledge/subPreScan.py
Normal file
|
|
@@ -0,0 +1,427 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Structure Pre-Scan: fast, AI-free document analysis.
|
||||
|
||||
Extracts TOC, headings, page map, image positions, and structural metadata
|
||||
from documents. Used as the first step in the auto-index pipeline.
|
||||
|
||||
Supported formats:
|
||||
- PDF: TOC, heading detection (font-size heuristic), page map, image positions
|
||||
- DOCX: heading styles, paragraph map
|
||||
- PPTX: slide titles, slide map
|
||||
- XLSX: sheet names, row/column counts
|
||||
- Other: minimal index (single content object = the file itself)
|
||||
"""
|
||||
|
||||
import io
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
from modules.datamodels.datamodelKnowledge import FileContentIndex
|
||||
from modules.datamodels.datamodelContent import ContentObjectSummary, ContentContextRef
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def preScanDocument(
|
||||
fileData: bytes,
|
||||
mimeType: str,
|
||||
fileId: str,
|
||||
fileName: str = "",
|
||||
userId: str = "",
|
||||
featureInstanceId: str = "",
|
||||
mandateId: str = "",
|
||||
) -> FileContentIndex:
|
||||
"""Create a structural FileContentIndex without AI.
|
||||
|
||||
This is purely programmatic: TOC extraction, heading detection,
|
||||
page mapping, image position scanning.
|
||||
"""
|
||||
scanner = _SCANNER_MAP.get(mimeType)
|
||||
if scanner is None:
|
||||
ext = (fileName.rsplit(".", 1)[-1].lower()) if "." in fileName else ""
|
||||
scanner = _EXTENSION_SCANNER_MAP.get(ext, _scanMinimal)
|
||||
|
||||
try:
|
||||
structure, objectSummary, totalObjects, totalSize = await scanner(fileData, fileName)
|
||||
except Exception as e:
|
||||
logger.error(f"Pre-scan failed for {fileName} ({mimeType}): {e}")
|
||||
structure = {"error": str(e)}
|
||||
objectSummary = []
|
||||
totalObjects = 0
|
||||
totalSize = len(fileData)
|
||||
|
||||
return FileContentIndex(
|
||||
id=fileId,
|
||||
userId=userId,
|
||||
featureInstanceId=featureInstanceId,
|
||||
mandateId=mandateId,
|
||||
fileName=fileName,
|
||||
mimeType=mimeType,
|
||||
totalObjects=totalObjects,
|
||||
totalSize=totalSize,
|
||||
structure=structure,
|
||||
objectSummary=[s.model_dump() for s in objectSummary],
|
||||
status="extracted",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PDF scanner
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _scanPdf(fileData: bytes, fileName: str):
|
||||
try:
|
||||
import fitz
|
||||
except ImportError:
|
||||
logger.warning("PyMuPDF not installed -- PDF pre-scan unavailable")
|
||||
return _fallbackStructure(fileData, fileName)
|
||||
|
||||
doc = fitz.open(stream=fileData, filetype="pdf")
|
||||
toc = doc.get_toc()
|
||||
|
||||
pageMap: List[Dict[str, Any]] = []
|
||||
summaries: List[ContentObjectSummary] = []
|
||||
totalSize = 0
|
||||
objIndex = 0
|
||||
|
||||
for i in range(len(doc)):
|
||||
page = doc[i]
|
||||
textLen = len(page.get_text())
|
||||
blocks = page.get_text("dict", flags=0).get("blocks", [])
|
||||
|
||||
headings = []
|
||||
for b in blocks:
|
||||
if b.get("type") != 0:
|
||||
continue
|
||||
for line in b.get("lines", []):
|
||||
for span in line.get("spans", []):
|
||||
if _isHeading(span):
|
||||
headings.append(span.get("text", "").strip())
|
||||
|
||||
images = page.get_images(full=True)
|
||||
hasTable = _detectTableHeuristic(page)
|
||||
|
||||
pageMap.append({
|
||||
"pageIndex": i,
|
||||
"headings": headings,
|
||||
"hasImages": len(images) > 0,
|
||||
"imageCount": len(images),
|
||||
"textLength": textLen,
|
||||
"hasTable": hasTable,
|
||||
})
|
||||
|
||||
if textLen > 0:
|
||||
summaries.append(ContentObjectSummary(
|
||||
id=f"co-{objIndex}",
|
||||
contentType="text",
|
||||
contextRef=ContentContextRef(
|
||||
containerPath=fileName,
|
||||
location=f"page:{i+1}",
|
||||
pageIndex=i,
|
||||
),
|
||||
charCount=textLen,
|
||||
))
|
||||
totalSize += textLen
|
||||
objIndex += 1
|
||||
|
||||
for j in range(len(images)):
|
||||
summaries.append(ContentObjectSummary(
|
||||
id=f"co-{objIndex}",
|
||||
contentType="image",
|
||||
contextRef=ContentContextRef(
|
||||
containerPath=fileName,
|
||||
location=f"page:{i+1}/image:{j}",
|
||||
pageIndex=i,
|
||||
),
|
||||
))
|
||||
objIndex += 1
|
||||
|
||||
sections = _buildSectionsFromTocOrHeadings(toc, pageMap)
|
||||
doc.close()
|
||||
|
||||
structure = {
|
||||
"pages": len(pageMap),
|
||||
"toc": toc,
|
||||
"sections": sections,
|
||||
"pageMap": pageMap,
|
||||
"imageCount": sum(p.get("imageCount", 0) for p in pageMap),
|
||||
"tableCount": sum(1 for p in pageMap if p.get("hasTable")),
|
||||
}
|
||||
return structure, summaries, len(summaries), totalSize
|
||||
|
||||
|
||||
def _isHeading(span: Dict) -> bool:
|
||||
"""Heuristic: heading if font size >= 14 or bold + size >= 12."""
|
||||
size = span.get("size", 0)
|
||||
flags = span.get("flags", 0)
|
||||
isBold = bool(flags & (1 << 4))
|
||||
return size >= 14 or (isBold and size >= 12)
|
||||
|
||||
|
||||
def _detectTableHeuristic(page) -> bool:
|
||||
"""Detect tables by looking for grid-like line patterns."""
|
||||
try:
|
||||
drawings = page.get_drawings()
|
||||
lineCount = sum(1 for d in drawings if d.get("type") == "l")
|
||||
return lineCount >= 6
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _buildSectionsFromTocOrHeadings(
|
||||
toc: list, pageMap: List[Dict]
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Build section boundaries from TOC or heading data."""
|
||||
sections: List[Dict[str, Any]] = []
|
||||
|
||||
if toc:
|
||||
for i, entry in enumerate(toc):
|
||||
level, title, pageNum = entry[0], entry[1], entry[2]
|
||||
endPage = toc[i + 1][2] - 1 if i + 1 < len(toc) else len(pageMap) - 1
|
||||
sections.append({
|
||||
"id": f"section-{i}",
|
||||
"title": title,
|
||||
"level": level,
|
||||
"startPage": pageNum - 1,
|
||||
"endPage": endPage,
|
||||
})
|
||||
else:
|
||||
currentSection = None
|
||||
for pm in pageMap:
|
||||
headings = pm.get("headings", [])
|
||||
if headings:
|
||||
if currentSection:
|
||||
currentSection["endPage"] = pm["pageIndex"] - 1
|
||||
sections.append(currentSection)
|
||||
currentSection = {
|
||||
"id": f"section-{len(sections)}",
|
||||
"title": headings[0],
|
||||
"level": 1,
|
||||
"startPage": pm["pageIndex"],
|
||||
"endPage": pm["pageIndex"],
|
||||
}
|
||||
elif currentSection:
|
||||
currentSection["endPage"] = pm["pageIndex"]
|
||||
|
||||
if currentSection:
|
||||
sections.append(currentSection)
|
||||
|
||||
return sections
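A quick check of the TOC branch: PyMuPDF TOC entries are (level, title, 1-based page), and a section ends where the next entry begins, so consecutive sections share their boundary page index. Illustrative values, not repository test data:

toc = [(1, "Introduction", 1), (1, "Methodology", 4), (2, "Sampling", 5)]
pageMap = [{"pageIndex": i, "headings": []} for i in range(10)]
sections = _buildSectionsFromTocOrHeadings(toc, pageMap)
# [{'id': 'section-0', 'title': 'Introduction', 'level': 1, 'startPage': 0, 'endPage': 3},
#  {'id': 'section-1', 'title': 'Methodology', 'level': 1, 'startPage': 3, 'endPage': 4},
#  {'id': 'section-2', 'title': 'Sampling', 'level': 2, 'startPage': 4, 'endPage': 9}]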
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# DOCX scanner
# ---------------------------------------------------------------------------

async def _scanDocx(fileData: bytes, fileName: str):
    try:
        import docx
    except ImportError:
        return _fallbackStructure(fileData, fileName)

    doc = docx.Document(io.BytesIO(fileData))
    summaries: List[ContentObjectSummary] = []
    sections: List[Dict[str, Any]] = []
    totalSize = 0
    objIndex = 0
    currentSection = None

    for i, para in enumerate(doc.paragraphs):
        text = para.text or ""
        styleName = (para.style.name or "").lower() if para.style else ""

        if "heading" in styleName and text.strip():
            if currentSection:
                sections.append(currentSection)
            level = 1
            for ch in styleName:
                if ch.isdigit():
                    level = int(ch)
                    break
            currentSection = {
                "id": f"section-{len(sections)}",
                "title": text.strip(),
                "level": level,
                "startParagraph": i,
                "endParagraph": i,
            }
        elif currentSection:
            currentSection["endParagraph"] = i

        if text.strip():
            summaries.append(ContentObjectSummary(
                id=f"co-{objIndex}",
                contentType="text",
                contextRef=ContentContextRef(
                    containerPath=fileName,
                    location=f"paragraph:{i+1}",
                    sectionId=currentSection["id"] if currentSection else "body",
                ),
                charCount=len(text),
            ))
            totalSize += len(text)
            objIndex += 1

    if currentSection:
        sections.append(currentSection)

    for ti, table in enumerate(doc.tables):
        summaries.append(ContentObjectSummary(
            id=f"co-{objIndex}",
            contentType="text",
            contextRef=ContentContextRef(
                containerPath=fileName,
                location=f"table:{ti+1}",
            ),
        ))
        objIndex += 1

    structure = {
        "paragraphs": len(doc.paragraphs),
        "tables": len(doc.tables),
        "sections": sections,
    }
    return structure, summaries, len(summaries), totalSize

# ---------------------------------------------------------------------------
# PPTX scanner
# ---------------------------------------------------------------------------

async def _scanPptx(fileData: bytes, fileName: str):
    try:
        from pptx import Presentation
    except ImportError:
        return _fallbackStructure(fileData, fileName)

    prs = Presentation(io.BytesIO(fileData))
    summaries: List[ContentObjectSummary] = []
    slideMap: List[Dict[str, Any]] = []
    totalSize = 0
    objIndex = 0

    for i, slide in enumerate(prs.slides):
        title = ""
        textLen = 0
        imageCount = 0
        for shape in slide.shapes:
            if hasattr(shape, "text"):
                textLen += len(shape.text)
                if shape.has_text_frame and not title:
                    title = shape.text.strip()[:80]
            if shape.shape_type == 13:
                imageCount += 1

        slideMap.append({
            "slideIndex": i,
            "title": title,
            "textLength": textLen,
            "imageCount": imageCount,
        })

        if textLen > 0:
            summaries.append(ContentObjectSummary(
                id=f"co-{objIndex}",
                contentType="text",
                contextRef=ContentContextRef(
                    containerPath=fileName,
                    location=f"slide:{i+1}",
                    slideIndex=i,
                ),
                charCount=textLen,
            ))
            totalSize += textLen
            objIndex += 1

    structure = {
        "slides": len(prs.slides),
        "slideMap": slideMap,
    }
    return structure, summaries, len(summaries), totalSize

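For reference, the magic number 13 compared against shape.shape_type corresponds to MSO_SHAPE_TYPE.PICTURE in python-pptx. A sketch of the equivalent, more readable check (the helper name _isPicture is illustrative, not from the repo):

```python
from pptx.enum.shapes import MSO_SHAPE_TYPE

def _isPicture(shape) -> bool:
    # Same test as `shape.shape_type == 13`, but self-documenting.
    return shape.shape_type == MSO_SHAPE_TYPE.PICTURE
```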
# ---------------------------------------------------------------------------
# XLSX scanner
# ---------------------------------------------------------------------------

async def _scanXlsx(fileData: bytes, fileName: str):
    try:
        import openpyxl
    except ImportError:
        return _fallbackStructure(fileData, fileName)

    wb = openpyxl.load_workbook(io.BytesIO(fileData), data_only=True, read_only=True)
    summaries: List[ContentObjectSummary] = []
    sheetMap: List[Dict[str, Any]] = []
    totalSize = 0
    objIndex = 0

    for sheetName in wb.sheetnames:
        ws = wb[sheetName]
        rowCount = ws.max_row or 0
        colCount = ws.max_column or 0

        sheetMap.append({
            "sheetName": sheetName,
            "rows": rowCount,
            "columns": colCount,
        })

        summaries.append(ContentObjectSummary(
            id=f"co-{objIndex}",
            contentType="text",
            contextRef=ContentContextRef(
                containerPath=fileName,
                location=f"sheet:{sheetName}",
                sheetName=sheetName,
            ),
            charCount=rowCount * colCount * 10,
        ))
        totalSize += rowCount * colCount * 10
        objIndex += 1

    wb.close()
    structure = {"sheets": len(wb.sheetnames), "sheetMap": sheetMap}
    return structure, summaries, len(summaries), totalSize

# ---------------------------------------------------------------------------
# Minimal / fallback scanner
# ---------------------------------------------------------------------------

async def _scanMinimal(fileData: bytes, fileName: str):
    return _fallbackStructure(fileData, fileName)


def _fallbackStructure(fileData: bytes, fileName: str):
    summary = ContentObjectSummary(
        id="co-0",
        contentType="other",
        contextRef=ContentContextRef(containerPath=fileName, location="file"),
        charCount=len(fileData),
    )
    structure = {"type": "single", "size": len(fileData)}
    return structure, [summary], 1, len(fileData)


# ---------------------------------------------------------------------------
# Scanner map
# ---------------------------------------------------------------------------

_SCANNER_MAP: Dict[str, Any] = {
    "application/pdf": _scanPdf,
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": _scanDocx,
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": _scanPptx,
    "application/vnd.ms-powerpoint": _scanPptx,
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": _scanXlsx,
}

_EXTENSION_SCANNER_MAP: Dict[str, Any] = {
    "pdf": _scanPdf,
    "docx": _scanDocx,
    "pptx": _scanPptx,
    "ppt": _scanPptx,
    "xlsx": _scanXlsx,
    "xlsm": _scanXlsx,
}

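A minimal sketch of how a caller might dispatch on these maps, trying the MIME type first, then the file extension, then the fallback scanner. The function name _scanByType is illustrative and not part of this diff:

```python
async def _scanByType(mimeType: str, fileName: str, fileData: bytes):
    # Extension fallback: everything after the last dot, lower-cased.
    extension = fileName.rsplit(".", 1)[-1].lower() if "." in fileName else ""
    scanner = _SCANNER_MAP.get(mimeType) or _EXTENSION_SCANNER_MAP.get(extension) or _scanMinimal
    return await scanner(fileData, fileName)
```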
@@ -375,7 +375,7 @@ USER PROVIDED:
- Language: {language or "Not specified"}

Extract and provide a JSON response with:
1. instruction: Formulate directly WHAT you want to find on the web. Do not include URLs in the instruction. Good example: "What is the company Xyz doing?". Bad example: "Conduct web research on the company Xyz"
1. instruction: Formulate a concise search query (MAXIMUM 400 characters) stating WHAT you want to find on the web. Do not include URLs in the instruction. Keep it focused on the core question. Good example: "What is the company Xyz doing?". Bad example: "Conduct web research on the company Xyz and find all information about..."
2. urls: List of URLs found in the prompt text, plus URLs you know that are relevant to the research
3. needsSearch: true if a web search is needed to identify URLs to crawl, false if only crawling of the provided URLs is wanted
4. maxNumberPages: Recommended number of URLs to crawl (based on research scope, typical: 2-20)
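For clarity, a minimal sketch of a response object that satisfies the four fields above; the variable name and all values are illustrative, not taken from the repository:

```python
webResearchPlan = {
    "instruction": "What is the company Xyz doing?",  # <= 400 characters, no URLs
    "urls": ["https://www.example.com/about"],        # URLs from the prompt or already known
    "needsSearch": True,                              # search first to find pages to crawl
    "maxNumberPages": 5,                              # recommended crawl budget (typical 2-20)
}
```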
@@ -1,13 +1,18 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Services Module.
Central service registry that provides access to shared services.
Service Hub.
Consumer-facing aggregation layer for services, DB interfaces, and runtime state.

IMPORTANT: Import rules
- Central modules (like this one) must NOT import feature containers
Architecture:
- serviceHub delegates service resolution to serviceCenter (DI container)
- serviceHub owns DB interface initialization and runtime state
- serviceCenter knows nothing about serviceHub (one-way dependency)

Import rules:
- Central modules (like this one) must NOT import feature containers
- Feature-specific services are loaded dynamically
- Only shared services are loaded directly
- Shared services are resolved via serviceCenter
"""

import os
@@ -23,7 +28,6 @@ if TYPE_CHECKING:

logger = logging.getLogger(__name__)

# Path to feature containers
_FEATURES_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "features")

@@ -54,15 +58,19 @@ class PublicService:
        ])


class Services:
class ServiceHub:
    """
    Central Services class providing access to all services.
    Consumer-facing aggregation of services, DB interfaces, and runtime state.

    Import rules:
    - Shared Services are loaded directly (from modules/services/)
    - Feature-specific Services are loaded dynamically via filename discovery
    Services are lazy-resolved via serviceCenter on first access.
    DB interfaces and runtime state are initialized eagerly.
    Feature services/interfaces are discovered dynamically from features/.
    """

    _SERVICE_CENTER_WRAPPING = {
        "ai": {"functionsOnly": False},
    }

    def __init__(self, user: User, workflow: "ChatWorkflow" = None, mandateId: Optional[str] = None, featureInstanceId: Optional[str] = None):
        self.user: User = user
        self.workflow = workflow
@@ -71,7 +79,14 @@ class Services:
        self.currentUserPrompt: str = ""
        self.rawUserPrompt: str = ""

        # Initialize central interfaces
        from modules.serviceCenter.context import ServiceCenterContext
        self._serviceCenterContext = ServiceCenterContext(
            user=user,
            workflow=workflow,
            mandate_id=mandateId,
            feature_instance_id=featureInstanceId,
        )

        from modules.interfaces.interfaceDbApp import getInterface as getAppInterface
        self.interfaceDbApp = getAppInterface(user, mandateId=mandateId)

@@ -80,75 +95,40 @@ class Services:

        self.rbac = self.interfaceDbApp.rbac if self.interfaceDbApp else None

        # ============================================================
        # CENTRAL INTERFACE (Chat/Workflow)
        # ============================================================
        from modules.interfaces.interfaceDbChat import getInterface as getChatInterface
        self.interfaceDbChat = getChatInterface(user, mandateId=mandateId, featureInstanceId=featureInstanceId)

        # ============================================================
        # SHARED SERVICES (from modules/services/)
        # ============================================================
        from .serviceSharepoint.mainServiceSharepoint import SharepointService
        self.sharepoint = PublicService(SharepointService(self))

        from .serviceTicket.mainServiceTicket import TicketService
        self.ticket = PublicService(TicketService(self))

        from .serviceChat.mainServiceChat import ChatService
        self.chat = PublicService(ChatService(self))

        from .serviceUtils.mainServiceUtils import UtilsService
        self.utils = PublicService(UtilsService(self))

        from .serviceSecurity.mainServiceSecurity import SecurityService
        self.security = PublicService(SecurityService(self))

        from .serviceMessaging.mainServiceMessaging import MessagingService
        self.messaging = PublicService(MessagingService(self))

        from .serviceStreaming.mainServiceStreaming import StreamingService
        self.streaming = PublicService(StreamingService(self))

        # ============================================================
        # AI SERVICES (from modules/services/)
        # ============================================================
        from .serviceAi.mainServiceAi import AiService
        self.ai = PublicService(AiService(self), functionsOnly=False)

        from .serviceExtraction.mainServiceExtraction import ExtractionService
        self.extraction = PublicService(ExtractionService(self))

        from .serviceGeneration.mainServiceGeneration import GenerationService
        self.generation = PublicService(GenerationService(self))

        from .serviceWeb.mainServiceWeb import WebService
        self.web = PublicService(WebService(self))

        # ============================================================
        # FEATURE INTERFACES (dynamically loaded)
        # ============================================================
        self._loadFeatureInterfaces()
        self._loadFeatureServices()

    def __getattr__(self, name: str):
        """Lazy-resolve services via serviceCenter on first access."""
        if name.startswith('_'):
            raise AttributeError(name)
        try:
            from modules.serviceCenter import getService
            service = getService(name, self._serviceCenterContext)
            wrapping = self._SERVICE_CENTER_WRAPPING.get(name, {})
            functionsOnly = wrapping.get("functionsOnly", True)
            wrapped = PublicService(service, functionsOnly=functionsOnly)
            setattr(self, name, wrapped)
            return wrapped
        except KeyError:
            raise AttributeError(f"'{type(self).__name__}' has no attribute '{name}'")

    def _loadFeatureInterfaces(self):
        """Dynamically load interfaces from feature containers by filename pattern."""
        # Find all interfaceFeature*.py files
        pattern = os.path.join(_FEATURES_DIR, "*", "interfaceFeature*.py")
        for filepath in glob.glob(pattern):
            try:
                # Extract feature name and interface name
                featureDir = os.path.basename(os.path.dirname(filepath))
                filename = os.path.basename(filepath)[:-3]  # Remove .py
                filename = os.path.basename(filepath)[:-3]

                # Build module path: modules.features.<feature>.<filename>
                modulePath = f"modules.features.{featureDir}.{filename}"
                module = importlib.import_module(modulePath)

                # Get interface via getInterface()
                if hasattr(module, "getInterface"):
                    interface = module.getInterface(self.user, mandateId=self.mandateId, featureInstanceId=self.featureInstanceId)
                    # Derive attribute name: interfaceFeatureChat -> interfaceDbChat
                    attrName = filename.replace("interfaceFeature", "interfaceDb")
                    setattr(self, attrName, interface)
                    logger.debug(f"Loaded interface: {attrName} from {modulePath}")
@@ -157,35 +137,29 @@ class Services:

    def _loadFeatureServices(self):
        """Dynamically load services from feature containers by filename pattern."""
        # Find all service*/mainService*.py files in feature containers
        pattern = os.path.join(_FEATURES_DIR, "*", "service*", "mainService*.py")
        for filepath in glob.glob(pattern):
            try:
                # Extract paths
                serviceDir = os.path.basename(os.path.dirname(filepath))
                featureDir = os.path.basename(os.path.dirname(os.path.dirname(filepath)))
                filename = os.path.basename(filepath)[:-3]  # Remove .py
                filename = os.path.basename(filepath)[:-3]

                # Build module path: modules.features.<feature>.<serviceDir>.<filename>
                modulePath = f"modules.features.{featureDir}.{serviceDir}.{filename}"
                module = importlib.import_module(modulePath)

                # Find service class (ends with "Service")
                serviceClass = None
                for name in dir(module):
                    if name.endswith("Service") and not name.startswith("_"):
                        cls = getattr(module, name)
                for attrName in dir(module):
                    if attrName.endswith("Service") and not attrName.startswith("_"):
                        cls = getattr(module, attrName)
                        if isinstance(cls, type):
                            serviceClass = cls
                            break

                if serviceClass:
                    # Derive attribute name: serviceAi -> ai, serviceExtraction -> extraction
                    attrName = serviceDir.replace("service", "").lower()
                    if not attrName:
                        attrName = serviceDir.lower()

                    # Check if it needs functionsOnly=False (for AI service)
                    functionsOnly = attrName != "ai"

                    serviceInstance = serviceClass(self)
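To make the filename conventions concrete, a small sketch of the derivations performed by _loadFeatureInterfaces and _loadFeatureServices (the example values are hypothetical):

```python
# From features/<feature>/interfaceFeatureChat.py
filename = "interfaceFeatureChat"
attrName = filename.replace("interfaceFeature", "interfaceDb")   # -> "interfaceDbChat"

# From features/<feature>/serviceExtraction/mainServiceExtraction.py
serviceDir = "serviceExtraction"
attrName = serviceDir.replace("service", "").lower()             # -> "extraction"
functionsOnly = attrName != "ai"                                  # only "ai" gets functionsOnly=False
```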
@@ -195,6 +169,10 @@ class Services:
                logger.debug(f"Could not load service from {filepath}: {e}")


def getInterface(user: User, workflow: "ChatWorkflow" = None, mandateId: Optional[str] = None, featureInstanceId: Optional[str] = None) -> Services:
    """Get Services instance for the given user, mandate, and feature instance context."""
    return Services(user, workflow, mandateId=mandateId, featureInstanceId=featureInstanceId)
# Backward-compatible alias
Services = ServiceHub


def getInterface(user: User, workflow: "ChatWorkflow" = None, mandateId: Optional[str] = None, featureInstanceId: Optional[str] = None) -> ServiceHub:
    """Get ServiceHub instance for the given user, mandate, and feature instance context."""
    return ServiceHub(user, workflow, mandateId=mandateId, featureInstanceId=featureInstanceId)
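A minimal usage sketch of the refactored hub, assuming an existing User object and that the shared services are registered in serviceCenter under the names used below:

```python
hub = getInterface(user, mandateId="m-123")   # mandateId value is hypothetical
chat = hub.chat          # first access: __getattr__ -> getService("chat", ctx), then cached via setattr
ai = hub.ai              # wrapped with functionsOnly=False per _SERVICE_CENTER_WRAPPING
db = hub.interfaceDbApp  # DB interfaces were created eagerly in __init__
```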
@@ -1,166 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
AIChat Feature Container - Main Module.
Handles feature initialization and RBAC catalog registration.

AIChat is the dynamic chat workflow feature that handles:
- AI-powered document processing
- Dynamic workflow execution
- Automation definitions
"""

import logging
from typing import Dict, List, Any

logger = logging.getLogger(__name__)

# Feature metadata
FEATURE_CODE = "chatworkflow"
FEATURE_LABEL = {"en": "Chat Workflow", "de": "Chat-Workflow", "fr": "Workflow de Chat"}
FEATURE_ICON = "mdi-message-cog"

# UI Objects for RBAC catalog
UI_OBJECTS = [
    {
        "objectKey": "ui.feature.aichat.workflows",
        "label": {"en": "Workflows", "de": "Workflows", "fr": "Workflows"},
        "meta": {"area": "workflows"}
    },
    {
        "objectKey": "ui.feature.aichat.automations",
        "label": {"en": "Automations", "de": "Automatisierungen", "fr": "Automatisations"},
        "meta": {"area": "automations"}
    },
    {
        "objectKey": "ui.feature.aichat.logs",
        "label": {"en": "Logs", "de": "Logs", "fr": "Journaux"},
        "meta": {"area": "logs"}
    },
]

# Resource Objects for RBAC catalog
RESOURCE_OBJECTS = [
    {
        "objectKey": "resource.feature.aichat.workflow.start",
        "label": {"en": "Start Workflow", "de": "Workflow starten", "fr": "Démarrer workflow"},
        "meta": {"endpoint": "/api/chat/playground/start", "method": "POST"}
    },
    {
        "objectKey": "resource.feature.aichat.workflow.stop",
        "label": {"en": "Stop Workflow", "de": "Workflow stoppen", "fr": "Arrêter workflow"},
        "meta": {"endpoint": "/api/chat/playground/stop/{workflowId}", "method": "POST"}
    },
    {
        "objectKey": "resource.feature.aichat.workflow.delete",
        "label": {"en": "Delete Workflow", "de": "Workflow löschen", "fr": "Supprimer workflow"},
        "meta": {"endpoint": "/api/chat/playground/workflow/{workflowId}", "method": "DELETE"}
    },
]

# Template roles for this feature
TEMPLATE_ROLES = [
    {
        "roleLabel": "workflow-admin",
        "description": {
            "en": "Workflow Administrator - Full access to workflow configuration and execution",
            "de": "Workflow-Administrator - Vollzugriff auf Workflow-Konfiguration und Ausführung",
            "fr": "Administrateur workflow - Accès complet à la configuration et exécution"
        }
    },
    {
        "roleLabel": "workflow-editor",
        "description": {
            "en": "Workflow Editor - Create and modify workflows",
            "de": "Workflow-Editor - Workflows erstellen und bearbeiten",
            "fr": "Éditeur workflow - Créer et modifier les workflows"
        }
    },
    {
        "roleLabel": "workflow-viewer",
        "description": {
            "en": "Workflow Viewer - View workflows and execution results",
            "de": "Workflow-Betrachter - Workflows und Ausführungsergebnisse einsehen",
            "fr": "Visualiseur workflow - Consulter les workflows et résultats"
        }
    },
]

def getFeatureDefinition() -> Dict[str, Any]:
    """Return the feature definition for registration."""
    return {
        "code": FEATURE_CODE,
        "label": FEATURE_LABEL,
        "icon": FEATURE_ICON
    }


def getUiObjects() -> List[Dict[str, Any]]:
    """Return UI objects for RBAC catalog registration."""
    return UI_OBJECTS


def getResourceObjects() -> List[Dict[str, Any]]:
    """Return resource objects for RBAC catalog registration."""
    return RESOURCE_OBJECTS


def getTemplateRoles() -> List[Dict[str, Any]]:
    """Return template roles for this feature."""
    return TEMPLATE_ROLES

def registerFeature(catalogService) -> bool:
    """
    Register this feature's RBAC objects in the catalog.

    Args:
        catalogService: The RBAC catalog service instance

    Returns:
        True if registration was successful
    """
    try:
        # Register UI objects
        for uiObj in UI_OBJECTS:
            catalogService.registerUiObject(
                featureCode=FEATURE_CODE,
                objectKey=uiObj["objectKey"],
                label=uiObj["label"],
                meta=uiObj.get("meta")
            )

        # Register Resource objects
        for resObj in RESOURCE_OBJECTS:
            catalogService.registerResourceObject(
                featureCode=FEATURE_CODE,
                objectKey=resObj["objectKey"],
                label=resObj["label"],
                meta=resObj.get("meta")
            )

        logger.info(f"Feature '{FEATURE_CODE}' registered {len(UI_OBJECTS)} UI objects and {len(RESOURCE_OBJECTS)} resource objects")
        return True

    except Exception as e:
        logger.error(f"Failed to register feature '{FEATURE_CODE}': {e}")
        return False

async def onStart(eventUser) -> None:
    """
    Called when the feature container starts.
    Initializes AI connectors for model registry.
    """
    try:
        from modules.aicore.aicoreModelRegistry import modelRegistry
        modelRegistry.ensureConnectorsRegistered()
        logger.info(f"Feature '{FEATURE_CODE}' started - AI connectors initialized")
    except Exception as e:
        logger.error(f"Feature '{FEATURE_CODE}' failed to initialize AI connectors: {e}")


async def onStop(eventUser) -> None:
    """Called when the feature container stops."""
    logger.info(f"Feature '{FEATURE_CODE}' stopped")
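For context, a minimal sketch of the lifecycle calls a host container would have made against this (now removed) module; catalogService and eventUser are assumed to exist and are not defined here:

```python
registerFeature(catalogService)   # registers UI_OBJECTS and RESOURCE_OBJECTS in the RBAC catalog
await onStart(eventUser)          # ensures AI connectors are registered in the model registry
# ... feature runs ...
await onStop(eventUser)
```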
File diff suppressed because it is too large
@@ -1,513 +0,0 @@
|
|||
================================================================================
|
||||
JSON MERGE OPERATION #1
|
||||
================================================================================
|
||||
Timestamp: 2026-01-06T22:24:33.405726
|
||||
|
||||
INPUT:
|
||||
Accumulated length: 40250 chars
|
||||
New Fragment length: 2471 chars
|
||||
Accumulated: 373 lines (showing first 5 and last 5)
|
||||
{
|
||||
"elements": [
|
||||
{
|
||||
"type": "table",
|
||||
"content": {
|
||||
... (363 lines omitted) ...
|
||||
["04.12.25", "05.12.25", "TICKETCORNER CH, RUEMLANG", "CH", "285.70", "", "**** **** **** 1234"],
|
||||
["05.12.25", "05.12.25", "NOII.CH DATING, WINTERTHUR", "CH", "74.10", "", "**** **** **** 1234"],
|
||||
["06.12.25", "08.12.25", "TEMU.COM, BASEL", "CH", "72.50", "", "**** **** **** 1234"],
|
||||
["06.12.25", "08.12.25", "Coop-4253 Hinwil Wasseri, Hinwil", "CH", "326.85", "", "**** **** **** 1234"],
|
||||
["06.12.25", "08.12.25", "Decathlon, Hin
|
||||
New Fragment: 33 lines (showing first 5 and last 5)
|
||||
```json
|
||||
["06.12.25", "08.12.25", "Decathlon, Hinwil", "CH", "150.00", "", "**** **** **** 1234"],
|
||||
["07.12.25", "09.12.25", "Migros-Genossenschafts-Bund, Zürich", "CH", "200.00", "", "**** **** **** 1234"],
|
||||
["08.12.25", "10.12.25", "Zürich HB, Zürich", "CH", "45.00", "", "**** **** **** 1234"],
|
||||
["09.12.25", "11.12.25", "Amazon Marketplace, amazon.de", "DE", "120.00", "", "**** **** **** 1234"],
|
||||
... (23 lines omitted) ...
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Normalized Accumulated (40250 chars)
|
||||
(showing first 5 and last 5 of 373 lines)
|
||||
{
|
||||
"elements": [
|
||||
{
|
||||
"type": "table",
|
||||
"content": {
|
||||
... (363 lines omitted) ...
|
||||
["04.12.25", "05.12.25", "TICKETCORNER CH, RUEMLANG", "CH", "285.70", "", "**** **** **** 1234"],
|
||||
["05.12.25", "05.12.25", "NOII.CH DATING, WINTERTHUR", "CH", "74.10", "", "**** **** **** 1234"],
|
||||
["06.12.25", "08.12.25", "TEMU.COM, BASEL", "CH", "72.50", "", "**** **** **** 1234"],
|
||||
["06.12.25", "08.12.25", "Coop-4253 Hinwil Wasseri, Hinwil", "CH", "326.85", "", "**** **** **** 1234"],
|
||||
["06.12.25", "08.12.25", "Decathlon, Hin
|
||||
|
||||
Normalized New Fragment (2459 chars)
|
||||
(showing first 5 and last 5 of 31 lines)
|
||||
["06.12.25", "08.12.25", "Decathlon, Hinwil", "CH", "150.00", "", "**** **** **** 1234"],
|
||||
["07.12.25", "09.12.25", "Migros-Genossenschafts-Bund, Zürich", "CH", "200.00", "", "**** **** **** 1234"],
|
||||
["08.12.25", "10.12.25", "Zürich HB, Zürich", "CH", "45.00", "", "**** **** **** 1234"],
|
||||
["09.12.25", "11.12.25", "Amazon Marketplace, amazon.de", "DE", "120.00", "", "**** **** **** 1234"],
|
||||
["10.12.25", "12.12.25", "IKEA, Dietlikon", "CH", "350.00", "", "**** **** **** 1234"],
|
||||
... (21 lines omitted) ...
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
STEP: PHASE 1
Description: Finding overlap between JSON strings
⏳ In progress...

Overlap Detection (string (exact)):
Overlap length: 40
✅ Found overlap of 40 chars
Accumulated suffix (COMPLETE, 40 chars):
============================================================================
["06.12.25", "08.12.25", "Decathlon, Hin
============================================================================
Fragment prefix (40 chars, 1 line):
["06.12.25", "08.12.25", "Decathlon, Hin

Overlap found (40 chars):
Accumulated suffix: ["06.12.25", "08.12.25", "Decathlon, Hin
Fragment prefix:    ["06.12.25", "08.12.25", "Decathlon, Hin

STEP: PHASE 2
Description: Merging strings (overlap: 40 chars)
⏳ In progress...

Merged String (42669 chars)
(showing first 5 and last 5 of 403 lines)
{
  "elements": [
    {
      "type": "table",
      "content": {
... (393 lines omitted) ...
        ]
      }
    }
  ]
}

STEP: PHASE 3
Description: Returning merged string (may be unclosed)
⏳ In progress...

Returning merged string (preserving incomplete element at end for next iteration)

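A minimal sketch of the overlap-based merge the three phases above describe, omitting the fence-stripping/normalization step shown earlier in the log; the helper name mergeWithOverlap is illustrative and not taken from the repository:

```python
def mergeWithOverlap(accumulated: str, fragment: str, maxOverlap: int = 2000) -> str:
    # Phase 1: find the longest suffix of `accumulated` that is an exact prefix of `fragment`.
    limit = min(maxOverlap, len(accumulated), len(fragment))
    overlap = 0
    for size in range(limit, 0, -1):
        if accumulated.endswith(fragment[:size]):
            overlap = size
            break
    # Phase 2: merge, dropping the duplicated overlap from the fragment.
    merged = accumulated + fragment[overlap:]
    # Phase 3: return as-is; the tail may still be an unclosed JSON element,
    # which the next fragment is expected to complete.
    return merged
```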
================================================================================
|
||||
MERGE RESULT: ✅ SUCCESS
|
||||
================================================================================
|
||||
Final result length: 42669 chars
|
||||
Final result (COMPLETE):
|
||||
================================================================================
|
||||
{
|
||||
"elements": [
|
||||
{
|
||||
"type": "table",
|
||||
"content": {
|
||||
"headers": [
|
||||
"Date",
|
||||
"Valuta",
|
||||
"Details",
|
||||
"Currency",
|
||||
"Amount",
|
||||
"Amount in CHF",
|
||||
"Maskierte Kreditkarte"
|
||||
],
|
||||
"rows": [
|
||||
["12.09.25", "15.09.25", "Coop-1911 Ruti, Ruti ZH", "CH", "102.05", "", "**** **** **** 1234"],
|
||||
["13.09.25", "15.09.25", "food & drive GmbH, Durnten", "CH", "26.20", "", "**** **** **** 1234"],
|
||||
["13.09.25", "15.09.25", "food & drive GmbH, Durnten", "CH", "4.50", "", "**** **** **** 1234"],
|
||||
["13.09.25", "15.09.25", "Gartencenter Meier, Durnten", "CH", "88.40", "", "**** **** **** 1234"],
|
||||
["13.09.25", "15.09.25", "Google PixVerse AI Vi, 650-2530000", "US", "5.20", "", "**** **** **** 1234"],
|
||||
["14.09.25", "15.09.25", "THE ARTISAN ZUERICH, ZUERICH", "CH", "15.00", "", "**** **** **** 1234"],
|
||||
["14.09.25", "15.09.25", "THE ARTISAN ZUERICH, ZUERICH", "CH", "15.00", "", "**** **** **** 1234"],
|
||||
["14.09.25", "15.09.25", "THE ARTISAN ZUERICH, ZUERICH", "CH", "18.00", "", "**** **** **** 1234"],
|
||||
["14.09.25", "15.09.25", "KONDITOREI VOLAND WALD, WALD ZH", "CH", "16.50", "", "**** **** **** 1234"],
|
||||
["14.09.25", "15.09.25", "GITHUB, INC., GITHUB.COM", "US", "USD 0.02", "0.00", "**** **** **** 1234"],
|
||||
["15.09.25", "16.09.25", "Coop-1252 Wald, Wald ZH", "CH", "50.80", "", "**** **** **** 1234"],
|
||||
["15.09.25", "16.09.25", "CLAUDE.AI SUBSCRIPTION, ANTHROPIC.COM", "US", "USD 108.10", "88.60", "**** **** **** 1234"],
|
||||
["16.09.25", "17.09.25", "Shell Waldhof, Wald ZH", "CH", "113.35", "", "**** **** **** 1234"],
|
||||
["16.09.25", "17.09.25", "Shell Waldhof, Wald ZH", "CH", "3.60", "", "**** **** **** 1234"],
|
||||
["18.09.25", "19.09.25", "Coop-4991 Fallanden, Fallanden", "CH", "116.00", "", "**** **** **** 1234"],
|
||||
["18.09.25", "19.09.25", "WAL*WESTHIVE, DUEBENDORF", "CH", "5.95", "", "**** **** **** 1234"],
|
||||
["18.09.25", "19.09.25", "WAL*WESTHIVE, DUEBENDORF", "CH", "7.00", "", "**** **** **** 1234"],
|
||||
["19.09.25", "22.09.25", "Migros M Dubendorf Stettb, Dubendorf", "CH", "32.10", "", "**** **** **** 1234"],
|
||||
["19.09.25", "22.09.25", "WAL*WESTHIVE, DUEBENDORF", "CH", "14.80", "", "**** **** **** 1234"],
|
||||
["19.09.25", "22.09.25", "WAL*WESTHIVE, DUEBENDORF", "CH", "370.65", "", "**** **** **** 1234"],
|
||||
["19.09.25", "22.09.25", "WAL*WESTHIVE, DUEBENDORF", "CH", "11.50", "", "**** **** **** 1234"],
|
||||
["20.09.25", "22.09.25", "Google PixVerse AI Vi, 650-2530000", "US", "5.20", "", "**** **** **** 1234"],
|
||||
["21.09.25", "22.09.25", "Kreuzwirt, Weissensee", "AT", "EUR 278.00", "266.50", "**** **** **** 1234"],
|
||||
["23.09.25", "24.09.25", "FILIALE, WALD ZH", "CH", "EUR 500.00", "492.15", "**** **** **** 1234"],
|
||||
["24.09.25", "25.09.25", "P2 Parkhaus Ein- & Ausfah, Zurich", "CH", "5.00", "", "**** **** **** 1234"],
|
||||
["24.09.25", "25.09.25", "A.I.R. Bakery, Zurich", "CH", "18.60", "", "**** **** **** 1234"],
|
||||
["24.09.25", "25.09.25", "WAL*WESTHIVE, DUEBENDORF", "CH", "23.35", "", "**** **** **** 1234"],
|
||||
["25.09.25", "26.09.25", "Coop-4253 Hinwil Wasseri, Hinwil", "CH", "203.20", "", "**** **** **** 1234"],
|
||||
["25.09.25", "26.09.25", "WAL*WESTHIVE, DUEBENDORF", "CH", "44.10", "", "**** **** **** 1234"],
|
||||
["26.09.25", "29.09.25", "Coop-4253 Hinwil Wasseri, Hinwil", "CH", "95.25", "", "**** **** **** 1234"],
|
||||
["26.09.25", "29.09.25", "Puls Apotheke & Drogerie, Hinwil", "CH", "140.60", "", "**** **** **** 1234"],
|
||||
["26.09.25", "29.09.25", "FILIALE, WALD ZH", "CH", "CHF 280.00", "287.00", "**** **** **** 1234"],
|
||||
["27.09.25", "29.09.25", "NYX*LullySA, Lully", "CH", "1.00", "", "**** **** **** 1234"],
|
||||
["27.09.25", "29.09.25", "Kisoque de Lully, Lully", "CH", "5.70", "", "**** **** **** 1234"],
|
||||
["27.09.25", "29.09.25", "TOTAL MKT FR, NANTERRE", "FR", "EUR 79.95", "76.90", "**** **** **** 1234"],
|
||||
["27.09.25", "29.09.25", "AREA NFC 4261525, 69BRON CEDEX", "FR", "EUR 33.50", "32.20", "**** **** **** 1234"],
|
||||
["27.09.25", "29.09.25", "HOLIDAY APARTMENTS, PORT SAPLAYA", "ES", "EUR 1'118.15", "1'075.45", "**** **** **** 1234"],
|
||||
["27.09.25", "29.09.25", "LE BISTROT DEL M, MEZE", "FR", "EUR 210.20", "202.15", "**** **** **** 1234"],
|
||||
["27.09.25", "29.09.25", "Google PixVerse AI Vi, 650-2530000", "US", "5.20", "", "**** **** **** 1234"],
|
||||
["28.09.25", "29.09.25", "SELVA HOSTELERIA, MACANET DE LA", "ES", "EUR 2.40", "2.30", "**** **** **** 1234"],
|
||||
["28.09.25", "29.09.25", "E.S. LA SELVA, COMAJULIANA", "ES", "EUR 90.09", "86.65", "**** **** **** 1234"],
|
||||
["28.09.25", "29.09.25", "E.S. LA SELVA, COMAJULIANA", "ES", "EUR 4.70", "4.50", "**** **** **** 1234"],
|
||||
["28.09.25", "29.09.25", "E.S. LA SELVA, COMAJULIANA", "ES", "EUR 8.40", "8.10", "**** **** **** 1234"],
|
||||
["28.09.25", "29.09.25", "AUTOROUTES ASF, VEDENE CEDEX", "FR", "EUR 15.60", "15.00", "**** **** **** 1234"],
|
||||
["27.09.25", "29.09.25", "AUTOROUTES ASF, VEDENE CEDEX", "FR", "EUR 24.40", "23.45", "**** **** **** 1234"],
|
||||
["29.09.25", "30.09.25", "OROMARKET SUPERMERCADOS, OROPESA", "ES", "EUR 17.32", "16.65", "**** **** **** 1234"],
|
||||
["29.09.25", "30.09.25", "QUESADA CENTER SUPERMERCA, OROPESA", "ES", "EUR 40.40", "38.85", "**** **** **** 1234"],
|
||||
["29.09.25", "30.09.25", "QUESADA CENTER SUPERMERCA, OROPESA", "ES", "EUR 22.55", "21.70", "**** **** **** 1234"],
|
||||
["29.09.25", "30.09.25", "ALDI OROPESA, OROPESA", "ES", "EUR 129.39", "124.40", "**** **** **** 1234"],
|
||||
["30.09.25", "01.10.25", "QUESADA CENTER, OROPESA DEL M", "ES", "EUR 84.05", "80.95", "**** **** **** 1234"],
|
||||
["30.09.25", "01.10.25", "PASSION CREPES, OROPESA", "ES", "EUR 10.30", "9.90", "**** **** **** 1234"],
|
||||
["30.09.25", "01.10.25", "MERCADONA MARINA DOR, ORPESA DEL MA", "ES", "EUR 17.53", "16.90", "**** **** **** 1234"],
|
||||
["30.09.25", "01.10.25", "Restaurante DRAGON, OROPESA", "ES", "EUR 75.00", "72.25", "**** **** **** 1234"],
|
||||
["30.09.25", "01.10.25", "OPENAI *CHATGPT SUBSCR, OPENAI.COM", "US", "USD 216.20", "177.55", "**** **** **** 1234"],
|
||||
["01.10.25", "02.10.25", "GOOGLE *ADS5192965135, cc§google.com", "IE", "29.60", "", "**** **** **** 1234"],
|
||||
["01.10.25", "02.10.25", "RTE PUERTA DEL SOL, OROPESA DEL M", "ES", "EUR 169.20", "163.10", "**** **** **** 1234"],
|
||||
["01.10.25", "02.10.25", "TREN TURISTICO OROPESA, OROPESA DEL M", "ES", "EUR 15.00", "14.45", "**** **** **** 1234"],
|
||||
["01.10.25", "02.10.25", "LANGDOCK GMBH, BERLIN", "DE", "EUR 25.00", "24.10", "**** **** **** 1234"],
|
||||
["01.10.25", "02.10.25", "WWW.PERPLEXITY.AI, WWW.PERPLEXIT", "US", "USD 10.81", "8.90", "**** **** **** 1234"],
|
||||
["02.10.25", "06.10.25", "GOOGLE *YouTubePremium, g.co/helppay#", "GB", "33.90", "", "**** **** **** 1234"],
|
||||
["02.10.25", "06.10.25", "WILLY LA CONCHA, OROPESA DEL M", "ES", "EUR 98.93", "95.40", "**** **** **** 1234"],
|
||||
["03.10.25", "06.10.25", "Netflix.com, Los Gatos", "NL", "20.90", "", "**** **** **** 1234"],
|
||||
["03.10.25", "06.10.25", "COALIMENT LA CONCHA, OROPESA DEL M", "ES", "EUR 11.74", "11.30", "**** **** **** 1234"],
|
||||
["03.10.25", "06.10.25", "DONA RESU, OROPESA", "ES", "EUR 7.30", "7.05", "**** **** **** 1234"],
|
||||
["04.10.25", "06.10.25", "MERCADONA MARINA DOR, ORPESA DEL MA", "ES", "EUR 89.50", "86.30", "**** **** **** 1234"],
|
||||
["04.10.25", "06.10.25", "QUESADA CENTER SUPERMERCA, OROPESA", "ES", "EUR 8.45", "8.15", "**** **** **** 1234"],
|
||||
["04.10.25", "06.10.25", "HELADERIA LAS DELICIAS, OROPESA DEL M", "ES", "EUR 10.80", "10.40", "**** **** **** 1234"],
|
||||
["04.10.25", "06.10.25", "REST. BISTROT, OROPESA DEL M", "ES", "EUR 117.90", "113.70", "**** **** **** 1234"],
|
||||
["04.10.25", "06.10.25", "Google PixVerse AI Vi, 650-2530000", "US", "5.20", "", "**** **** **** 1234"],
|
||||
["04.10.25", "06.10.25", "Google Duolingo Langu, 650-2530000", "US", "10.00", "", "**** **** **** 1234"],
|
||||
["05.10.25", "06.10.25", "HABANA, OROPESA", "ES", "EUR 3.00", "2.90", "**** **** **** 1234"],
|
||||
["05.10.25", "06.10.25", "HABANA, OROPESA", "ES", "EUR 9.00", "8.70", "**** **** **** 1234"],
|
||||
["05.10.25", "06.10.25", "RESTAURANTE, ORPESA", "ES", "EUR 87.75", "84.60", "**** **** **** 1234"],
|
||||
["05.10.25", "06.10.25", "HABANA, OROPESA", "ES", "EUR 15.50", "14.95", "**** **** **** 1234"],
|
||||
["06.10.25", "07.10.25", "HABANA, OROPESA", "ES", "EUR 25.00", "24.05", "**** **** **** 1234"],
|
||||
["06.10.25", "07.10.25", "QUESADA CENTER SUPERMERCA, OROPESA", "ES", "EUR 3.95", "3.80", "**** **** **** 1234"],
|
||||
["06.10.25", "07.10.25", "QUESADA CENTER SUPERMERCA, OROPESA", "ES", "EUR 47.75", "45.95", "**** **** **** 1234"],
|
||||
["07.10.25", "08.10.25", "MAGIC SPORT HALL OLYMPICS, OROPESA DEL M", "ES", "EUR 183.75", "176.70", "**** **** **** 1234"],
|
||||
["07.10.25", "08.10.25", "BKG*HOTEL AT BOOKING.C, (888)850-3958", "NL", "EUR 172.55", "165.90", "**** **** **** 1234"],
|
||||
["07.10.25", "08.10.25", "Wondershare, Hong Kong", "HK", "25.95", "", "**** **** **** 1234"],
|
||||
["07.10.25", "08.10.25", "MERCADONA MARINA DOR, ORPESA DEL MA", "ES", "EUR 99.13", "95.30", "**** **** **** 1234"],
|
||||
["07.10.25", "08.10.25", "RECEP HOTEL MAGIC SPORTS, OROPESA DEL M", "ES", "EUR 10.00", "9.60", "**** **** **** 1234"],
|
||||
["07.10.25", "08.10.25", "Google One, 650-2530000", "US", "10.00", "", "**** **** **** 1234"],
|
||||
["08.10.25", "09.10.25", "E.S.LA CLARIANA, MADRID", "ES", "EUR 98.07", "94.00", "**** **** **** 1234"],
|
||||
["08.10.25", "09.10.25", "AUTOROUTES ASF, VEDENE CEDEX", "FR", "EUR 44.20", "42.35", "**** **** **** 1234"],
|
||||
["08.10.25", "09.10.25", "A.R.E.A., 69671", "FR", "EUR 11.20", "10.75", "**** **** **** 1234"],
|
||||
["09.10.25", "10.10.25", "SOCAR station-service, Bursins", "CH", "113.10", "", "**** **** **** 1234"],
|
||||
["09.10.25", "10.10.25", "SOCAR station-service, Bursins", "CH", "6.80", "", "**** **** **** 1234"],
|
||||
["09.10.25", "10.10.25", "A.R.E.A., 69671", "FR", "EUR 15.00", "14.40", "**** **** **** 1234"],
|
||||
["08.10.25", "10.10.25", "DOMAINE DE ROZAN, LA TRONCHE", "FR", "EUR 110.00", "105.45", "**** **** **** 1234"],
|
||||
["09.10.25", "10.10.25", "DOMAINE DE ROZAN, LA TRONCHE", "FR", "EUR 40.00", "38.35", "**** **** **** 1234"],
|
||||
["10.10.25", "13.10.25", "Coop-1252 Wald, Wald ZH", "CH", "164.85", "", "**** **** **** 1234"],
|
||||
["10.10.25", "13.10.25", "CURSOR, AI POWERED IDE, CURSOR.COM", "US", "USD 20.00", "16.60", "**** **** **** 1234"],
|
||||
["11.10.25", "13.10.25", "Google PixVerse AI Vi, 650-2530000", "US", "5.20", "", "**** **** **** 1234"],
|
||||
["12.10.25", "13.10.25", "Cafe Konditorei Voland, Laupen ZH", "CH", "37.70", "", "**** **** **** 1234"],
|
||||
["12.10.25", "13.10.25", "KONDITOREI VOLAND LAUP, LAUPEN ZH", "CH", "17.35", "", "**** **** **** 1234"],
|
||||
["12.10.25", "13.10.25", "KONDITOREI VOLAND LAUP, LAUPEN ZH", "CH", "5.40", "", "**** **** **** 1234"],
|
||||
["12.09.25", "15.09.25", "SBB Bahnhof Ruti ZH, Ruti ZH", "CH", "54.00", "", "**** **** **** 1234"],
|
||||
["12.09.25", "15.09.25", "Rest Volkshaus, Zurich", "CH", "18.00", "", "**** **** **** 1234"],
|
||||
["12.09.25", "15.09.25", "Sora Sushi - HB Zurich, Zurich", "CH", "74.00", "", "**** **** **** 1234"],
|
||||
["12.09.25", "15.09.25", "176.32 avec, Ruti ZH", "CH", "2.45", "", "**** **** **** 1234"],
|
||||
["12.09.25", "15.09.25", "Baradox AG, Zurich", "CH", "15.00", "", "**** **** **** 1234"],
|
||||
["12.09.25", "15.09.25", "Volkshausstiftung Zurich, Zurich", "CH", "3.00", "", "**** **** **** 1234"],
|
||||
["13.09.25", "15.09.25", "SBB Bahnhof Ruti ZH, Ruti ZH", "CH", "9.20", "", "**** **** **** 1234"],
|
||||
["13.09.25", "15.09.25", "SBB Bahnhof Wald, Wald ZH", "CH", "27.00", "", "**** **** **** 1234"],
|
||||
["13.09.25", "15.09.25", "EXIL GMBH, ZUERICH", "CH", "14.00", "", "**** **** **** 1234"],
|
||||
["13.09.25", "15.09.25", "EXIL GMBH, ZUERICH", "CH", "14.00", "", "**** **** **** 1234"],
|
||||
["13.09.25", "15.09.25", "URBAN FOOD CLUTURE GMB, ZURICH", "CH", "135.00", "", "**** **** **** 1234"],
|
||||
["14.09.25", "15.09.25", "Google One, 650-2530000", "US", "100.00", "", "**** **** **** 1234"],
|
||||
["15.09.25", "16.09.25", "Ex Libris AG, Dietikon", "CH", "13.00", "", "**** **** **** 1234"],
|
||||
["15.09.25", "16.09.25", "Coop-1252 Wald, Wald ZH", "CH", "51.45", "", "**** **** **** 1234"],
|
||||
["16.09.25", "17.09.25", "Shell Waldhof, Wald ZH", "CH", "5.80", "", "**** **** **** 1234"],
|
||||
["19.09.25", "22.09.25", "LS Pirates AG, Hinwil", "CH", "16.05", "", "**** **** **** 1234"],
|
||||
["20.09.25", "22.09.25", "LS Pirates AG, Hinwil", "CH", "14.60", "", "**** **** **** 1234"],
|
||||
["20.09.25", "22.09.25", "LS Pirates AG, Hinwil", "CH", "13.55", "", "**** **** **** 1234"],
|
||||
["20.09.25", "22.09.25", "LS Pirates AG, Hinwil", "CH", "13.90", "", "**** **** **** 1234"],
|
||||
["20.09.25", "22.09.25", "Coop-1252 Wald, Wald ZH", "CH", "60.75", "", "**** **** **** 1234"],
|
||||
["20.09.25", "22.09.25", "MORE BAR GMBH, BUBIKON", "CH", "70.00", "", "**** **** **** 1234"],
|
||||
["21.09.25", "22.09.25", "LS Pirates AG, Hinwil", "CH", "6.40", "", "**** **** **** 1234"],
|
||||
["21.09.25", "22.09.25", "LS Pirates AG, Hinwil", "CH", "4.20", "", "**** **** **** 1234"],
|
||||
["21.09.25", "22.09.25", "LS Pirates AG, Hinwil", "CH", "13.45", "", "**** **** **** 1234"],
|
||||
["22.09.25", "23.09.25", "Migros M Wald, Wald ZH", "CH", "16.80", "", "**** **** **** 1234"],
|
||||
["22.09.25", "23.09.25", "BLEICHI + HOTEL, WALD", "CH", "43.00", "", "**** **** **** 1234"],
|
||||
["23.09.25", "24.09.25", "Coop-1252 Wald, Wald ZH", "CH", "155.75", "", "**** **** **** 1234"],
|
||||
["24.09.25", "25.09.25", "BKG*HOTEL AT BOOKING.C, (888)850-3958", "NL", "EUR 177.35", "170.35", "**** **** **** 1234"],
|
||||
["27.09.25", "29.09.25", "APODRO APOTHEKE WALD, WALD ZH", "CH", "21.50", "", "**** **** **** 1234"],
|
||||
["28.09.25", "29.09.25", "SELVA HOSTELERIA, MACANET DE LA", "ES", "15.75", "", "**** **** **** 1234"],
|
||||
["28.09.25", "29.09.25", "AREAS LA SELVA, BARCELONA", "ES", "EUR 19.11", "18.40", "**** **** **** 1234"],
|
||||
["02.10.25", "06.10.25", "GOOGLE *YouTube Member, g.co/helppay#", "GB", "15.00", "", "**** **** **** 1234"],
|
||||
["01.10.25", "06.10.25", "Eventfrog.c 737909203525, Olten", "CH", "114.95", "", "**** **** **** 1234"],
|
||||
["06.10.25", "07.10.25", "digitec Galaxus (Online), Zurich", "CH", "23.80", "", "**** **** **** 1234"],
|
||||
["08.10.25", "09.10.25", "E.S.LA CLARIANA, MADRID", "ES", "EUR 29.58", "28.35", "**** **** **** 1234"],
|
||||
["10.10.25", "13.10.25", "B2BAND.COM, BRATISLAVA", "SK", "72.45", "", "**** **** **** 1234"],
|
||||
["10.10.25", "13.10.25", "Ticketcorner*89987227, 410900800800", "CH", "199.80", "", "**** **** **** 1234"],
|
||||
["10.10.25", "13.10.25", "SP NORAYA, RUMISBERG", "CH", "79.90", "", "**** **** **** 1234"],
|
||||
["11.10.25", "13.10.25", "B2BAND.COM, BRATISLAVA", "SK", "139.95", "", "**** **** **** 1234"],
|
||||
["11.10.25", "13.10.25", "TEMU.COM, BASEL", "CH", "81.20", "", "**** **** **** 1234"],
|
||||
["11.10.25", "13.10.25", "NOII.CH DATING, WINTERTHUR", "CH", "74.10", "", "**** **** **** 1234"],
|
||||
["12.10.25", "13.10.25", "Rest Volkshaus, Zurich", "CH", "9.00", "", "**** **** **** 1234"],
|
||||
["12.10.25", "13.10.25", "Shell Heuberg, Forch", "CH", "100.10", "", "**** **** **** 1234"],
|
||||
["12.10.25", "13.10.25", "Parkhaus Helvetiaplatz, Zurich", "CH", "8.00", "", "**** **** **** 1234"],
|
||||
["14.10.25", "15.10.25", "P2 Parkhaus Ein- & Ausfah, Zurich CH", "CHF", "5.00", "", "**** **** **** 1234"],
|
||||
["14.10.25", "15.10.25", "Migros Zurich Airport, Zurich CH", "CHF", "16.35", "", "**** **** **** 1234"],
|
||||
["14.10.25", "15.10.25", "GITHUB, INC., GITHUB.COM US", "USD", "0.30", "0.25", "**** **** **** 1234"],
|
||||
["15.10.25", "16.10.25", "Dosenbach Schuhe & Sport, Hinwil CH", "CHF", "50.00", "", "**** **** **** 1234"],
|
||||
["15.10.25", "16.10.25", "Coop-4253 Hinwil Wasseri, Hinwil CH", "CHF", "257.20", "", "**** **** **** 1234"],
|
||||
["15.10.25", "16.10.25", "Landi, Wald CH", "CHF", "67.85", "", "**** **** **** 1234"],
|
||||
["15.10.25", "16.10.25", "Puls Apotheke & Drogerie, Hinwil CH", "CHF", "9.20", "", "**** **** **** 1234"],
|
||||
["15.10.25", "16.10.25", "CLAUDE.AI SUBSCRIPTION, ANTHROPIC.COM US", "USD", "108.10", "89.50", "**** **** **** 1234"],
|
||||
["17.10.25", "20.10.25", "SV (Schweiz) AG, 27960, Zurich ETH-Ze CH", "CHF", "7.80", "", "**** **** **** 1234"],
|
||||
["17.10.25", "20.10.25", "SV (Schweiz) AG, 27960, Zurich ETH-Ze CH", "CHF", "14.50", "", "**** **** **** 1234"],
|
||||
["17.10.25", "20.10.25", "SV (Schweiz) AG, 27960, Zurich ETH-Ze CH", "CHF", "4.20", "", "**** **** **** 1234"],
|
||||
["17.10.25", "20.10.25", "Universitatsspital Zurich, Zurich CH", "CHF", "30.00", "", "**** **** **** 1234"],
|
||||
["18.10.25", "20.10.25", "HubSpot Germany GmbH, Berlin DE", "EUR", "267.55", "256.05", "**** **** **** 1234"],
|
||||
["18.10.25", "20.10.25", "Google PixVerse AI Vi, 650-2530000 US", "USD", "5.20", "", "**** **** **** 1234"],
|
||||
["19.10.25", "20.10.25", "Shell Waldhof, Wald ZH CH", "CHF", "7.20", "", "**** **** **** 1234"],
|
||||
["19.10.25", "20.10.25", "KONDITOREI VOLAND WALD, WALD ZH CH", "CHF", "20.30", "", "**** **** **** 1234"],
|
||||
["19.10.25", "20.10.25", "KONDITOREI VOLAND WALD, WALD ZH CH", "CHF", "11.10", "", "**** **** **** 1234"],
|
||||
["18.10.25", "20.10.25", "ANTHROPIC, ANTHROPIC.COM US", "USD", "108.10", "88.75", "**** **** **** 1234"],
|
||||
["20.10.25", "21.10.25", "APCOA, Dubendorf CH", "CHF", "20.00", "", "**** **** **** 1234"],
|
||||
["20.10.25", "21.10.25", "STWEG Ambassador House, Glattbrugg CH", "CHF", "5.00", "", "**** **** **** 1234"],
|
||||
["23.10.25", "24.10.25", "Coop-1252 Wald, Wald ZH CH", "CHF", "199.85", "", "**** **** **** 1234"],
|
||||
["24.10.25", "24.10.25", "Ticketcorner*90004263, 410900800800 CH", "CHF", "159.75", "", "**** **** **** 1234"],
|
||||
["25.10.25", "27.10.25", "Google Duolingo Langu, 650-2530000 US", "USD", "10.00", "", "**** **** **** 1234"],
|
||||
["25.10.25", "27.10.25", "Hornbach Baumarkt Galgene, Galgenen CH", "CHF", "1.50", "", "**** **** **** 1234"],
|
||||
["25.10.25", "27.10.25", "Hornbach Baumarkt Galgene, Galgenen CH", "CHF", "814.10", "", "**** **** **** 1234"],
|
||||
["25.10.25", "27.10.25", "REMO WUEST BACK. KOND., GALGENEN CH", "CHF", "20.00", "", "**** **** **** 1234"],
|
||||
["25.10.25", "27.10.25", "Google PixVerse AI Vi, 650-2530000 US", "USD", "5.20", "", "**** **** **** 1234"],
|
||||
["27.10.25", "28.10.25", "Coop-1252 Wald, Wald ZH CH", "CHF", "12.90", "", "**** **** **** 1234"],
|
||||
["29.10.25", "30.10.25", "WAL*WESTHIVE, DUEBENDORF CH", "CHF", "15.30", "", "**** **** **** 1234"],
|
||||
["29.10.25", "30.10.25", "WAL*WESTHIVE, DUEBENDORF CH", "CHF", "6.50", "", "**** **** **** 1234"],
|
||||
["30.10.25", "31.10.25", "Coop-4253 Hinwil Wasseri, Hinwil CH", "CHF", "139.85", "", "**** **** **** 1234"],
|
||||
["30.10.25", "31.10.25", "Coop-4054 Hinwil Restaura, Hinwil CH", "CHF", "34.95", "", "**** **** **** 1234"],
|
||||
["31.10.25", "03.11.25", "Coop-1911 Ruti, Ruti ZH CH", "CHF", "66.50", "", "**** **** **** 1234"],
|
||||
["31.10.25", "03.11.25", "OPENAI *CHATGPT SUBSCR, OPENAI.COM US", "USD", "216.20", "178.70", "**** **** **** 1234"],
|
||||
["01.11.25", "03.11.25", "GOOGLE *ADS5192965135, cc§google.com IE", "", "79.15", "", "**** **** **** 1234"],
|
||||
["01.11.25", "03.11.25", "KONDITOREI VOLAND WALD, WALD ZH CH", "CHF", "99.60", "", "**** **** **** 1234"],
|
||||
["01.11.25", "03.11.25", "LANGDOCK GMBH, BERLIN DE", "EUR", "25.00", "23.90", "**** **** **** 1234"],
|
||||
["01.11.25", "03.11.25", "Google PixVerse AI Vi, 650-2530000 US", "USD", "5.20", "", "**** **** **** 1234"],
|
||||
["02.11.25", "03.11.25", "GOOGLE *YouTubePremium, g.co/helppay# GB", "", "33.90", "", "**** **** **** 1234"],
|
||||
["02.11.25", "03.11.25", "Agrola TopShop Wald, Wald ZH CH", "CHF", "119.45", "", "**** **** **** 1234"],
|
||||
["03.11.25", "03.11.25", "Netflix.com, Los Gatos NL", "", "20.90", "", "**** **** **** 1234"],
|
||||
["03.11.25", "04.11.25", "www.fust.ch, Oberburen CH", "CHF", "1'560.90", "", "**** **** **** 1234"],
|
||||
["06.11.25", "07.11.25", "Grand Casino Luzern AG, Luzern CH", "CHF", "100.00", "108.00", "**** **** **** 1234"],
|
||||
["07.11.25", "10.11.25", "Migros M Mutschellen, Berikon CH", "CHF", "0.40", "", "**** **** **** 1234"],
|
||||
["07.11.25", "10.11.25", "Migros M Mutschellen, Berikon CH", "CHF", "15.90", "", "**** **** **** 1234"],
|
||||
["08.11.25", "10.11.25", "wondershare.com, Hong Kong HK", "", "25.95", "", "**** **** **** 1234"],
|
||||
["07.11.25", "10.11.25", "WAL*WESTHIVE, DUEBENDORF CH", "CHF", "9.85", "", "**** **** **** 1234"],
|
||||
["07.11.25", "10.11.25", "Google One, 650-2530000 US", "USD", "10.00", "", "**** **** **** 1234"],
|
||||
["08.11.25", "10.11.25", "Steiner-Beck AG, Wald ZH CH", "CHF", "32.20", "", "**** **** **** 1234"],
|
||||
["08.11.25", "10.11.25", "Google PixVerse AI Vi, 650-2530000 US", "USD", "5.20", "", "**** **** **** 1234"],
|
||||
["09.11.25", "10.11.25", "KONDITOREI VOLAND WALD, WALD ZH CH", "CHF", "25.80", "", "**** **** **** 1234"],
|
||||
["09.11.25", "11.11.25", "Starfood AG (Schweiz), Rothenburg CH", "CHF", "16.00", "", "**** **** **** 1234"],
|
||||
["09.11.25", "11.11.25", "Starfood AG (Schweiz), Rothenburg CH", "CHF", "16.00", "", "**** **** **** 1234"],
|
||||
["10.11.25", "11.11.25", "Coop-2253 Jona Eisenhof, Jona CH", "CHF", "161.25", "", "**** **** **** 1234"],
|
||||
["12.11.25", "13.11.25", "Hess AG Erdbau + Recy, Laupen ZH CH", "CHF", "39.20", "", "**** **** **** 1234"],
|
||||
["12.11.25", "13.11.25", "Jumbo-6017 Hinwil, Hinwil CH", "CHF", "173.70", "", "**** **** **** 1234"],
|
||||
["13.11.25", "14.11.25", "Migros MM Ruti, Ruti ZH CH", "CHF", "57.90", "", "**** **** **** 1234"],
|
||||
["13.11.25", "14.11.25", "Migros MM Ruti, Ruti ZH CH", "CHF", "140.10", "", "**** **** **** 1234"],
|
||||
["13.11.25", "14.11.25", "WAL*WESTHIVE, DUEBENDORF CH", "CHF", "22.30", "", "**** **** **** 1234"],
|
||||
["13.11.25", "14.11.25", "UDIO.COM, UDIO.COM US", "EUR", "36.00", "34.35", "**** **** **** 1234"],
|
||||
["15.10.25", "16.10.25", "Coop-4958 Rapperswil, Rapperswil SG CH", "CHF", "4.95", "", "**** **** **** 1234"],
|
||||
["16.10.25", "17.10.25", "TEMU.COM, BASEL CH", "CHF", "61.50", "", "**** **** **** 1234"],
|
||||
["16.10.25", "17.10.25", "TEMU.COM, BASEL CH", "CHF", "12.95", "", "**** **** **** 1234"],
|
||||
["16.10.25", "17.10.25", "TEMU.COM, BASEL CH", "CHF", "32.30", "", "**** **** **** 1234"],
|
||||
["16.10.25", "17.10.25", "TEMU.COM, BASEL CH", "CHF", "17.95", "", "**** **** **** 1234"],
|
||||
["17.10.25", "20.10.25", "SBB Bahnhof Ruti ZH, Ruti ZH CH", "CHF", "54.00", "", "**** **** **** 1234"],
|
||||
["17.10.25", "20.10.25", "Candrian Catering AG 2, Zurich CH", "CHF", "15.50", "", "**** **** **** 1234"],
|
||||
["20.10.25", "21.10.25", "Coop-1252 Wald, Wald ZH CH", "CHF", "178.95", "", "**** **** **** 1234"],
|
||||
["21.10.25", "22.10.25", "Denner Ruti ZH, Ruti ZH CH", "CHF", "50.15", "", "**** **** **** 1234"],
|
||||
["24.10.25", "27.10.25", "TEMU.COM, BASEL CH", "CHF", "100.65", "", "**** **** **** 1234"],
|
||||
["24.10.25", "27.10.25", "WAL*HAAR SHOP CH AG, UETENDORF CH", "CHF", "70.35", "", "**** **** **** 1234"],
|
||||
["25.10.25", "27.10.25", "Coop-1252 Wald, Wald ZH CH", "CHF", "47.00", "", "**** **** **** 1234"],
|
||||
["25.10.25", "27.10.25", "Shell Waldhof, Wald ZH CH", "CHF", "3.20", "", "**** **** **** 1234"],
|
||||
["26.10.25", "27.10.25", "TEMU.COM, BASEL CH", "CHF", "63.10", "", "**** **** **** 1234"],
|
||||
["27.10.25", "28.10.25", "ONLY, Hinwil CH", "CHF", "222.60", "", "**** **** **** 1234"],
|
||||
["27.10.25", "28.10.25", "Coop-4253 Hinwil Wasseri, Hinwil CH", "CHF", "104.10", "", "**** **** **** 1234"],
|
||||
["27.10.25", "28.10.25", "Coop-4253 Hinwil Wasseri, Hinwil CH", "CHF", "24.95", "", "**** **** **** 1234"],
|
||||
["27.10.25", "28.10.25", "Manor AG, Hinwil CH", "CHF", "177.25", "", "**** **** **** 1234"],
|
||||
["27.10.25", "28.10.25", "H & M, Hinwil CH", "CHF", "43.85", "", "**** **** **** 1234"],
|
||||
["27.10.25", "28.10.25", "Coop-4253 Hinwil Wasseri, Hinwil CH", "CHF", "52.30", "", "**** **** **** 1234"],
|
||||
["27.10.25", "28.10.25", "Manor AG, Hinwil CH", "CHF", "59.05", "", "**** **** **** 1234"],
|
||||
["28.10.25", "29.10.25", "Migros MM Rapperswil, Rapperswil SG CH", "CHF", "23.35", "", "**** **** **** 1234"],
|
||||
["29.10.25", "30.10.25", "ROSSMANN Schweiz AG, Wallisellen CH", "CHF", "13.95", "", "**** **** **** 1234"],
|
||||
["29.10.25", "30.10.25", "Migros MR Glattzentrum, Glattzentrum CH", "CHF", "42.20", "", "**** **** **** 1234"],
|
||||
["29.10.25", "30.10.25", "Calzedonia, Wallisellen CH", "CHF", "178.25", "", "**** **** **** 1234"],
|
||||
["29.10.25", "30.10.25", "Intimissimi, Wallisellen CH", "CHF", "90.20", "", "**** **** **** 1234"],
|
||||
["29.10.25", "30.10.25", "New Yorker (Schweiz) GmbH, Wetzikon ZH CH", "CHF", "76.80", "", "**** **** **** 1234"],
|
||||
["29.10.25", "30.10.25", "New Yorker (Schweiz) GmbH, Wetzikon ZH CH", "CHF", "7.95", "", "**** **** **** 1234"],
|
||||
["29.10.25", "30.10.25", "Golden Bar GmbH, Wald ZH CH", "CHF", "40.00", "", "**** **** **** 1234"],
|
||||
["30.10.25", "31.10.25", "Frischer Max, Zurich CH", "CHF", "12.60", "", "**** **** **** 1234"],
|
||||
["30.10.25", "31.10.25", "Frischer Max, Zurich CH", "CHF", "4.20", "", "**** **** **** 1234"],
|
||||
["30.10.25", "31.10.25", "Halle 622, Zurich CH", "CHF", "15.75", "", "**** **** **** 1234"],
|
||||
["31.10.25", "03.11.25", "SBB Bahnhof Ruti ZH, Ruti ZH CH", "CHF", "27.00", "", "**** **** **** 1234"],
|
||||
["31.10.25", "03.11.25", "Eventfrog.c 739003945141, Olten CH", "CHF", "67.85", "", "**** **** **** 1234"],
|
||||
["01.11.25", "03.11.25", "SBB Bahnhof Ruti ZH, Ruti ZH CH", "CHF", "27.00", "", "**** **** **** 1234"],
|
||||
["01.11.25", "03.11.25", "AMERON ZUERICH, ZUERICH CH", "CHF", "30.00", "", "**** **** **** 1234"],
|
||||
["31.10.25", "03.11.25", "SKYLINE EVENTS, ZUERICH CH", "CHF", "13.50", "", "**** **** **** 1234"],
|
||||
["02.11.25", "03.11.25", "AURA Event Saal, Zuerich CH", "CHF", "15.75", "", "**** **** **** 1234"],
|
||||
["02.11.25", "03.11.25", "GOOGLE *YouTube Member, g.co/helppay# GB", "", "15.00", "", "**** **** **** 1234"],
|
||||
["01.11.25", "03.11.25", "VBZ Bellevue, Zurich CH", "CHF", "2.80", "", "**** **** **** 1234"],
|
||||
["01.11.25", "03.11.25", "WAL*CLUB BELLEVUE, HOERI CH", "CHF", "16.50", "", "**** **** **** 1234"],
|
||||
["02.11.25", "03.11.25", "MCDONALDS ZUERICH 2016, ZUERICH CH", "CHF", "10.50", "", "**** **** **** 1234"],
|
||||
["03.11.25", "04.11.25", "Coop-1252 Wald, Wald ZH CH", "CHF", "191.15", "", "**** **** **** 1234"],
|
||||
["05.11.25", "06.11.25", "Coop-1252 Wald, Wald ZH CH", "CHF", "51.35", "", "**** **** **** 1234"],
|
||||
["06.11.25", "07.11.25", "Ticketcorner*90024523, 410900800800 CH", "CHF", "158.75", "", "**** **** **** 1234"],
|
||||
["07.11.25", "10.11.25", "SUMUP *JW BROW&LASH, LACHEN CH", "CHF", "290.00", "", "**** **** **** 1234"],
|
||||
["07.11.25", "10.11.25", "Agrola TopShop Wald, Wald ZH CH", "CHF", "104.50", "", "**** **** **** 1234"],
|
||||
["07.11.25", "10.11.25", "Agrola TopShop Wald, Wald ZH CH", "CHF", "10.30", "", "**** **** **** 1234"],
|
||||
["08.11.25", "10.11.25", "Pizza Thal GmbH, Murgenthal CH", "CHF", "19.50", "", "**** **** **** 1234"],
|
||||
["09.11.25", "10.11.25", "TEMU.COM, BASEL CH", "CHF", "190.85", "", "**** **** **** 1234"],
|
||||
["10.11.25", "11.11.25", "Sinora GmbH, Bonstetten CH", "CHF", "115.20", "", "**** **** **** 1234"],
|
||||
["10.11.25", "11.11.25", "WAL*HAAR SHOP CH AG, UETENDORF CH", "CHF", "33.85", "", "**** **** **** 1234"],
|
||||
["11.11.25", "12.11.25", "Bleiche Fitness, Wald ZH CH", "CHF", "90.00", "", "**** **** **** 1234"],
|
||||
["11.11.25", "12.11.25", "Parkhaus Urania, Zurich CH", "CHF", "14.00", "", "**** **** **** 1234"],
|
||||
["12.11.25", "13.11.25", "Coop-4958 Rapperswil, Rapperswil SG CH", "CHF", "24.80", "", "**** **** **** 1234"],
|
||||
["13.11.25", "14.11.25", "TEMU.COM, BASEL CH", "CHF", "56.00", "", "**** **** **** 1234"],
|
||||
["13.11.25", "14.11.25", "TEMU.COM, BASEL CH", "CHF", "5.95", "", "**** **** **** 1234"],
|
||||
["13.11.25", "14.11.25", "TEMU.COM, BASEL CH", "CHF", "15.25", "", "**** **** **** 1234"],
|
||||
["14.11.25", "17.11.25", "Santa Lucia Altstetten, Zurich", "CH", "38.00", "", "**** **** **** 1234"],
|
||||
["14.11.25", "17.11.25", "Agrola TopShop Wald, Wald ZH", "CH", "126.80", "", "**** **** **** 1234"],
|
||||
["14.11.25", "17.11.25", "GITHUB, INC., GITHUB.COM", "US", "USD 0.70", "0.60", "**** **** **** 1234"],
|
||||
["15.11.25", "17.11.25", "Jumbo-6017 Hinwil, Hinwil", "CH", "53.85", "", "**** **** **** 1234"],
|
||||
["15.11.25", "17.11.25", "Coop-4253 Hinwil Wasseri, Hinwil", "CH", "57.00", "", "**** **** **** 1234"],
|
||||
["15.11.25", "17.11.25", "Coop-4253 Hinwil Wasseri, Hinwil", "CH", "13.95", "", "**** **** **** 1234"],
|
||||
["14.11.25", "17.11.25", "NEGISHI ALTSTETTEN BAH, ZUERICH", "CH", "31.90", "", "**** **** **** 1234"],
|
||||
["15.11.25", "17.11.25", "Google PixVerse AI Vi, 650-2530000", "US", "5.20", "", "**** **** **** 1234"],
|
||||
["15.11.25", "17.11.25", "CANVA* I04701-26464248, CANVA.COM", "US", "12.00", "", "**** **** **** 1234"],
|
||||
["17.11.25", "18.11.25", "ANTHROPIC, ANTHROPIC.COM", "US", "USD 270.25", "220.65", "**** **** **** 1234"],
|
||||
["18.11.25", "19.11.25", "Coop-1252 Wald, Wald ZH", "CH", "7.80", "", "**** **** **** 1234"],
|
||||
["18.11.25", "19.11.25", "WAL*WESTHIVE, DUEBENDORF", "CH", "19.30", "", "**** **** **** 1234"],
|
||||
["18.11.25", "19.11.25", "TIERARZTPRAXIS BACHTEL, WALD ZH", "CH", "343.30", "", "**** **** **** 1234"],
|
||||
["18.11.25", "19.11.25", "ANTHROPIC, ANTHROPIC.COM", "US", "USD 5.41", "4.45", "**** **** **** 1234"],
|
||||
["18.11.25", "20.11.25", "WAL*WESTHIVE, DUEBENDORF", "CH", "19.35", "", "**** **** **** 1234"],
|
||||
["19.11.25", "20.11.25", "Wuest Partner, Zurich", "CH", "324.30", "", "**** **** **** 1234"],
|
||||
["19.11.25", "21.11.25", "SHOP.ASFINAG.AT, WIEN", "AT", "EUR 12.40", "11.80", "**** **** **** 1234"],
|
||||
["20.11.25", "21.11.25", "Coop-1252 Wald, Wald ZH", "CH", "85.35", "", "**** **** **** 1234"],
|
||||
["20.11.25", "21.11.25", "WAL*WESTHIVE, DUEBENDORF", "CH", "17.95", "", "**** **** **** 1234"],
|
||||
["20.11.25", "21.11.25", "WAL*WESTHIVE, DUEBENDORF", "CH", "6.30", "", "**** **** **** 1234"],
|
||||
["21.11.25", "24.11.25", "STWEG Ambassador House, Glattbrugg", "CH", "7.50", "", "**** **** **** 1234"],
|
||||
["21.11.25", "24.11.25", "Migros M Dubendorf Stettb, Dubendorf", "CH", "16.95", "", "**** **** **** 1234"],
|
||||
["21.11.25", "24.11.25", "MCDONALDS RESTAURANT G, WALLISELLEN", "CH", "13.00", "", "**** **** **** 1234"],
|
||||
["22.11.25", "24.11.25", "Ski- und Snowboard-Center, Neuhaus SG", "CH", "128.00", "", "**** **** **** 1234"],
|
||||
["21.11.25", "24.11.25", "WAL*WESTHIVE, DUEBENDORF", "CH", "408.25", "", "**** **** **** 1234"],
|
||||
["22.11.25", "24.11.25", "GOOGLE *Duolingo Langu, g.co/HelpPay#", "US", "9.20", "", "**** **** **** 1234"],
|
||||
["23.11.25", "24.11.25", "Migros Santispark Bad, Abtwil SG", "CH", "48.60", "", "**** **** **** 1234"],
|
||||
["23.11.25", "24.11.25", "Migros Santispark Bad, Abtwil SG", "CH", "8.50", "", "**** **** **** 1234"],
|
||||
["23.11.25", "24.11.25", "Migros ELS Santispark PH, Abtwil SG", "CH", "3.00", "", "**** **** **** 1234"],
|
||||
["23.11.25", "24.11.25", "AVIA Service Autogrill, St. Margrethe", "CH", "121.80", "", "**** **** **** 1234"],
|
||||
["23.11.25", "24.11.25", "AVIA Service Autogrill, St. Margrethe", "CH", "10.50", "", "**** **** **** 1234"],
|
||||
["23.11.25", "24.11.25", "KONDITOREI VOLAND LAUP, LAUPEN ZH", "CH", "62.80", "", "**** **** **** 1234"],
|
||||
["23.11.25", "25.11.25", "SHOP.ASFINAG.AT, WIEN", "AT", "EUR 9.30", "8.90", "**** **** **** 1234"],
|
||||
["24.11.25", "25.11.25", "Landi, Wald", "CH", "27.15", "", "**** **** **** 1234"],
|
||||
["24.11.25", "26.11.25", "SHOP.ASFINAG.AT, WIEN", "AT", "EUR 12.50", "11.95", "**** **** **** 1234"],
|
||||
["26.11.25", "27.11.25", "MyPlace, Affoltern am", "CH", "10.30", "", "**** **** **** 1234"],
|
||||
["27.11.25", "28.11.25", "Coop-1911 Ruti, Ruti ZH", "CH", "57.20", "", "**** **** **** 1234"],
|
||||
["28.11.25", "01.12.25", "Manor AG, Hinwil", "CH", "10.10", "", "**** **** **** 1234"],
|
||||
["28.11.25", "01.12.25", "Manor AG, Hinwil", "CH", "136.25", "", "**** **** **** 1234"],
|
||||
["28.11.25", "01.12.25", "Coop-4253 Hinwil Wasseri, Hinwil", "CH", "205.35", "", "**** **** **** 1234"],
|
||||
["01.12.25", "02.12.25", "GOOGLE *ADS5192965135, cc§google.com", "IE", "59.00", "", "**** **** **** 1234"],
|
||||
["02.12.25", "03.12.25", "Coop-4253 Hinwil Wasseri, Hinwil", "CH", "112.50", "", "**** **** **** 1234"],
|
||||
["02.12.25", "03.12.25", "Coop-1252 Wald, Wald ZH", "CH", "117.70", "", "**** **** **** 1234"],
|
||||
["03.12.25", "03.12.25", "Autodesk ADY, Dublin 2", "IE", "1'989.05", "", "**** **** **** 1234"],
|
||||
["03.12.25", "03.12.25", "NETFLIX.COM, Amsterdam", "NL", "22.90", "", "**** **** **** 1234"],
|
||||
["02.12.25", "03.12.25", "TAVILY AI, WWW.TAVILY.CO", "US", "USD 17.48", "14.50", "**** **** **** 1234"],
|
||||
["02.12.25", "03.12.25", "GOOGLE *YouTubePremium, g.co/HelpPay#", "US", "33.90", "", "**** **** **** 1234"],
|
||||
["04.12.25", "05.12.25", "Migros M Dubendorf Stettb, Dubendorf", "CH", "103.20", "", "**** **** **** 1234"],
|
||||
["04.12.25", "05.12.25", "WAL*WESTHIVE, DUEBENDORF", "CH", "19.80", "", "**** **** **** 1234"],
|
||||
["05.12.25", "08.12.25", "MICROSOFT#G127221615, MSBILL.INFO", "CH", "55.20", "", "**** **** **** 1234"],
|
||||
["04.12.25", "08.12.25", "Ristorante Amalfi AG, Zurich", "CH", "67.00", "", "**** **** **** 1234"],
|
||||
["05.12.25", "08.12.25", "Landi, Wald", "CH", "11.90", "", "**** **** **** 1234"],
|
||||
["05.12.25", "08.12.25", "Notariat Wald, Wald ZH", "CH", "40.00", "", "**** **** **** 1234"],
|
||||
["05.12.25", "08.12.25", "Coop-1252 Wald, Wald ZH", "CH", "149.75", "", "**** **** **** 1234"],
|
||||
["06.12.25", "08.12.25", "TIERARZTPRAXIS BACHTEL, WALD ZH", "CH", "80.30", "", "**** **** **** 1234"],
|
||||
["07.12.25", "08.12.25", "HERAHELP.COM, 0044330027088", "CY", "EUR 19.95", "19.25", "**** **** **** 1234"],
|
||||
["07.12.25", "08.12.25", "Google One, 650-2530000", "US", "10.00", "", "**** **** **** 1234"],
|
||||
["10.12.25", "11.12.25", "TAVILY AI, WWW.TAVILY.CO", "US", "USD 43.26", "35.95", "**** **** **** 1234"],
|
||||
["11.12.25", "12.12.25", "Coop-4253 Hinwil Wasseri, Hinwil", "CH", "247.40", "", "**** **** **** 1234"],
|
||||
["14.11.25", "17.11.25", "ONLY, Zurich", "CH", "101.75", "", "**** **** **** 1234"],
|
||||
["14.11.25", "17.11.25", "SUMUP *MARYS COSMETICS, USTER", "CH", "419.00", "", "**** **** **** 1234"],
|
||||
["14.11.25", "17.11.25", "S2P*Calzedonia, 0447554090", "IT", "86.75", "", "**** **** **** 1234"],
|
||||
["14.11.25", "17.11.25", "Parkhaus Urania, Zurich", "CH", "12.00", "", "**** **** **** 1234"],
|
||||
["15.11.25", "17.11.25", "JustEat, Zurich", "CH", "193.70", "", "**** **** **** 1234"],
|
||||
["15.11.25", "17.11.25", "ONLY, Hinwil", "CH", "126.10", "", "**** **** **** 1234"],
|
||||
["15.11.25", "17.11.25", "Coop-4253 Hinwil Wasseri, Hinwil", "CH", "242.70", "", "**** **** **** 1234"],
|
||||
["15.11.25", "17.11.25", "Manor AG, Hinwil", "CH", "35.35", "", "**** **** **** 1234"],
|
||||
["15.11.25", "17.11.25", "Valentyna Nails, R?ti", "CH", "160.00", "", "**** **** **** 1234"],
|
||||
["13.11.25", "17.11.25", "redcare-apotheke, Sevenum", "NL", "79.90", "", "**** **** **** 1234"],
|
||||
["16.11.25", "17.11.25", "NORDSTERN, Basel", "CH", "64.20", "", "**** **** **** 1234"],
|
||||
["18.11.25", "19.11.25", "La Makeup Sp. z. o.o., Warsaw", "PL", "104.85", "", "**** **** **** 1234"],
|
||||
["18.11.25", "19.11.25", "Bleiche Fitness, Wald ZH", "CH", "90.00", "", "**** **** **** 1234"],
|
||||
["20.11.25", "21.11.25", "Bleiche Fitness, Wald ZH", "CH", "90.00", "", "**** **** **** 1234"],
|
||||
["22.11.25", "24.11.25", "SHELL PETTNAU 5538, PETTNAU", "AT", "94.60", "", "**** **** **** 1234"],
|
||||
["22.11.25", "24.11.25", "SHELL PETTNAU 5538, PETTNAU", "AT", "EUR 7.39", "7.05", "**** **** **** 1234"],
|
||||
["22.11.25", "24.11.25", "SHELL PETTNAU 5538, PETTNAU", "AT", "EUR 4.39", "4.20", "**** **** **** 1234"],
|
||||
["22.11.25", "24.11.25", "Coop-1252 Wald, Wald ZH", "CH", "57.85", "", "**** **** **** 1234"],
|
||||
["22.11.25", "24.11.25", "ASFINAG S16 HMS ST JAKOB, ST.ANTON/ARLB", "AT", "EUR 12.50", "11.95", "**** **** **** 1234"],
|
||||
["24.11.25", "25.11.25", "Posthotel Achenkirc, Achenkirch", "AT", "EUR 1'211.80", "1'160.25", "**** **** **** 1234"],
|
||||
["24.11.25", "25.11.25", "MS* CITYPOP2NIGHTBASE, ZURICH", "CH", "15.00", "", "**** **** **** 1234"],
|
||||
["24.11.25", "25.11.25", "MS* CITYPOP2NIGHTBASE, ZURICH", "CH", "8.40", "", "**** **** **** 1234"],
|
||||
["24.11.25", "25.11.25", "BKG*BOOKING.COM HOTEL, (888)850-3958", "NL", "187.95", "", "**** **** **** 1234"],
|
||||
["25.11.25", "26.11.25", "Coop-1252 Wald, Wald ZH", "CH", "63.00", "", "**** **** **** 1234"],
|
||||
["25.11.25", "26.11.25", "Bleiche Fitness, Wald ZH", "CH", "90.00", "", "**** **** **** 1234"],
|
||||
["26.11.25", "27.11.25", "Hallenbad Wald, Wald ZH", "CH", "54.00", "", "**** **** **** 1234"],
|
||||
["27.11.25", "28.11.25", "Bestseller AS, Amsterdam", "NL", "35.90", "", "**** **** **** 1234"],
|
||||
["29.11.25", "01.12.25", "CLUB NORDSTERN, BASEL", "CH", "6.00", "", "**** **** **** 1234"],
|
||||
["29.11.25", "01.12.25", "CLUB NORDSTERN, BASEL", "CH", "6.00", "", "**** **** **** 1234"],
|
||||
["02.12.25", "03.12.25", "TICKETCORNER CH, RUEMLANG", "CH", "84.90", "", "**** **** **** 1234"],
|
||||
["02.12.25", "03.12.25", "Shell Waldhof, Wald ZH", "CH", "126.15", "", "**** **** **** 1234"],
|
||||
["02.12.25", "03.12.25", "Shell Waldhof, Wald ZH", "CH", "3.70", "", "**** **** **** 1234"],
|
||||
["02.12.25", "03.12.25", "Bleiche Fitness, Wald ZH", "CH", "90.00", "", "**** **** **** 1234"],
|
||||
["02.12.25", "03.12.25", "GOOGLE *YouTube Member, g.co/HelpPay#", "US", "15.00", "", "**** **** **** 1234"],
|
||||
["03.12.25", "04.12.25", "APODRO APOTHEKE WALD, WALD ZH", "CH", "54.90", "", "**** **** **** 1234"],
|
||||
["04.12.25", "05.12.25", "TICKETCORNER CH, RUEMLANG", "CH", "285.70", "", "**** **** **** 1234"],
|
||||
["05.12.25", "05.12.25", "NOII.CH DATING, WINTERTHUR", "CH", "74.10", "", "**** **** **** 1234"],
|
||||
["06.12.25", "08.12.25", "TEMU.COM, BASEL", "CH", "72.50", "", "**** **** **** 1234"],
|
||||
["06.12.25", "08.12.25", "Coop-4253 Hinwil Wasseri, Hinwil", "CH", "326.85", "", "**** **** **** 1234"],
|
||||
["06.12.25", "08.12.25", "Decathlon, Hinwil", "CH", "150.00", "", "**** **** **** 1234"],
|
||||
["07.12.25", "09.12.25", "Migros-Genossenschafts-Bund, Zürich", "CH", "200.00", "", "**** **** **** 1234"],
|
||||
["08.12.25", "10.12.25", "Zürich HB, Zürich", "CH", "45.00", "", "**** **** **** 1234"],
|
||||
["09.12.25", "11.12.25", "Amazon Marketplace, amazon.de", "DE", "120.00", "", "**** **** **** 1234"],
|
||||
["10.12.25", "12.12.25", "IKEA, Dietlikon", "CH", "350.00", "", "**** **** **** 1234"],
|
||||
["11.12.25", "13.12.25", "Manor, Zürich", "CH", "75.00", "", "**** **** **** 1234"],
|
||||
["12.12.25", "14.12.25", "Zalando, zalando.ch", "CH", "90.00", "", "**** **** **** 1234"],
|
||||
["13.12.25", "15.12.25", "SBB CFF FFS, Bern", "CH", "60.00", "", "**** **** **** 1234"],
|
||||
["14.12.25", "16.12.25", "Apple Store, Zürich", "CH", "999.00", "", "**** **** **** 1234"],
|
||||
["15.12.25", "17.12.25", "Migros-Genossenschafts-Bund, Zürich", "CH", "150.00", "", "**** **** **** 1234"],
|
||||
["16.12.25", "18.12.25", "Coop-4253 Hinwil Wasseri, Hinwil", "CH", "250.00", "", "**** **** **** 1234"],
|
||||
["17.12.25", "19.12.25", "Shell Waldhof, Wald ZH", "CH", "60.00", "", "**** **** **** 1234"],
|
||||
["18.12.25", "20.12.25", "Zürich HB, Zürich", "CH", "30.00", "", "**** **** **** 1234"],
|
||||
["19.12.25", "21.12.25", "Amazon Marketplace, amazon.de", "DE", "80.00", "", "**** **** **** 1234"],
|
||||
["20.12.25", "22.12.25", "IKEA, Dietlikon", "CH", "400.00", "", "**** **** **** 1234"],
|
||||
["21.12.25", "23.12.25", "Manor, Zürich", "CH", "100.00", "", "**** **** **** 1234"],
|
||||
["22.12.25", "24.12.25", "Zalando, zalando.ch", "CH", "110.00", "", "**** **** **** 1234"],
|
||||
["23.12.25", "25.12.25", "SBB CFF FFS, Bern", "CH", "70.00", "", "**** **** **** 1234"],
|
||||
["24.12.25", "26.12.25", "Apple Store, Zürich", "CH", "1200.00", "", "**** **** **** 1234"],
|
||||
["25.12.25", "27.12.25", "Migros-Genossenschafts-Bund, Zürich", "CH", "180.00", "", "**** **** **** 1234"],
|
||||
["26.12.25", "28.12.25", "Coop-4253 Hinwil Wasseri, Hinwil", "CH", "300.00", "", "**** **** **** 1234"],
|
||||
["27.12.25", "29.12.25", "Shell Waldhof, Wald ZH", "CH", "70.00", "", "**** **** **** 1234"],
|
||||
["28.12.25", "30.12.25", "Zürich HB, Zürich", "CH", "40.00", "", "**** **** **** 1234"],
|
||||
["29.12.25", "31.12.25", "Amazon Marketplace, amazon.de", "DE", "100.00", "", "**** **** **** 1234"],
|
||||
["30.12.25", "01.01.26", "IKEA, Dietlikon", "CH", "450.00", "", "**** **** **** 1234"],
|
||||
["31.12.25", "02.01.26", "Manor, Zürich", "CH", "125.00", "", "**** **** **** 1234"]
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
================================================================================
|
||||
|
|
@ -1,239 +0,0 @@
|
|||
# AI Call Iteration Flow - JSON Merging System
|
||||
|
||||
This document describes the iteration flow for handling large AI JSON responses that may arrive truncated and therefore have to be merged back together across multiple iterations.
|
||||
|
||||
## Overview
|
||||
|
||||
When an AI response is too large, it may be truncated (cut) at an arbitrary point. The iteration system:
|
||||
1. Detects incomplete JSON
|
||||
2. Requests continuation from the AI
|
||||
3. Merges the continuation with the existing JSON
|
||||
4. Repeats until the JSON is complete or the maximum number of merge failures is reached (see the loop sketch below)
|
||||
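A minimal sketch of this loop is shown below. It reuses the helper names documented in the sections that follow (`buildContinuationContext`, `JsonResponseHandler.mergeJsonStringsWithOverlap`, `getContexts`); the import paths come from the Files Involved table, but the signatures are simplified, so treat it as orientation, not the actual implementation in `subAiCallLooping.py`.

```python
# Simplified sketch of the iteration loop; not the real implementation.
# Import paths follow the "Files Involved" table; signatures are assumptions.
import json

from modules.shared.jsonContinuation import getContexts
from modules.shared.jsonUtils import buildContinuationContext
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler

MAX_MERGE_FAILURES = 3


def iterateUntilComplete(aiService, originalPrompt: str, allSections) -> str:
    jsonBase = None                # CUT JSON used as the merge base
    lastValidCompletePart = None   # CLOSED JSON kept as fallback
    lastRawResponse = None
    mergeFailCount = 0
    prompt = originalPrompt        # Step 1: first iteration uses the original prompt

    while True:
        result = aiService.callAi(prompt).content                  # Step 2

        # Step 4: merge the new fragment into the existing base
        if jsonBase is None:
            candidateJson = result
        else:
            merged, hasOverlap = JsonResponseHandler.mergeJsonStringsWithOverlap(jsonBase, result)
            if not hasOverlap:                                      # merge failed
                mergeFailCount += 1
                if mergeFailCount >= MAX_MERGE_FAILURES:
                    return lastValidCompletePart                    # fallback
                continue                                            # retry, state unchanged
            candidateJson = merged
        lastRawResponse = candidateJson                             # only after a successful merge

        try:                                                        # direct parse shortcut
            json.loads(candidateJson)
            return candidateJson                                    # commit and finish
        except json.JSONDecodeError:
            pass

        contexts = getContexts(candidateJson)                       # Step 5
        if contexts.jsonParsingSuccess and contexts.overlapContext == "":
            return contexts.completePart                            # Step 6, Case A: finished
        if contexts.jsonParsingSuccess:                             # Case B: continue
            jsonBase = contexts.hierarchyContext                    # CUT version for next merge
            lastValidCompletePart = contexts.completePart           # CLOSED version as fallback
            lastRawResponse = jsonBase
            mergeFailCount = 0
            prompt = buildContinuationContext(allSections, lastRawResponse)  # further args omitted
        else:                                                       # Case C: retry with same prompt
            mergeFailCount += 1
            if mergeFailCount >= MAX_MERGE_FAILURES:
                return lastValidCompletePart
```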
|
||||
---
|
||||
|
||||
## Key Variables
|
||||
|
||||
| Variable | Type | Purpose |
|
||||
|----------|------|---------|
|
||||
| `jsonBase` | `str \| None` | The merged JSON string (CUT version for overlap matching) |
|
||||
| `candidateJson` | `str` | Temporary holder for merged result until validated |
|
||||
| `lastValidCompletePart` | `str \| None` | Fallback - last successfully parsed CLOSED JSON |
|
||||
| `lastOverlapContext` | `str` | Context for retry/continuation prompts |
|
||||
| `lastHierarchyContextForPrompt` | `str` | Context for retry/continuation prompts |
|
||||
| `mergeFailCount` | `int` | Global counter (max 3 failures) |
|
||||
|
||||
---
|
||||
|
||||
## Key Distinction: hierarchyContext vs completePart
|
||||
|
||||
| Field | Description | Use Case |
|
||||
|-------|-------------|----------|
|
||||
| `hierarchyContext` | **CUT JSON** - truncated at cut point | Used as `jsonBase` for merging with next AI fragment |
|
||||
| `completePart` | **CLOSED JSON** - all structures properly closed | Used for validation, parsing, and fallback |
|
||||
|
||||
**Why this matters:**
|
||||
- The next AI fragment starts with an **overlap** that matches the CUT point
|
||||
- If we used `completePart` (closed), the overlap detection would FAIL
|
||||
- We must use `hierarchyContext` (cut) so that overlap matching works correctly (see the example below)
|
||||
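A hypothetical truncated response makes the distinction concrete. The strings below are illustrative only; the exact cut point and repair behaviour come from `getContexts()`.

```python
# Illustrative example only; actual cut points and repairs come from getContexts().
truncatedResponse = '{"sections": [{"title": "Intro", "items": ["a", "b"]}, {"title": "Det'

# hierarchyContext: CUT JSON, left exactly at the cut point.
# The next AI fragment is expected to start with an overlap of this tail
# (e.g. '{"title": "Det...'), so overlap matching can stitch the pieces together.
hierarchyContext = '{"sections": [{"title": "Intro", "items": ["a", "b"]}, {"title": "Det'

# completePart: CLOSED JSON, with the dangling structures closed/repaired.
# It parses, so it can be validated and kept as the fallback result.
completePart = '{"sections": [{"title": "Intro", "items": ["a", "b"]}]}'
```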
|
||||
---
|
||||
|
||||
## Flow Steps
|
||||
|
||||
### Step 1: BUILD PROMPT
|
||||
|
||||
**Location:** `subAiCallLooping.py` lines 163-212
|
||||
**Function:** `buildContinuationContext()` from `modules/shared/jsonUtils.py`
|
||||
|
||||
- **First iteration:** Use original prompt
|
||||
- **Continuation:** `buildContinuationContext(allSections, lastRawResponse, ...)`
|
||||
- Internally calls `getContexts(lastRawResponse)` to get overlap/hierarchy
|
||||
- Builds continuation prompt with `overlapContext` + `hierarchyContextForPrompt`
|
||||
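In sketch form, prompt selection for an iteration looks roughly like this; argument names beyond `allSections` and `lastRawResponse` are omitted, see `jsonUtils.py` for the real signature.

```python
# Sketch only: choose the prompt for the current iteration.
from modules.shared.jsonUtils import buildContinuationContext


def buildPromptForIteration(originalPrompt: str, allSections, lastRawResponse):
    if lastRawResponse is None:
        # First iteration: send the original prompt unchanged.
        return originalPrompt
    # Continuation: getContexts() is called internally to derive
    # overlapContext and hierarchyContextForPrompt from lastRawResponse.
    return buildContinuationContext(allSections, lastRawResponse)
```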
|
||||
### Step 2: CALL AI
|
||||
|
||||
**Location:** `subAiCallLooping.py` lines 214-299
|
||||
**Function:** `self.aiService.callAi(request)`
|
||||
|
||||
- Returns `response.content` as `result`
|
||||
- NOTE: Do NOT update `lastRawResponse` yet! (only after successful merge)
|
||||
|
||||
### Step 4: MERGE
|
||||
|
||||
**Location:** `subAiCallLooping.py` lines 338-396
|
||||
**Function:** `JsonResponseHandler.mergeJsonStringsWithOverlap()` from `modules/services/serviceAi/subJsonResponseHandling.py`
|
||||
|
||||
```
|
||||
IF first iteration (jsonBase is None):
|
||||
→ candidateJson = result
|
||||
ELSE:
|
||||
→ mergedJsonString, hasOverlap = mergeJsonStringsWithOverlap(jsonBase, result)
|
||||
|
||||
IF hasOverlap = False (MERGE FAILED):
|
||||
→ mergeFailCount++
|
||||
→ If mergeFailCount >= 3: return lastValidCompletePart (fallback)
|
||||
→ Else: continue (retry with unchanged jsonBase AND lastRawResponse!)
|
||||
ELSE:
|
||||
→ candidateJson = mergedJsonString (don't update jsonBase yet!)
|
||||
|
||||
→ lastRawResponse = candidateJson (ONLY after first iteration or successful merge!)
|
||||
|
||||
TRY DIRECT PARSE of candidateJson:
|
||||
IF parse succeeds:
|
||||
→ jsonBase = candidateJson (commit)
|
||||
→ FINISHED! Return normalized result
|
||||
ELSE:
|
||||
→ Proceed to Step 5
|
||||
```
|
||||
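The core idea behind `mergeJsonStringsWithOverlap()` is suffix/prefix overlap matching. The toy function below is not the real merger (`ModularJsonMerger` is considerably more robust), but it shows the principle:

```python
# Toy illustration of overlap merging; the real ModularJsonMerger is more robust.
def naiveMergeWithOverlap(jsonBase: str, fragment: str, minOverlap: int = 8):
    """Return (merged, hasOverlap). Finds the longest suffix of jsonBase that is
    also a prefix of the new fragment and joins the two strings at that point."""
    maxLen = min(len(jsonBase), len(fragment))
    for size in range(maxLen, minOverlap - 1, -1):
        if jsonBase.endswith(fragment[:size]):
            return jsonBase + fragment[size:], True
    return jsonBase, False


base = '{"rows": [{"id": 1}, {"id": 2}, {"id'
frag = '{"id": 2}, {"id": 3}, {"id": 4}]}'
merged, ok = naiveMergeWithOverlap(base, frag)
# ok is True; merged == '{"rows": [{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}]}'
```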
|
||||
### Step 5: GET CONTEXTS
|
||||
|
||||
**Location:** `subAiCallLooping.py` lines 420-427
|
||||
**Function:** `getContexts()` from `modules/shared/jsonContinuation.py`
|
||||
|
||||
```python
|
||||
contexts = getContexts(candidateJson)
|
||||
```
|
||||
|
||||
Returns `JsonContinuationContexts`:
|
||||
- `overlapContext`: `""` if JSON is complete (no cut point)
|
||||
- `hierarchyContext`: CUT JSON (for merging with next fragment)
|
||||
- `hierarchyContextForPrompt`: CUT JSON with budget limits (for prompts)
|
||||
- `completePart`: CLOSED JSON (repaired if needed)
|
||||
- `jsonParsingSuccess`: `True` if completePart is valid JSON
|
||||
|
||||
**Enhancement:** If original JSON is already complete → `overlapContext = ""`
|
||||
This signals "JSON is complete, no more continuation needed"
|
||||
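For a truncated `candidateJson`, the returned contexts might look like this. The values are illustrative; the trimming applied to `hierarchyContextForPrompt` depends on the configured budget.

```python
contexts = getContexts('{"rows": [{"id": 1}, {"id": 2}, {"id')

# Illustrative field values (actual repair/trimming is up to getContexts):
contexts.overlapContext             # tail around the cut point, e.g. '{"id": 2}, {"id'
contexts.hierarchyContext           # '{"rows": [{"id": 1}, {"id": 2}, {"id'  (CUT)
contexts.hierarchyContextForPrompt  # CUT JSON, possibly shortened to fit the prompt budget
contexts.completePart               # '{"rows": [{"id": 1}, {"id": 2}]}'      (CLOSED)
contexts.jsonParsingSuccess         # True, because completePart parses
```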
|
||||
### Step 6: DECIDE
|
||||
|
||||
**Location:** `subAiCallLooping.py` lines 429-528
|
||||
|
||||
#### Case A: `jsonParsingSuccess=true` AND `overlapContext=""`
|
||||
**→ FINISHED**
|
||||
- JSON is complete (no cut point)
|
||||
- `jsonBase = contexts.completePart` (use CLOSED version for final result)
|
||||
- Return `completePart` as result
|
||||
|
||||
#### Case B: `jsonParsingSuccess=true` AND `overlapContext!=""`
|
||||
**→ CONTINUE to next iteration**
|
||||
- JSON parseable but has cut point
|
||||
- `jsonBase = contexts.hierarchyContext` ← **CUT version for next merge!**
|
||||
- `lastValidCompletePart = contexts.completePart` ← **CLOSED version for fallback**
|
||||
- Store contexts for next prompt
|
||||
- `mergeFailCount = 0` (reset on success)
|
||||
- `lastRawResponse = jsonBase`
|
||||
- Continue to next iteration
|
||||
|
||||
#### Case C: `jsonParsingSuccess=false`
|
||||
**→ RETRY with same prompt**
|
||||
- Do NOT update `jsonBase` (keep previous valid state)
|
||||
- `mergeFailCount++`
|
||||
- If `mergeFailCount >= 3`: return `lastValidCompletePart` (fallback)
|
||||
- Else: continue (retry with unchanged jsonBase/lastRawResponse)
|
||||
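Taken together, the three cases reduce to a small decision function. This is a sketch only; the real code at lines 429-528 also handles logging and the context bookkeeping described above.

```python
# Sketch of the Step 6 decision; returns what the loop should do next.
def decide(contexts, mergeFailCount: int, maxFailures: int = 3) -> str:
    if contexts.jsonParsingSuccess and contexts.overlapContext == "":
        return "FINISHED"   # Case A: return completePart
    if contexts.jsonParsingSuccess:
        return "CONTINUE"   # Case B: jsonBase = hierarchyContext, fallback = completePart
    if mergeFailCount + 1 >= maxFailures:
        return "FALLBACK"   # Case C after too many failures: return lastValidCompletePart
    return "RETRY"          # Case C: same prompt, jsonBase unchanged
```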
|
||||
---
|
||||
|
||||
## Flow Diagram
|
||||
|
||||
```
|
||||
┌───────────────────────────────────────────────────────────────┐
|
||||
│ ITERATION START │
|
||||
└───────────────────────────┬───────────────────────────────────┘
|
||||
│
|
||||
┌───────────────────────────▼───────────────────────────────────┐
|
||||
│ STEP 1: BUILD PROMPT │
|
||||
│ - First: original prompt │
|
||||
│ - Next: buildContinuationContext(lastRawResponse) │
|
||||
└───────────────────────────┬───────────────────────────────────┘
|
||||
│
|
||||
┌───────────────────────────▼───────────────────────────────────┐
|
||||
│ STEP 2: CALL AI → result │
|
||||
└───────────────────────────┬───────────────────────────────────┘
|
||||
│
|
||||
┌───────────────────────────▼───────────────────────────────────┐
|
||||
│ STEP 4: MERGE jsonBase + result → candidateJson │
|
||||
└───────────────────────────┬───────────────────────────────────┘
|
||||
│
|
||||
┌────────────▼────────────┐
|
||||
│ Merge OK? │
|
||||
└────────────┬────────────┘
|
||||
│
|
||||
┌─────────────────────┼─────────────────────┐
|
||||
│ NO │ YES │
|
||||
▼ ▼ │
|
||||
┌──────────────┐ ┌──────────────────┐ │
|
||||
│ fails++ │ │ TRY DIRECT PARSE │ │
|
||||
│ if >=3: │ │ of candidateJson │ │
|
||||
│ RETURN │ └────────┬─────────┘ │
|
||||
│ fallback │ │ │
|
||||
│ else: RETRY │ ┌────────▼─────────┐ │
|
||||
│ (continue) │ │ Parse OK? │ │
|
||||
└──────────────┘ └────────┬─────────┘ │
|
||||
│ │
|
||||
┌─────────────────────┼─────────────────────┐
|
||||
│ YES │ NO │
|
||||
▼ ▼ │
|
||||
┌──────────────┐ ┌──────────────────────────────┐
|
||||
│ FINISHED ✓ │ │ STEP 5: getContexts() │
|
||||
│ Return │ │ → jsonParsingSuccess │
|
||||
│ normalized │ │ → overlapContext │
|
||||
│ result │ └────────────┬─────────────────┘
|
||||
└──────────────┘ │
|
||||
┌────────────▼────────────────────┐
|
||||
│ STEP 6: DECIDE │
|
||||
└────────────┬────────────────────┘
|
||||
│
|
||||
┌────────────────────────────┼────────────────────────────┐
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌───────────────────┐ ┌───────────────────────┐ ┌───────────────────┐
|
||||
│ success=true │ │ success=true │ │ success=false │
|
||||
│ overlap="" │ │ overlap!="" │ │ │
|
||||
│ ───────────── │ │ ───────────────── │ │ ───────────── │
|
||||
│ FINISHED ✓ │ │ CONTINUE │ │ RETRY │
|
||||
│ │ │ │ │ │
|
||||
│ jsonBase = │ │ jsonBase = │ │ jsonBase unchanged│
|
||||
│ completePart │ │ hierarchyContext │ │ fails++ │
|
||||
│ (CLOSED) │ │ (CUT for merge!) │ │ │
|
||||
│ │ │ │ │ if >=3: fallback │
|
||||
│ Return result │ │ fallback = │ │ else: retry │
|
||||
│ │ │ completePart │ │ │
|
||||
│ │ │ (CLOSED) │ │ │
|
||||
│ │ │ │ │ │
|
||||
│ │ │ Next iteration → │ │ │
|
||||
└───────────────────┘ └───────────────────────┘ └───────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files Involved
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `modules/services/serviceAi/subAiCallLooping.py` | Main iteration loop |
|
||||
| `modules/shared/jsonContinuation.py` | `getContexts()` - context extraction & repair |
|
||||
| `modules/shared/jsonUtils.py` | `buildContinuationContext()` - prompt building |
|
||||
| `modules/services/serviceAi/subJsonResponseHandling.py` | `mergeJsonStringsWithOverlap()` |
|
||||
| `modules/services/serviceAi/subJsonMerger.py` | `ModularJsonMerger` - actual merge logic |
|
||||
| `modules/datamodels/datamodelAi.py` | `JsonContinuationContexts` model |
|
||||
|
||||
---
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Merge Failures
|
||||
- Max 3 consecutive failures allowed
|
||||
- On failure: retry with unchanged `jsonBase` (previous valid state)
|
||||
- After 3 failures: return `lastValidCompletePart` as fallback
|
||||
|
||||
### Parse Failures
|
||||
- If `getContexts()` cannot produce valid JSON: increment fail counter
|
||||
- Retry with same prompt (don't update jsonBase)
|
||||
- After 3 failures: return `lastValidCompletePart` as fallback
|
||||
|
||||
### Fallback Strategy
|
||||
- `lastValidCompletePart` stores the last successfully parsed CLOSED JSON
|
||||
- Always available as a fallback when merging or parsing keeps failing
|
||||
- Ensures we return valid JSON even after multiple failures
|
||||
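Both failure paths funnel into the same guard. A sketch of it, with a hypothetical helper name:

```python
# Sketch of the shared failure guard (merge failures and parse failures both use it).
def handleFailure(mergeFailCount: int, lastValidCompletePart, maxFailures: int = 3):
    mergeFailCount += 1
    if mergeFailCount >= maxFailures:
        # Give up and return the last CLOSED JSON that parsed successfully.
        return mergeFailCount, lastValidCompletePart
    # Otherwise signal the loop to retry with unchanged jsonBase/lastRawResponse.
    return mergeFailCount, None
```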