Compare commits

23 commits: 052647a52b...0659d0d21a (newest first)

- 0659d0d21a
- 9115d9eec8
- da974190ea
- 7942766931
- c140bd14d4
- 06d9910ecd
- b500bfa6c1
- afd7e9d941
- b12671bbb5
- 880fa4d787
- 72d3175f49
- ce671f61b6
- 4a840e9e6e
- 93cb6939dc
- 3add5c9a80
- 6a5ff1ff7c
- dff3d41845
- a7f4055130
- 078b4eaaaf
- 9d82d3d353
- ba21005401
- 4d7ccb0418
- e8abd553d0

133 changed files with 10662 additions and 3003 deletions
app.py (13 changes)

```diff
@@ -405,6 +405,16 @@ async def lifespan(app: FastAPI):
     except Exception as e:
         logger.warning(f"BackgroundJob recovery failed (non-critical): {e}")
 
+    # Subscribe knowledge ingestion to connection lifecycle events so OAuth
+    # connect/disconnect reliably trigger bootstrap/purge.
+    try:
+        from modules.serviceCenter.services.serviceKnowledge.subConnectorIngestConsumer import (
+            registerKnowledgeIngestionConsumer,
+        )
+        registerKnowledgeIngestionConsumer()
+    except Exception as e:
+        logger.warning(f"KnowledgeIngestionConsumer registration failed (non-critical): {e}")
+
     yield
 
     # --- Stop Managers ---
@@ -672,6 +682,9 @@ app.include_router(navigationRouter)
 from modules.routes.routeWorkflowDashboard import router as workflowDashboardRouter
 app.include_router(workflowDashboardRouter)
 
+from modules.routes.routeAutomationWorkspace import router as automationWorkspaceRouter
+app.include_router(automationWorkspaceRouter)
+
 # ============================================================================
 # PLUG&PLAY FEATURE ROUTERS
 # Dynamically load routers from feature containers in modules/features/
```
```diff
@@ -351,6 +351,7 @@ class AiAnthropic(BaseConnectorAi):
 
         # Parse response
         anthropicResponse = response.json()
+        stop_reason = anthropicResponse.get("stop_reason")
 
         # Extract content and tool_use blocks from response
         content = ""
@@ -374,9 +375,25 @@ class AiAnthropic(BaseConnectorAi):
 
         if not content and not toolCalls:
             logger.warning(f"Anthropic API returned empty content. Full response: {anthropicResponse}")
-            content = "[Anthropic API returned empty response]"
+            err = (
+                "Anthropic refused the request (content policy) — try another model or adjust the prompt."
+                if stop_reason == "refusal"
+                else f"Anthropic returned no assistant text (stop_reason={stop_reason or 'unknown'})."
+            )
+            return AiModelResponse(
+                content="",
+                success=False,
+                error=err,
+                modelId=model.name,
+                metadata={
+                    "response_id": anthropicResponse.get("id", ""),
+                    "stop_reason": stop_reason,
+                },
+            )
 
         metadata = {"response_id": anthropicResponse.get("id", "")}
+        if stop_reason:
+            metadata["stop_reason"] = stop_reason
         if toolCalls:
             metadata["toolCalls"] = toolCalls
 
@@ -492,6 +509,19 @@ class AiAnthropic(BaseConnectorAi):
                 f"Anthropic stream returned empty response: model={model.name}, "
                 f"stopReason={stopReason}"
             )
+            err = (
+                "Anthropic refused the request (content policy) — try another model or adjust the prompt."
+                if stopReason == "refusal"
+                else f"Anthropic returned no assistant text (stop_reason={stopReason or 'unknown'})."
+            )
+            yield AiModelResponse(
+                content="",
+                success=False,
+                error=err,
+                modelId=model.name,
+                metadata={"stopReason": stopReason} if stopReason else {},
+            )
+            return
 
         metadata: Dict[str, Any] = {}
         if stopReason:
```
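The change above turns refusals and empty completions into structured failures instead of placeholder text. A caller-side sketch of how such a response might be triaged; the field names (success, error, the stop-reason metadata keys) mirror the diff, while the function and the retry policy are illustrative:

```python
from typing import Any, Dict

def classifyAiFailure(success: bool, error: str, metadata: Dict[str, Any]) -> str:
    """Map the failure shape produced above to a retry decision (illustrative)."""
    if success:
        return "ok"
    # A "refusal" stop reason is a content-policy decision: retrying the
    # identical prompt will not help, so surface the error to the user.
    if "refusal" in (metadata.get("stop_reason"), metadata.get("stopReason")):
        return "surface-error"
    # Any other empty response may be transient: retry or switch models.
    return "retry"

print(classifyAiFailure(False, "refused", {"stop_reason": "refusal"}))  # surface-error
```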
```diff
@@ -834,7 +834,10 @@ class DatabaseConnector:
             createdTs = record.get("sysCreatedAt")
             if createdTs is None or createdTs == 0 or createdTs == 0.0:
                 record["sysCreatedAt"] = currentTime
-            if effective_user_id:
+            # Do not wipe caller-provided sysCreatedBy (e.g. FileItem from createFile with
+            # real user). ContextVar can be "system" for the DB pool while the business
+            # user is set on the record from model_dump().
+            if effective_user_id and not record.get("sysCreatedBy"):
                 record["sysCreatedBy"] = effective_user_id
             elif not record.get("sysCreatedBy"):
                 if effective_user_id:
@@ -1531,7 +1534,7 @@ class DatabaseConnector:
                 createdTs = rec.get("sysCreatedAt")
                 if createdTs is None or createdTs == 0 or createdTs == 0.0:
                     rec["sysCreatedAt"] = currentTime
-                if effectiveUserId:
+                if effectiveUserId and not rec.get("sysCreatedBy"):
                     rec["sysCreatedBy"] = effectiveUserId
                 elif not rec.get("sysCreatedBy") and effectiveUserId:
                     rec["sysCreatedBy"] = effectiveUserId
```
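The guard's intent is the precedence rule the comment spells out: a caller-provided sysCreatedBy always wins, and the ContextVar user (which may be the generic "system" pool identity) only fills gaps. A minimal, self-contained sketch of that rule; the helper name is hypothetical:

```python
from typing import Dict, Optional

def resolveCreatedBy(record: Dict[str, str], effective_user_id: Optional[str]) -> Optional[str]:
    """Caller-provided sysCreatedBy wins; the ContextVar user only fills gaps."""
    if record.get("sysCreatedBy"):
        return record["sysCreatedBy"]
    return effective_user_id

# Caller set a real business user; the "system" pool identity must not wipe it.
assert resolveCreatedBy({"sysCreatedBy": "user-42"}, "system") == "user-42"
# No caller value: fall back to the effective user from the ContextVar.
assert resolveCreatedBy({}, "system") == "system"
```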
```diff
@@ -126,6 +126,11 @@ def _stripGraphBase(url: str) -> str:
 
 def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry:
     isFolder = "folder" in item
+    # Graph exposes the driveItem content hash as ``eTag`` (quoted) or
+    # ``cTag``; we normalise to a "revision" string so callers can use it as a
+    # stable ``contentVersion`` for idempotent ingestion without re-downloading
+    # file bytes.
+    revision = item.get("eTag") or item.get("cTag")
     return ExternalEntry(
         name=item.get("name", ""),
         path=f"{basePath}/{item.get('name', '')}" if basePath else item.get("name", ""),
@@ -137,6 +142,9 @@ def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> Exter
             "id": item.get("id"),
             "webUrl": item.get("webUrl"),
             "childCount": item.get("folder", {}).get("childCount") if isFolder else None,
+            "revision": revision,
+            "lastModifiedDateTime": item.get("lastModifiedDateTime"),
+            "parentReference": item.get("parentReference", {}),
         },
     )
 
@@ -167,21 +175,36 @@ class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
             return await self._discoverSites()
 
         if not folderPath or folderPath == "/":
-            endpoint = f"sites/{siteId}/drive/root/children"
+            endpoint: Optional[str] = f"sites/{siteId}/drive/root/children?$top=200"
         else:
             cleanPath = folderPath.lstrip("/")
-            endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children"
+            endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children?$top=200"
 
-        result = await self._graphGet(endpoint)
-        if "error" in result:
-            logger.warning(f"SharePoint browse failed: {result['error']}")
-            return []
+        # Follow @odata.nextLink until a hard cap is reached so large libraries
+        # are fully enumerated (required for bootstrap). Per-page size uses
+        # Graph's max supported value to minimise round-trips.
+        effectiveLimit = int(limit) if limit is not None else None
+        items: List[Dict[str, Any]] = []
+        hardCap = 5000
+        while endpoint and len(items) < hardCap:
+            result = await self._graphGet(endpoint)
+            if "error" in result:
+                logger.warning(f"SharePoint browse failed: {result['error']}")
+                break
+            for raw in result.get("value", []) or []:
+                items.append(raw)
+                if effectiveLimit is not None and len(items) >= effectiveLimit:
+                    break
+            if effectiveLimit is not None and len(items) >= effectiveLimit:
+                break
+            nextLink = result.get("@odata.nextLink")
+            endpoint = _stripGraphBase(nextLink) if nextLink else None
 
-        entries = [_graphItemToExternalEntry(item, path) for item in result.get("value", [])]
+        entries = [_graphItemToExternalEntry(item, path) for item in items]
         if filter:
             entries = [e for e in entries if _matchFilter(e, filter)]
-        if limit is not None:
-            entries = entries[: max(1, int(limit))]
+        if effectiveLimit is not None:
+            entries = entries[: max(1, effectiveLimit)]
         return entries
 
     async def _discoverSites(self) -> List[ExternalEntry]:
```
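The browse loop above follows standard Microsoft Graph paging. For reference, a standalone sketch of the same @odata.nextLink pattern with httpx; the endpoint path, token handling, and function name are placeholders, and only the value/@odata.nextLink response shape is documented Graph behaviour:

```python
import httpx

GRAPH_BASE = "https://graph.microsoft.com/v1.0/"

async def fetchAllGraphItems(path: str, token: str, hardCap: int = 5000) -> list:
    """Follow @odata.nextLink until the collection is exhausted or a cap is hit."""
    items: list = []
    url = GRAPH_BASE + path
    async with httpx.AsyncClient() as client:
        while url and len(items) < hardCap:
            resp = await client.get(url, headers={"Authorization": f"Bearer {token}"})
            resp.raise_for_status()
            payload = resp.json()
            items.extend(payload.get("value", []))
            # Graph returns an absolute URL for the next page, or omits the key.
            url = payload.get("@odata.nextLink")
    return items
```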
```diff
@@ -162,6 +162,7 @@ class AiCallOptions(BaseModel):
 
     # Provider filtering (from UI multiselect or automation config)
     allowedProviders: Optional[List[str]] = Field(default=None, description="List of allowed AI providers to use (empty = all RBAC-permitted)")
+    allowedModels: Optional[List[str]] = Field(default=None, description="Whitelist of allowed model names (AND-filter with allowedProviders). None/empty = all allowed.")
 
 
 class AiCallRequest(BaseModel):
```
```diff
@@ -110,11 +110,13 @@ class DocumentReferenceList(BaseModel):
                 # docItem:documentId
                 references.append(DocumentItemReference(documentId=parts[0]))
 
             # Unknown format - skip or log warning
             else:
                 # Try to parse as simple string (backward compatibility)
                 # Assume it's a label if it doesn't match known patterns
-                if refStr:
+                if not refStr:
+                    continue
+                import re
+                if re.match(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', refStr, re.I):
+                    references.append(DocumentItemReference(documentId=refStr))
+                else:
                     references.append(DocumentListReference(label=refStr))
 
         return cls(references=references)
```
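The new branch classifies a bare string by shape: UUIDs become document-item references, everything else stays a list label. A self-contained sketch of that rule; the regex is copied from the diff, the helper name is made up:

```python
import re

_UUID_RE = re.compile(
    r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.I
)

def classifyReference(refStr: str) -> str:
    """Bare UUIDs are treated as document IDs; anything else as a list label."""
    if not refStr:
        return "skipped"
    return "documentId" if _UUID_RE.match(refStr) else "label"

assert classifyReference("123e4567-e89b-12d3-a456-426614174000") == "documentId"
assert classifyReference("Quarterly Reports") == "label"
assert classifyReference("") == "skipped"
```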
```diff
@@ -95,7 +95,14 @@ class ExtractionOptions(BaseModel):
     imageQuality: int = Field(default=85, ge=1, le=100, description="Image quality (1-100)")
 
     # Merging strategy
-    mergeStrategy: MergeStrategy = Field(default_factory=MergeStrategy, description="Strategy for merging extraction results")
+    mergeStrategy: Optional[MergeStrategy] = Field(
+        default_factory=MergeStrategy,
+        description=(
+            "Strategy for merging extraction results. Pass None to skip merging entirely "
+            "(required for per-chunk ingestion pipelines like RAG, where per-page/per-section "
+            "granularity must be preserved for embedding)."
+        ),
+    )
 
     # Optional chunking parameters (for backward compatibility)
     chunkAllowed: Optional[bool] = Field(default=None, description="Whether chunking is allowed")
```
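What the Optional change means at call sites, as a self-contained sketch; MergeStrategy and ExtractionOptions are reduced to stand-ins here, and only the field declaration mirrors the diff:

```python
from typing import Optional
from pydantic import BaseModel, Field

class MergeStrategy(BaseModel):
    """Stand-in for the real MergeStrategy (fields omitted in this sketch)."""

class ExtractionOptions(BaseModel):
    # Mirrors the diff: default instance merges as before, None skips merging.
    mergeStrategy: Optional[MergeStrategy] = Field(default_factory=MergeStrategy)

assert ExtractionOptions().mergeStrategy is not None                 # merged output
assert ExtractionOptions(mergeStrategy=None).mergeStrategy is None   # per-chunk output
```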
```diff
@@ -1,82 +0,0 @@
-# Copyright (c) 2025 Patrick Motsch
-# All rights reserved.
-"""FileFolder: hierarchical folder structure for file organization."""
-
-from typing import Optional
-from pydantic import BaseModel, Field
-from modules.datamodels.datamodelBase import PowerOnModel
-from modules.shared.i18nRegistry import i18nModel
-import uuid
-
-
-@i18nModel("Dateiordner")
-class FileFolder(PowerOnModel):
-    """Hierarchischer Ordner fuer die Dateiverwaltung."""
-    id: str = Field(
-        default_factory=lambda: str(uuid.uuid4()),
-        description="Primary key",
-        json_schema_extra={"label": "ID", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False},
-    )
-    name: str = Field(
-        description="Folder name",
-        json_schema_extra={"label": "Name", "frontend_type": "text", "frontend_readonly": False, "frontend_required": True},
-    )
-    parentId: Optional[str] = Field(
-        default=None,
-        description="Parent folder ID (null = root)",
-        json_schema_extra={
-            "label": "Uebergeordneter Ordner",
-            "frontend_type": "text",
-            "frontend_readonly": False,
-            "frontend_required": False,
-            "fk_target": {"db": "poweron_management", "table": "FileFolder", "labelField": "name"},
-        },
-    )
-    mandateId: Optional[str] = Field(
-        default=None,
-        description="Mandate context",
-        json_schema_extra={
-            "label": "Mandanten-ID",
-            "frontend_type": "text",
-            "frontend_readonly": True,
-            "frontend_required": False,
-            "fk_target": {"db": "poweron_app", "table": "Mandate", "labelField": "label"},
-        },
-    )
-    featureInstanceId: Optional[str] = Field(
-        default=None,
-        description="Feature instance context",
-        json_schema_extra={
-            "label": "Feature-Instanz-ID",
-            "frontend_type": "text",
-            "frontend_readonly": True,
-            "frontend_required": False,
-            "fk_target": {"db": "poweron_app", "table": "FeatureInstance", "labelField": "label"},
-        },
-    )
-    scope: str = Field(
-        default="personal",
-        description="Data visibility scope: personal, featureInstance, mandate, global. Inherited by files in this folder.",
-        json_schema_extra={
-            "label": "Sichtbarkeit",
-            "frontend_type": "select",
-            "frontend_readonly": False,
-            "frontend_required": False,
-            "frontend_options": [
-                {"value": "personal", "label": "Persönlich"},
-                {"value": "featureInstance", "label": "Feature-Instanz"},
-                {"value": "mandate", "label": "Mandant"},
-                {"value": "global", "label": "Global"},
-            ],
-        },
-    )
-    neutralize: bool = Field(
-        default=False,
-        description="Whether files in this folder should be neutralized before AI processing. Inherited by new/moved files.",
-        json_schema_extra={
-            "label": "Neutralisieren",
-            "frontend_type": "checkbox",
-            "frontend_readonly": False,
-            "frontend_required": False,
-        },
-    )
```
```diff
@@ -68,17 +68,6 @@ class FileItem(PowerOnModel):
         description="Tags for categorization and search",
         json_schema_extra={"label": "Tags", "frontend_type": "tags", "frontend_readonly": False, "frontend_required": False},
     )
-    folderId: Optional[str] = Field(
-        default=None,
-        description="ID of the parent folder",
-        json_schema_extra={
-            "label": "Ordner-ID",
-            "frontend_type": "text",
-            "frontend_readonly": False,
-            "frontend_required": False,
-            "fk_target": {"db": "poweron_management", "table": "FileFolder", "labelField": "name"},
-        },
-    )
     description: Optional[str] = Field(
         default=None,
         description="User-provided description of the file",
```
```diff
@@ -6,7 +6,7 @@ Unified JSON document schema and helpers used by both generation prompts and ren
 This defines a single canonical template and the supported section types.
 """
 
-from typing import List
+from typing import List, Literal, TypedDict
 
 # Canonical list of supported section types across the system
 supportedSectionTypes: List[str] = [
@@ -18,6 +18,21 @@ supportedSectionTypes: List[str] = [
     "image",
 ]
 
+class InlineRun(TypedDict, total=False):
+    """Single inline content run. Every paragraph/cell/list-item is a List[InlineRun]."""
+    type: Literal["text", "image", "link", "bold", "italic", "code"]
+    value: str  # text content (for text/bold/italic/code/link-label)
+    fileId: str  # for type=image: reference to FileItem
+    base64Data: str  # for type=image: resolved base64 (post-processing)
+    mimeType: str  # for type=image: e.g. "image/png"
+    widthPt: int  # for type=image: optional render width
+    href: str  # for type=link: URL target
+
+supportedInlineRunTypes: List[str] = [
+    "text", "image", "link", "bold", "italic", "code",
+]
+
+
 # Canonical JSON template used for AI generation (documents array + sections)
 # This template is used for STRUCTURE generation - sections have empty elements arrays.
 # For content generation, elements arrays will be populated later.
```
|
|||
description="Data visibility scope: personal, featureInstance, mandate, global",
|
||||
json_schema_extra={"label": "Sichtbarkeit"},
|
||||
)
|
||||
sourceKind: str = Field(
|
||||
default="file",
|
||||
description="Origin of the indexed content: file, sharepoint_item, outlook_message, outlook_attachment, ...",
|
||||
json_schema_extra={"label": "Quellenart"},
|
||||
)
|
||||
connectionId: Optional[str] = Field(
|
||||
default=None,
|
||||
description="UserConnection ID if this index entry originates from an external connector",
|
||||
json_schema_extra={"label": "Connection-ID"},
|
||||
)
|
||||
neutralizationStatus: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Neutralization status: completed, failed, skipped, None = not required",
|
||||
|
|
|
|||
|
|
```diff
@@ -13,6 +13,42 @@ import math
 T = TypeVar('T')
 
 
+# ---------------------------------------------------------------------------
+# Table Grouping models
+# ---------------------------------------------------------------------------
+
+class TableGroupNode(BaseModel):
+    """
+    A single node in a user-defined group tree for a FormGeneratorTable.
+
+    Items belong to exactly one group (no multi-membership).
+    Groups can be nested to arbitrary depth via subGroups.
+    """
+    id: str
+    name: str
+    itemIds: List[str] = Field(default_factory=list)
+    subGroups: List['TableGroupNode'] = Field(default_factory=list)
+    order: int = 0
+    isExpanded: bool = True
+
+TableGroupNode.model_rebuild()
+
+
+class TableGrouping(BaseModel):
+    """
+    Persisted grouping configuration for one (user, contextKey) pair.
+    Stored in table_groupings in poweron_app (auto-created).
+
+    contextKey convention: API path without /api/ prefix and without trailing slash.
+    Examples: "connections", "prompts", "admin/users", "trustee/{instanceId}/documents"
+    """
+    id: str
+    userId: str
+    contextKey: str
+    rootGroups: List[TableGroupNode] = Field(default_factory=list)
+    updatedAt: Optional[float] = None
+
+
 class SortField(BaseModel):
     """
     Single sort field configuration.
@@ -24,12 +60,23 @@ class SortField(BaseModel):
 class PaginationParams(BaseModel):
     """
     Complete pagination state including page, sorting, and filters.
+
+    Grouping extensions (both optional — omit when not using grouping):
+      groupId       — Scope the request to items belonging to this group.
+                      The backend resolves it to an itemIds IN-filter before
+                      applying normal pagination/search/filter logic.
+                      Also applied for mode=ids and mode=filterValues so that
+                      bulk-select and filter-dropdowns respect the group scope.
+      saveGroupTree — If present the backend persists this tree for the current
+                      (user, contextKey) pair *before* fetching, then returns
+                      the confirmed tree in the response groupTree field.
+                      Omit on every request that does not change the group tree.
     """
     page: int = Field(ge=1, description="Current page number (1-based)")
     pageSize: int = Field(ge=1, le=1000, description="Number of items per page")
     sort: List[SortField] = Field(default_factory=list, description="List of sort fields in priority order")
     filters: Optional[Dict[str, Any]] = Field(
         default=None,
         description="""Filter criteria dictionary. Supports:
 - General search: {"search": "text"} - searches across all text fields (case-insensitive)
 - Field-specific filters:
@@ -38,6 +85,14 @@ class PaginationParams(BaseModel):
 - Supported operators: equals/eq, contains, startsWith, endsWith, gt, gte, lt, lte, in, notIn
 - Multiple filters are combined with AND logic"""
     )
+    groupId: Optional[str] = Field(
+        default=None,
+        description="Scope request to items of this group (resolved server-side to itemIds IN-filter)",
+    )
+    saveGroupTree: Optional[List[Dict[str, Any]]] = Field(
+        default=None,
+        description="If set, persist this group tree before fetching (optimistic save)",
+    )
 
 
 class PaginationRequest(BaseModel):
@@ -74,10 +129,19 @@ class PaginationMetadata(BaseModel):
 class PaginatedResponse(BaseModel, Generic[T]):
     """
     Response containing paginated data and metadata.
+
+    groupTree is included when the endpoint supports table grouping and the
+    current user has a saved group tree for the requested contextKey.
+    It is None when grouping is not configured for the endpoint or the user
+    has not created any groups yet. Frontend must treat None as an empty tree.
     """
     items: List[T] = Field(..., description="Array of items for current page")
     pagination: Optional[PaginationMetadata] = Field(..., description="Pagination metadata (None if pagination not applied)")
 
+    groupTree: Optional[List[TableGroupNode]] = Field(
+        default=None,
+        description="Current group tree for this (user, contextKey) pair — None if no grouping configured",
+    )
+
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
 
@@ -85,29 +149,33 @@ def normalize_pagination_dict(pagination_dict: Dict[str, Any]) -> Dict[str, Any]
     """
     Normalize pagination dictionary to handle frontend variations.
     Moves top-level "search" field into filters if present.
+
+    Grouping fields (groupId, saveGroupTree) are passed through as-is.
 
     Args:
         pagination_dict: Raw pagination dictionary from frontend
 
     Returns:
         Normalized pagination dictionary ready for PaginationParams parsing
     """
     if not pagination_dict:
         return pagination_dict
 
     # Create a copy to avoid modifying the original
     normalized = dict(pagination_dict)
 
     # Ensure required fields have sensible defaults
     if "page" not in normalized:
         normalized["page"] = 1
     if "pageSize" not in normalized:
         normalized["pageSize"] = 25
 
     # Move top-level "search" into filters if present
     if "search" in normalized:
         if "filters" not in normalized or normalized["filters"] is None:
             normalized["filters"] = {}
         normalized["filters"]["search"] = normalized.pop("search")
 
+    # groupId / saveGroupTree are valid PaginationParams fields — pass through unchanged.
+    # No transformation needed; Pydantic will validate them.
     return normalized
```
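On the wire, a grouped request might look like the sketch below (IDs illustrative); per normalize_pagination_dict, only the top-level search key is relocated and the grouping fields pass through untouched:

```python
# Incoming payload from the frontend (IDs illustrative):
raw = {
    "page": 1,
    "pageSize": 25,
    "search": "invoice",   # top-level search, as some frontends send it
    "groupId": "grp-7",    # scope the page to one group's itemIds
}

# After normalize_pagination_dict(raw): "search" has moved into filters,
# grouping fields are unchanged and validated by PaginationParams.
expected = {
    "page": 1,
    "pageSize": 25,
    "filters": {"search": "invoice"},
    "groupId": "grp-7",
}
```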
```diff
@@ -475,7 +475,23 @@ class UserConnection(PowerOnModel):
         description="OAuth scopes granted for this connection",
         json_schema_extra={"frontend_type": "list", "frontend_readonly": True, "frontend_required": False, "label": "Gewährte Berechtigungen"},
     )
 
+    knowledgeIngestionEnabled: bool = Field(
+        default=False,
+        description="Whether the user has consented to knowledge ingestion for this connection",
+        json_schema_extra={"frontend_type": "boolean", "frontend_readonly": False, "frontend_required": False, "label": "Wissensdatenbank aktiv"},
+    )
+    knowledgePreferences: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description=(
+            "Per-connection knowledge ingestion preferences. schemaVersion=1 keys: "
+            "neutralizeBeforeEmbed (bool), mailContentDepth (metadata|snippet|full), "
+            "mailIndexAttachments (bool), filesIndexBinaries (bool), mimeAllowlist (list[str]), "
+            "clickupScope (titles|title_description|with_comments), "
+            "surfaceToggles (dict per authority), maxAgeDays (int)."
+        ),
+        json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False, "label": "Wissenspräferenzen"},
+    )
+
     @computed_field
     @property
     def connectionReference(self) -> str:
```
|
|||
for c in chunks
|
||||
]
|
||||
|
||||
await knowledgeService.indexFile(
|
||||
fileId=syntheticFileId,
|
||||
fileName=f"coaching-session-{sessionId[:8]}",
|
||||
mimeType="application/x-coaching-session",
|
||||
userId=userId,
|
||||
featureInstanceId=featureInstanceId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||
|
||||
await knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="coaching_session",
|
||||
sourceId=syntheticFileId,
|
||||
fileName=f"coaching-session-{sessionId[:8]}",
|
||||
mimeType="application/x-coaching-session",
|
||||
userId=userId,
|
||||
featureInstanceId=featureInstanceId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
provenance={
|
||||
"lane": "feature",
|
||||
"feature": "commcoach",
|
||||
"sessionId": sessionId,
|
||||
"contextId": contextId,
|
||||
"messageCount": len(messages or []),
|
||||
},
|
||||
)
|
||||
)
|
||||
logger.info(f"Successfully indexed coaching session {sessionId} ({len(chunks)} chunks)")
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ class AutoWorkflow(PowerOnModel):
|
|||
},
|
||||
)
|
||||
featureInstanceId: str = Field(
|
||||
description="Feature instance ID",
|
||||
description="Feature instance ID (GE owner instance / RBAC scope)",
|
||||
json_schema_extra={
|
||||
"frontend_type": "text",
|
||||
"frontend_readonly": True,
|
||||
|
|
@ -81,6 +81,17 @@ class AutoWorkflow(PowerOnModel):
|
|||
"fk_target": {"db": "poweron_app", "table": "FeatureInstance", "labelField": "label"},
|
||||
},
|
||||
)
|
||||
targetFeatureInstanceId: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Target feature instance for execution data scope. NULL for templates, mandatory for non-templates.",
|
||||
json_schema_extra={
|
||||
"frontend_type": "select",
|
||||
"frontend_readonly": False,
|
||||
"frontend_required": False,
|
||||
"label": "Ziel-Instanz",
|
||||
"fk_target": {"db": "poweron_app", "table": "FeatureInstance", "labelField": "label"},
|
||||
},
|
||||
)
|
||||
label: str = Field(
|
||||
description="User-friendly workflow name",
|
||||
json_schema_extra={"frontend_type": "text", "frontend_required": True, "label": "Bezeichnung"},
|
||||
|
|
|
|||
|
|
@ -12,17 +12,30 @@ import uuid
|
|||
from typing import Dict, Any, List, Optional
|
||||
|
||||
|
||||
def _make_json_serializable(obj: Any) -> Any:
|
||||
_INTERNAL_SKIP_KEYS = frozenset({"_context", "_orderedNodes"})
|
||||
|
||||
|
||||
def _make_json_serializable(obj: Any, _depth: int = 0) -> Any:
|
||||
"""
|
||||
Recursively convert bytes to base64 strings so structures can be JSON-serialized
|
||||
for storage in JSONB columns.
|
||||
|
||||
Internal runtime keys (_context, _orderedNodes) are skipped — they hold live
|
||||
Python objects (including back-references to nodeOutputs) and must never be
|
||||
stored. A depth guard prevents runaway recursion on unexpected circular refs.
|
||||
"""
|
||||
if _depth > 50:
|
||||
return None
|
||||
if isinstance(obj, bytes):
|
||||
return base64.b64encode(obj).decode("ascii")
|
||||
if isinstance(obj, dict):
|
||||
return {k: _make_json_serializable(v) for k, v in obj.items()}
|
||||
return {
|
||||
k: _make_json_serializable(v, _depth + 1)
|
||||
for k, v in obj.items()
|
||||
if k not in _INTERNAL_SKIP_KEYS
|
||||
}
|
||||
if isinstance(obj, list):
|
||||
return [_make_json_serializable(v) for v in obj]
|
||||
return [_make_json_serializable(v, _depth + 1) for v in obj]
|
||||
return obj
|
||||
|
||||
from modules.datamodels.datamodelUam import User
|
||||
|
|
@ -217,6 +230,8 @@ class GraphicalEditorObjects:
|
|||
data["id"] = str(uuid.uuid4())
|
||||
data["mandateId"] = self.mandateId
|
||||
data["featureInstanceId"] = self.featureInstanceId
|
||||
if not data.get("targetFeatureInstanceId") and not data.get("isTemplate"):
|
||||
data["targetFeatureInstanceId"] = self.featureInstanceId
|
||||
if "active" not in data or data.get("active") is None:
|
||||
data["active"] = True
|
||||
data["invocations"] = normalize_invocations_list(data.get("invocations"))
|
||||
|
|
|
|||
|
|
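Behaviour of the hardened serializer on a representative node output, assuming the _make_json_serializable from the diff above is in scope; the payload shape is illustrative:

```python
import json

nodeOutput = {
    "file": b"\x89PNG...",                # raw bytes -> base64 string
    "_context": object(),                  # live runtime object -> dropped
    "results": [{"_orderedNodes": [], "value": 1}],  # skipped at any depth
}

clean = _make_json_serializable(nodeOutput)
assert "_context" not in clean
assert "_orderedNodes" not in clean["results"][0]
json.dumps(clean)  # now JSONB-safe
```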
```diff
@@ -3,6 +3,15 @@
 
 from modules.shared.i18nRegistry import t
 
+_AI_COMMON_PARAMS = [
+    {"name": "requireNeutralization", "type": "bool", "required": False,
+     "frontendType": "checkbox", "default": False,
+     "description": t("Eingaben fuer diesen Call neutralisieren")},
+    {"name": "allowedModels", "type": "array", "required": False,
+     "frontendType": "modelMultiSelect", "default": [],
+     "description": t("Erlaubte LLM-Modelle (leer = alle erlaubten)")},
+]
+
 AI_NODES = [
     {
         "id": "ai.prompt",
@@ -10,20 +19,25 @@ AI_NODES = [
         "label": t("Prompt"),
         "description": t("Prompt eingeben und KI führt aus"),
         "parameters": [
-            {"name": "aiPrompt", "type": "string", "required": True, "frontendType": "textarea",
+            {"name": "aiPrompt", "type": "str", "required": True, "frontendType": "templateTextarea",
             "description": t("KI-Prompt")},
-            {"name": "resultType", "type": "string", "required": False, "frontendType": "select",
+            {"name": "resultType", "type": "str", "required": False, "frontendType": "select",
             "frontendOptions": {"options": ["txt", "json", "md", "csv", "xml", "html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"]},
             "description": t("Ausgabeformat"), "default": "txt"},
-            {"name": "documentList", "type": "string", "required": False, "frontendType": "hidden",
-            "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""},
-            {"name": "simpleMode", "type": "boolean", "required": False, "frontendType": "checkbox",
+            {"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden",
+            "description": t("Dokumente aus vorherigen Schritten"), "default": ""},
+            {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
+            "description": t("Daten aus vorherigen Schritten"), "default": ""},
+            {"name": "documentTheme", "type": "str", "required": False, "frontendType": "select",
+            "frontendOptions": {"options": ["general", "finance", "legal", "technical", "hr"]},
+            "description": t("Dokument-Thema (Style-Hinweis fuer den Renderer)"), "default": "general"},
+            {"name": "simpleMode", "type": "bool", "required": False, "frontendType": "checkbox",
             "description": t("Einfacher Modus"), "default": True},
-        ],
+        ] + _AI_COMMON_PARAMS,
         "inputs": 1,
         "outputs": 1,
         "inputPorts": {0: {"accepts": [
-            "DocumentList", "AiResult", "TextResult", "Transit", "LoopItem", "ActionResult",
+            "FormPayload", "DocumentList", "AiResult", "TextResult", "Transit", "LoopItem", "ActionResult",
         ]}},
         "outputPorts": {0: {"schema": "AiResult"}},
         "meta": {"icon": "mdi-robot", "color": "#9C27B0", "usesAi": True},
@@ -36,12 +50,16 @@ AI_NODES = [
         "label": t("Web-Recherche"),
         "description": t("Recherche im Web"),
         "parameters": [
-            {"name": "prompt", "type": "string", "required": True, "frontendType": "textarea",
+            {"name": "prompt", "type": "str", "required": True, "frontendType": "textarea",
             "description": t("Recherche-Anfrage")},
-        ],
+            {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
+            "description": t("Daten aus vorherigen Schritten"), "default": ""},
+            {"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden",
+            "description": t("Dokumente aus vorherigen Schritten"), "default": ""},
+        ] + _AI_COMMON_PARAMS,
         "inputs": 1,
         "outputs": 1,
-        "inputPorts": {0: {"accepts": ["Transit"]}},
+        "inputPorts": {0: {"accepts": ["FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult"]}},
         "outputPorts": {0: {"schema": "AiResult"}},
         "meta": {"icon": "mdi-magnify", "color": "#9C27B0", "usesAi": True},
         "_method": "ai",
@@ -53,12 +71,12 @@ AI_NODES = [
         "label": t("Dokument zusammenfassen"),
         "description": t("Dokumentinhalt zusammenfassen"),
         "parameters": [
-            {"name": "documentList", "type": "string", "required": True, "frontendType": "hidden",
-            "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""},
-            {"name": "summaryLength", "type": "string", "required": False, "frontendType": "select",
+            {"name": "documentList", "type": "DocumentList", "required": True, "frontendType": "dataRef",
+            "description": t("Dokumente aus vorherigen Schritten")},
+            {"name": "summaryLength", "type": "str", "required": False, "frontendType": "select",
             "frontendOptions": {"options": ["brief", "medium", "detailed"]},
             "description": t("Kurz, mittel oder ausführlich"), "default": "medium"},
-        ],
+        ] + _AI_COMMON_PARAMS,
         "inputs": 1,
         "outputs": 1,
         "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
@@ -73,11 +91,11 @@ AI_NODES = [
         "label": t("Dokument übersetzen"),
         "description": t("Dokument in Zielsprache übersetzen"),
         "parameters": [
-            {"name": "documentList", "type": "string", "required": True, "frontendType": "hidden",
-            "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""},
-            {"name": "targetLanguage", "type": "string", "required": True, "frontendType": "text",
+            {"name": "documentList", "type": "DocumentList", "required": True, "frontendType": "dataRef",
+            "description": t("Dokumente aus vorherigen Schritten")},
+            {"name": "targetLanguage", "type": "str", "required": True, "frontendType": "text",
             "description": t("Zielsprache (z.B. de, en, French)")},
-        ],
+        ] + _AI_COMMON_PARAMS,
         "inputs": 1,
         "outputs": 1,
         "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
@@ -92,12 +110,12 @@ AI_NODES = [
         "label": t("Dokument konvertieren"),
         "description": t("Dokument in anderes Format konvertieren"),
         "parameters": [
-            {"name": "documentList", "type": "string", "required": True, "frontendType": "hidden",
-            "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""},
-            {"name": "targetFormat", "type": "string", "required": True, "frontendType": "select",
+            {"name": "documentList", "type": "DocumentList", "required": True, "frontendType": "dataRef",
+            "description": t("Dokumente aus vorherigen Schritten")},
+            {"name": "targetFormat", "type": "str", "required": True, "frontendType": "select",
             "frontendOptions": {"options": ["docx", "pdf", "xlsx", "csv", "txt", "html", "json", "md"]},
             "description": t("Zielformat")},
-        ],
+        ] + _AI_COMMON_PARAMS,
         "inputs": 1,
         "outputs": 1,
         "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
@@ -112,12 +130,24 @@ AI_NODES = [
         "label": t("Dokument generieren"),
         "description": t("Dokument aus Prompt generieren"),
         "parameters": [
-            {"name": "prompt", "type": "string", "required": True, "frontendType": "textarea",
+            {"name": "prompt", "type": "str", "required": True, "frontendType": "textarea",
             "description": t("Generierungs-Prompt")},
-        ],
+            {"name": "outputFormat", "type": "str", "required": False, "frontendType": "select",
+            "frontendOptions": {"options": ["docx", "pdf", "txt", "html", "md"]},
+            "description": t("Ausgabeformat"), "default": "docx"},
+            {"name": "title", "type": "str", "required": False, "frontendType": "text",
+            "description": t("Dokumenttitel (Metadaten / Dateiname)"), "default": ""},
+            {"name": "documentType", "type": "str", "required": False, "frontendType": "select",
+            "frontendOptions": {"options": ["letter", "memo", "proposal", "contract", "report", "email"]},
+            "description": t("Dokumentart (Inhaltshinweis fuer die KI)"), "default": "proposal"},
+            {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
+            "description": t("Daten aus vorherigen Schritten"), "default": ""},
+            {"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden",
+            "description": t("Dokumente aus vorherigen Schritten"), "default": ""},
+        ] + _AI_COMMON_PARAMS,
         "inputs": 1,
         "outputs": 1,
-        "inputPorts": {0: {"accepts": ["Transit"]}},
+        "inputPorts": {0: {"accepts": ["FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult"]}},
         "outputPorts": {0: {"schema": "DocumentList"}},
         "meta": {"icon": "mdi-file-plus", "color": "#9C27B0", "usesAi": True},
         "_method": "ai",
@@ -129,15 +159,19 @@ AI_NODES = [
         "label": t("Code generieren"),
         "description": t("Code aus Beschreibung generieren"),
         "parameters": [
-            {"name": "prompt", "type": "string", "required": True, "frontendType": "textarea",
+            {"name": "prompt", "type": "str", "required": True, "frontendType": "textarea",
             "description": t("Code-Generierungs-Prompt")},
-            {"name": "resultType", "type": "string", "required": False, "frontendType": "select",
+            {"name": "resultType", "type": "str", "required": False, "frontendType": "select",
             "frontendOptions": {"options": ["py", "js", "ts", "html", "java", "cpp", "txt", "json", "csv", "xml"]},
             "description": t("Datei-Endung der erzeugten Code-Datei"), "default": "py"},
-        ],
+            {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
+            "description": t("Daten aus vorherigen Schritten"), "default": ""},
+            {"name": "documentList", "type": "DocumentList", "required": False, "frontendType": "hidden",
+            "description": t("Dokumente aus vorherigen Schritten"), "default": ""},
+        ] + _AI_COMMON_PARAMS,
         "inputs": 1,
         "outputs": 1,
-        "inputPorts": {0: {"accepts": ["Transit"]}},
+        "inputPorts": {0: {"accepts": ["FormPayload", "Transit", "AiResult", "DocumentList", "ActionResult"]}},
         "outputPorts": {0: {"schema": "AiResult"}},
         "meta": {"icon": "mdi-code-tags", "color": "#9C27B0", "usesAi": True},
         "_method": "ai",
@@ -149,12 +183,12 @@ AI_NODES = [
         "label": t("KI-Konsolidierung"),
         "description": t("Gesammelte Ergebnisse mit KI zusammenfassen, klassifizieren oder semantisch zusammenführen"),
         "parameters": [
-            {"name": "mode", "type": "string", "required": False, "frontendType": "select",
+            {"name": "mode", "type": "str", "required": False, "frontendType": "select",
             "frontendOptions": {"options": ["summarize", "classify", "semanticMerge"]},
             "description": t("Konsolidierungsmodus"), "default": "summarize"},
-            {"name": "prompt", "type": "string", "required": False, "frontendType": "textarea",
+            {"name": "prompt", "type": "str", "required": False, "frontendType": "textarea",
             "description": t("Optionaler Prompt für die Konsolidierung"), "default": ""},
-        ],
+        ] + _AI_COMMON_PARAMS,
         "inputs": 1,
         "outputs": 1,
         "inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}},
```
```diff
@@ -11,23 +11,23 @@ CLICKUP_NODES = [
         "label": t("Aufgaben suchen"),
         "description": t("Aufgaben in einem Workspace suchen"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "clickup"},
             "description": t("ClickUp-Verbindung")},
-            {"name": "teamId", "type": "string", "required": True, "frontendType": "text",
+            {"name": "teamId", "type": "str", "required": True, "frontendType": "text",
             "description": t("Team-/Workspace-ID")},
-            {"name": "query", "type": "string", "required": True, "frontendType": "text",
+            {"name": "query", "type": "str", "required": True, "frontendType": "text",
             "description": t("Suchbegriff")},
-            {"name": "page", "type": "number", "required": False, "frontendType": "number",
+            {"name": "page", "type": "int", "required": False, "frontendType": "number",
             "description": t("Seite"), "default": 0},
-            {"name": "listId", "type": "string", "required": False, "frontendType": "clickupList",
+            {"name": "listId", "type": "str", "required": False, "frontendType": "clickupList",
             "frontendOptions": {"dependsOn": "connectionReference"},
             "description": t("In dieser Liste suchen")},
-            {"name": "includeClosed", "type": "boolean", "required": False, "frontendType": "checkbox",
+            {"name": "includeClosed", "type": "bool", "required": False, "frontendType": "checkbox",
             "description": t("Erledigte einbeziehen"), "default": False},
-            {"name": "fullTaskData", "type": "boolean", "required": False, "frontendType": "checkbox",
+            {"name": "fullTaskData", "type": "bool", "required": False, "frontendType": "checkbox",
             "description": t("Vollständige Daten"), "default": False},
-            {"name": "matchNameOnly", "type": "boolean", "required": False, "frontendType": "checkbox",
+            {"name": "matchNameOnly", "type": "bool", "required": False, "frontendType": "checkbox",
             "description": t("Nur Titel"), "default": True},
         ],
         "inputs": 1,
@@ -44,15 +44,15 @@ CLICKUP_NODES = [
         "label": t("Aufgaben auflisten"),
         "description": t("Aufgaben einer Liste auflisten"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "clickup"},
             "description": t("ClickUp-Verbindung")},
-            {"name": "pathQuery", "type": "string", "required": True, "frontendType": "clickupList",
+            {"name": "pathQuery", "type": "str", "required": True, "frontendType": "clickupList",
             "frontendOptions": {"dependsOn": "connectionReference"},
             "description": t("Pfad zur Liste")},
-            {"name": "page", "type": "number", "required": False, "frontendType": "number",
+            {"name": "page", "type": "int", "required": False, "frontendType": "number",
             "description": t("Seite"), "default": 0},
-            {"name": "includeClosed", "type": "boolean", "required": False, "frontendType": "checkbox",
+            {"name": "includeClosed", "type": "bool", "required": False, "frontendType": "checkbox",
             "description": t("Erledigte einbeziehen"), "default": False},
         ],
         "inputs": 1,
@@ -69,12 +69,12 @@ CLICKUP_NODES = [
         "label": t("Aufgabe abrufen"),
         "description": t("Eine Aufgabe abrufen"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "clickup"},
             "description": t("ClickUp-Verbindung")},
-            {"name": "taskId", "type": "string", "required": False, "frontendType": "text",
+            {"name": "taskId", "type": "str", "required": False, "frontendType": "text",
             "description": t("Task-ID")},
-            {"name": "pathQuery", "type": "string", "required": False, "frontendType": "text",
+            {"name": "pathQuery", "type": "str", "required": False, "frontendType": "text",
             "description": t("Oder Pfad")},
         ],
         "inputs": 1,
@@ -91,34 +91,34 @@ CLICKUP_NODES = [
         "label": t("Aufgabe erstellen"),
         "description": t("Aufgabe erstellen"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "clickup"},
             "description": t("ClickUp-Verbindung")},
-            {"name": "pathQuery", "type": "string", "required": False, "frontendType": "clickupList",
+            {"name": "pathQuery", "type": "str", "required": False, "frontendType": "clickupList",
             "frontendOptions": {"dependsOn": "connectionReference"},
             "description": t("Pfad zur Liste")},
-            {"name": "listId", "type": "string", "required": False, "frontendType": "text",
+            {"name": "listId", "type": "str", "required": False, "frontendType": "text",
             "description": t("Listen-ID")},
-            {"name": "name", "type": "string", "required": True, "frontendType": "text",
+            {"name": "name", "type": "str", "required": True, "frontendType": "text",
             "description": t("Name")},
-            {"name": "description", "type": "string", "required": False, "frontendType": "textarea",
+            {"name": "description", "type": "str", "required": False, "frontendType": "textarea",
             "description": t("Beschreibung")},
-            {"name": "taskStatus", "type": "string", "required": False, "frontendType": "text",
+            {"name": "taskStatus", "type": "str", "required": False, "frontendType": "text",
             "description": t("Status")},
-            {"name": "taskPriority", "type": "string", "required": False, "frontendType": "select",
+            {"name": "taskPriority", "type": "str", "required": False, "frontendType": "select",
             "frontendOptions": {"options": ["1", "2", "3", "4"]},
             "description": t("Priorität 1-4")},
-            {"name": "taskDueDateMs", "type": "string", "required": False, "frontendType": "text",
+            {"name": "taskDueDateMs", "type": "str", "required": False, "frontendType": "text",
             "description": t("Fälligkeit (ms)")},
             {"name": "taskAssigneeIds", "type": "object", "required": False, "frontendType": "json",
             "description": t("Zugewiesene")},
-            {"name": "taskTimeEstimateMs", "type": "string", "required": False, "frontendType": "text",
+            {"name": "taskTimeEstimateMs", "type": "str", "required": False, "frontendType": "text",
             "description": t("Zeitschätzung (ms)")},
-            {"name": "taskTimeEstimateHours", "type": "string", "required": False, "frontendType": "text",
+            {"name": "taskTimeEstimateHours", "type": "str", "required": False, "frontendType": "text",
             "description": t("Zeitschätzung (h)")},
             {"name": "customFieldValues", "type": "object", "required": False, "frontendType": "json",
             "description": t("Benutzerdefinierte Felder")},
-            {"name": "taskFields", "type": "string", "required": False, "frontendType": "json",
+            {"name": "taskFields", "type": "str", "required": False, "frontendType": "json",
             "description": t("Zusätzliches JSON")},
         ],
         "inputs": 1,
@@ -135,14 +135,14 @@ CLICKUP_NODES = [
         "label": t("Aufgabe aktualisieren"),
         "description": t("Felder der Aufgabe ändern"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "clickup"},
             "description": t("ClickUp-Verbindung")},
-            {"name": "taskId", "type": "string", "required": False, "frontendType": "text",
+            {"name": "taskId", "type": "str", "required": False, "frontendType": "text",
             "description": t("Task-ID")},
-            {"name": "path", "type": "string", "required": False, "frontendType": "text",
+            {"name": "path", "type": "str", "required": False, "frontendType": "text",
             "description": t("Oder Pfad")},
-            {"name": "taskUpdate", "type": "string", "required": False, "frontendType": "json",
+            {"name": "taskUpdate", "type": "str", "required": False, "frontendType": "json",
             "description": t("JSON-Body für PUT /task/{id}, z.B. {\"name\":\"...\",\"status\":\"...\"}")},
         ],
         "inputs": 1,
@@ -159,16 +159,16 @@ CLICKUP_NODES = [
         "label": t("Anhang hochladen"),
         "description": t("Datei an Task anhängen"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "clickup"},
             "description": t("ClickUp-Verbindung")},
-            {"name": "taskId", "type": "string", "required": False, "frontendType": "text",
+            {"name": "taskId", "type": "str", "required": False, "frontendType": "text",
             "description": t("Task-ID")},
-            {"name": "path", "type": "string", "required": False, "frontendType": "text",
+            {"name": "path", "type": "str", "required": False, "frontendType": "text",
             "description": t("Oder Pfad")},
-            {"name": "fileName", "type": "string", "required": False, "frontendType": "text",
+            {"name": "fileName", "type": "str", "required": False, "frontendType": "text",
             "description": t("Dateiname")},
-            {"name": "content", "type": "string", "required": True, "frontendType": "hidden",
+            {"name": "content", "type": "str", "required": True, "frontendType": "hidden",
             "description": t("Datei-Inhalt aus Upstream-Node (via Wire oder DataRef)"), "default": ""},
         ],
         "inputs": 1,
```
```diff
@@ -10,7 +10,7 @@ CONTEXT_NODES = [
         "label": t("Inhalt extrahieren"),
         "description": t("Dokumentstruktur extrahieren ohne KI (Seiten, Abschnitte, Bilder, Tabellen)"),
         "parameters": [
-            {"name": "documentList", "type": "string", "required": True, "frontendType": "hidden",
+            {"name": "documentList", "type": "str", "required": True, "frontendType": "hidden",
             "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""},
             {"name": "extractionOptions", "type": "object", "required": False, "frontendType": "json",
             "description": t(
```
```diff
@@ -10,7 +10,7 @@ DATA_NODES = [
         "label": t("Sammeln"),
         "description": t("Ergebnisse aus Schleifen-Iterationen sammeln"),
         "parameters": [
-            {"name": "mode", "type": "string", "required": False, "frontendType": "select",
+            {"name": "mode", "type": "str", "required": False, "frontendType": "select",
             "frontendOptions": {"options": ["collect", "concat", "sum", "count"]},
             "description": t("Aggregationsmodus"), "default": "collect"},
         ],
@@ -27,9 +27,9 @@ DATA_NODES = [
         "label": t("Filtern"),
         "description": t("Elemente nach Bedingung filtern"),
         "parameters": [
-            {"name": "condition", "type": "string", "required": True, "frontendType": "filterExpression",
+            {"name": "condition", "type": "str", "required": True, "frontendType": "filterExpression",
             "description": t("Filterbedingung")},
-            {"name": "udmContentType", "type": "string", "required": False, "frontendType": "select",
+            {"name": "udmContentType", "type": "str", "required": False, "frontendType": "select",
             "frontendOptions": {"options": ["", "text", "image", "table", "code", "media", "link", "formula"]},
             "description": t("UDM-ContentType-Filter (optional, leer = kein UDM-Filter)"), "default": ""},
         ],
@@ -46,10 +46,10 @@ DATA_NODES = [
         "label": t("Konsolidieren"),
         "description": t("Gesammelte Ergebnisse deterministisch zusammenführen (Tabelle, CSV, Merge)"),
         "parameters": [
-            {"name": "mode", "type": "string", "required": False, "frontendType": "select",
+            {"name": "mode", "type": "str", "required": False, "frontendType": "select",
             "frontendOptions": {"options": ["table", "concat", "merge", "csvJoin"]},
             "description": t("Konsolidierungsmodus"), "default": "table"},
-            {"name": "separator", "type": "string", "required": False, "frontendType": "text",
+            {"name": "separator", "type": "str", "required": False, "frontendType": "text",
             "description": t("Trennzeichen (für concat/csvJoin)"), "default": "\n"},
         ],
         "inputs": 1,
```
```diff
@@ -10,14 +10,14 @@ EMAIL_NODES = [
         "label": t("E-Mail prüfen"),
         "description": t("Neue E-Mails prüfen"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "msft"},
             "description": t("E-Mail-Konto Verbindung")},
-            {"name": "folder", "type": "string", "required": False, "frontendType": "text",
+            {"name": "folder", "type": "str", "required": False, "frontendType": "text",
             "description": t("Ordner"), "default": "Inbox"},
-            {"name": "limit", "type": "number", "required": False, "frontendType": "number",
+            {"name": "limit", "type": "int", "required": False, "frontendType": "number",
             "description": t("Max E-Mails"), "default": 100},
-            {"name": "filter", "type": "string", "required": False, "frontendType": "text",
+            {"name": "filter", "type": "str", "required": False, "frontendType": "text",
             "description": t("Filter-Ausdruck (z.B. 'from:max@example.com hasAttachment:true betreff')"), "default": ""},
         ],
         "inputs": 1,
@@ -34,14 +34,14 @@ EMAIL_NODES = [
         "label": t("E-Mail suchen"),
         "description": t("E-Mails suchen"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "msft"},
             "description": t("E-Mail-Konto Verbindung")},
-            {"name": "query", "type": "string", "required": True, "frontendType": "text",
+            {"name": "query", "type": "str", "required": True, "frontendType": "text",
             "description": t("Suchausdruck (z.B. 'from:max@example.com hasAttachments:true Rechnung')")},
-            {"name": "folder", "type": "string", "required": False, "frontendType": "text",
+            {"name": "folder", "type": "str", "required": False, "frontendType": "text",
             "description": t("Ordner"), "default": "All"},
-            {"name": "limit", "type": "number", "required": False, "frontendType": "number",
+            {"name": "limit", "type": "int", "required": False, "frontendType": "number",
             "description": t("Max E-Mails"), "default": 100},
         ],
         "inputs": 1,
@@ -59,19 +59,19 @@ EMAIL_NODES = [
         "description": t(
             "AI-gestützt einen E-Mail-Entwurf aus Kontext und optionalen Dokumenten erstellen"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "msft"},
             "description": t("E-Mail-Konto")},
-            {"name": "context", "type": "string", "required": False, "frontendType": "textarea",
-            "description": t("Kontext / Brief-Beschreibung für die KI-Komposition"), "default": ""},
-            {"name": "to", "type": "string", "required": False, "frontendType": "text",
+            {"name": "context", "type": "Any", "required": False, "frontendType": "templateTextarea",
+            "description": t("Daten aus vorherigen Schritten (oder direkte Beschreibung)"), "default": ""},
+            {"name": "to", "type": "str", "required": False, "frontendType": "text",
             "description": t("Empfänger (komma-separiert, optional für Entwurf)"), "default": ""},
-            {"name": "documentList", "type": "string", "required": False, "frontendType": "hidden",
+            {"name": "documentList", "type": "str", "required": False, "frontendType": "hidden",
             "description": t("Anhang-Dokumente (via Wire oder DataRef)"), "default": ""},
-            {"name": "emailContent", "type": "string", "required": False, "frontendType": "hidden",
+            {"name": "emailContent", "type": "str", "required": False, "frontendType": "hidden",
             "description": t("Direkt vorbereiteter Inhalt {subject, body, to} (via Wire — überspringt KI)"),
             "default": ""},
-            {"name": "emailStyle", "type": "string", "required": False, "frontendType": "select",
+            {"name": "emailStyle", "type": "str", "required": False, "frontendType": "select",
             "frontendOptions": {"options": ["formal", "casual", "business"]},
             "description": t("Stil"), "default": "business"},
         ],
```
```diff
@@ -12,23 +12,23 @@ FILE_NODES = [
         "parameters": [
             {"name": "contentSources", "type": "json", "required": False, "frontendType": "json",
             "description": t("Kontext-Quellen"), "default": []},
-            {"name": "outputFormat", "type": "string", "required": True, "frontendType": "select",
+            {"name": "outputFormat", "type": "str", "required": True, "frontendType": "select",
             "frontendOptions": {"options": ["docx", "pdf", "txt", "html", "md"]},
             "description": t("Ausgabeformat"), "default": "docx"},
-            {"name": "title", "type": "string", "required": False, "frontendType": "text",
+            {"name": "title", "type": "str", "required": False, "frontendType": "text",
             "description": t("Dokumenttitel")},
-            {"name": "templateName", "type": "string", "required": False, "frontendType": "select",
+            {"name": "templateName", "type": "str", "required": False, "frontendType": "select",
             "frontendOptions": {"options": ["default", "corporate", "minimal"]},
             "description": t("Stil-Vorlage")},
-            {"name": "language", "type": "string", "required": False, "frontendType": "select",
+            {"name": "language", "type": "str", "required": False, "frontendType": "select",
             "frontendOptions": {"options": ["de", "en", "fr"]},
             "description": t("Sprache"), "default": "de"},
-            {"name": "context", "type": "string", "required": False, "frontendType": "hidden",
-            "description": t("Inhalt (via Wire oder DataRef)"), "default": ""},
+            {"name": "context", "type": "Any", "required": False, "frontendType": "contextBuilder",
+            "description": t("Daten aus vorherigen Schritten"), "default": ""},
         ],
         "inputs": 1,
         "outputs": 1,
-        "inputPorts": {0: {"accepts": ["AiResult", "TextResult", "Transit"]}},
+        "inputPorts": {0: {"accepts": ["AiResult", "TextResult", "Transit", "FormPayload"]}},
         "outputPorts": {0: {"schema": "DocumentList"}},
         "meta": {"icon": "mdi-file-plus-outline", "color": "#2196F3", "usesAi": False},
         "_method": "file",
```
@@ -12,7 +12,7 @@ FLOW_NODES = [
         "parameters": [
             {
                 "name": "condition",
-                "type": "string",
+                "type": "str",
                 "required": True,
                 "frontendType": "condition",
                 "description": t("Bedingung"),
@@ -34,7 +34,7 @@ FLOW_NODES = [
         "parameters": [
             {
                 "name": "value",
-                "type": "string",
+                "type": "str",
                 "required": True,
                 "frontendType": "text",
                 "description": t("Zu vergleichender Wert"),
@@ -62,14 +62,14 @@ FLOW_NODES = [
         "parameters": [
             {
                 "name": "items",
-                "type": "string",
+                "type": "str",
                 "required": True,
                 "frontendType": "text",
                 "description": t("Pfad zum Array"),
             },
             {
                 "name": "level",
-                "type": "string",
+                "type": "str",
                 "required": False,
                 "frontendType": "select",
                 "frontendOptions": {"options": ["auto", "documents", "structuralNodes", "contentBlocks"]},
@@ -78,7 +78,7 @@ FLOW_NODES = [
             },
             {
                 "name": "concurrency",
-                "type": "number",
+                "type": "int",
                 "required": False,
                 "frontendType": "number",
                 "frontendOptions": {"min": 1, "max": 20},
@@ -103,7 +103,7 @@ FLOW_NODES = [
         "parameters": [
             {
                 "name": "mode",
-                "type": "string",
+                "type": "str",
                 "required": False,
                 "frontendType": "select",
                 "frontendOptions": {"options": ["first", "all", "append"]},
@@ -112,7 +112,7 @@ FLOW_NODES = [
             },
             {
                 "name": "inputCount",
-                "type": "number",
+                "type": "int",
                 "required": False,
                 "frontendType": "number",
                 "frontendOptions": {"min": 2, "max": 5},

@@ -3,6 +3,18 @@

 from modules.shared.i18nRegistry import t

+# Canonical form field types — single source of truth.
+# portType maps to the PORT_TYPE_CATALOG primitive used by DataPicker / validateGraph.
+FORM_FIELD_TYPES = [
+    {"id": "text", "label": "Text (einzeilig)", "portType": "str"},
+    {"id": "textarea", "label": "Text (mehrzeilig)", "portType": "str"},
+    {"id": "number", "label": "Zahl", "portType": "int"},
+    {"id": "boolean", "label": "Ja/Nein", "portType": "bool"},
+    {"id": "date", "label": "Datum", "portType": "str"},
+    {"id": "email", "label": "E-Mail", "portType": "str"},
+    {"id": "select", "label": "Auswahl", "portType": "str"},
+]
+
 INPUT_NODES = [
     {
         "id": "input.form",
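
A minimal standalone sketch of how this mapping table resolves a form-builder field type to a port primitive — the helper name is invented for illustration; only FORM_FIELD_TYPES itself comes from the diff (the same lookup is built inline in deriveFormPayloadSchemaFromParam further down in this compare):

from modules.features.graphicalEditor.nodeDefinitions.input import FORM_FIELD_TYPES

# id -> portType lookup; unknown ids fall through unchanged, mirroring the
# .get(raw_type, raw_type) fallback used in deriveFormPayloadSchemaFromParam.
_FORM_TYPE_TO_PORT = {f["id"]: f["portType"] for f in FORM_FIELD_TYPES}

def portTypeForFormField(fieldTypeId: str) -> str:  # illustrative helper, not in the diff
    return _FORM_TYPE_TO_PORT.get(fieldTypeId, fieldTypeId)

assert portTypeForFormField("number") == "int"
assert portTypeForFormField("boolean") == "bool"
assert portTypeForFormField("select") == "str"
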
@@ -32,11 +44,11 @@ INPUT_NODES = [
         "label": t("Genehmigung"),
         "description": t("Benutzer genehmigt oder lehnt ab"),
         "parameters": [
-            {"name": "title", "type": "string", "required": True, "frontendType": "text",
+            {"name": "title", "type": "str", "required": True, "frontendType": "text",
             "description": t("Genehmigungstitel")},
-            {"name": "description", "type": "string", "required": False, "frontendType": "textarea",
+            {"name": "description", "type": "str", "required": False, "frontendType": "textarea",
             "description": t("Was genehmigt werden soll")},
-            {"name": "approvalType", "type": "string", "required": False, "frontendType": "select",
+            {"name": "approvalType", "type": "str", "required": False, "frontendType": "select",
             "frontendOptions": {"options": ["generic", "document"]},
             "description": t("Typ: document oder generic"), "default": "generic"},
         ],
@@ -53,14 +65,14 @@ INPUT_NODES = [
         "label": t("Upload"),
         "description": t("Benutzer lädt Datei(en) hoch"),
         "parameters": [
-            {"name": "accept", "type": "string", "required": False, "frontendType": "text",
+            {"name": "accept", "type": "str", "required": False, "frontendType": "text",
             "description": t("Accept-String"), "default": ""},
             {"name": "allowedTypes", "type": "json", "required": False, "frontendType": "multiselect",
             "frontendOptions": {"options": ["pdf", "docx", "xlsx", "pptx", "txt", "csv", "jpg", "png", "gif"]},
             "description": t("Ausgewählte Dateitypen"), "default": []},
-            {"name": "maxSize", "type": "number", "required": False, "frontendType": "number",
+            {"name": "maxSize", "type": "int", "required": False, "frontendType": "number",
             "description": t("Max. Dateigröße in MB"), "default": 10},
-            {"name": "multiple", "type": "boolean", "required": False, "frontendType": "checkbox",
+            {"name": "multiple", "type": "bool", "required": False, "frontendType": "checkbox",
             "description": t("Mehrere Dateien erlauben"), "default": False},
         ],
         "inputs": 1,
@@ -76,9 +88,9 @@ INPUT_NODES = [
         "label": t("Kommentar"),
         "description": t("Benutzer fügt einen Kommentar hinzu"),
         "parameters": [
-            {"name": "placeholder", "type": "string", "required": False, "frontendType": "text",
+            {"name": "placeholder", "type": "str", "required": False, "frontendType": "text",
             "description": t("Platzhalter"), "default": ""},
-            {"name": "required", "type": "boolean", "required": False, "frontendType": "checkbox",
+            {"name": "required", "type": "bool", "required": False, "frontendType": "checkbox",
             "description": t("Kommentar erforderlich"), "default": True},
         ],
         "inputs": 1,
@@ -94,9 +106,9 @@ INPUT_NODES = [
         "label": t("Prüfung"),
         "description": t("Benutzer prüft Inhalt"),
         "parameters": [
-            {"name": "contentRef", "type": "string", "required": True, "frontendType": "text",
+            {"name": "contentRef", "type": "str", "required": True, "frontendType": "text",
             "description": t("Referenz auf Inhalt")},
-            {"name": "reviewType", "type": "string", "required": False, "frontendType": "select",
+            {"name": "reviewType", "type": "str", "required": False, "frontendType": "select",
             "frontendOptions": {"options": ["generic", "document"]},
             "description": t("Art der Prüfung"), "default": "generic"},
         ],
@@ -115,7 +127,7 @@ INPUT_NODES = [
         "parameters": [
             {"name": "options", "type": "json", "required": True, "frontendType": "keyValueRows",
             "description": t("Optionen"), "default": []},
-            {"name": "multiple", "type": "boolean", "required": False, "frontendType": "checkbox",
+            {"name": "multiple", "type": "bool", "required": False, "frontendType": "checkbox",
             "description": t("Mehrfachauswahl erlauben"), "default": False},
         ],
         "inputs": 1,
@@ -131,11 +143,11 @@ INPUT_NODES = [
         "label": t("Bestätigung"),
         "description": t("Benutzer bestätigt Ja/Nein"),
         "parameters": [
-            {"name": "question", "type": "string", "required": True, "frontendType": "text",
+            {"name": "question", "type": "str", "required": True, "frontendType": "text",
             "description": t("Zu bestätigende Frage")},
-            {"name": "confirmLabel", "type": "string", "required": False, "frontendType": "text",
+            {"name": "confirmLabel", "type": "str", "required": False, "frontendType": "text",
             "description": t("Label für Bestätigen-Button"), "default": "Confirm"},
-            {"name": "rejectLabel", "type": "string", "required": False, "frontendType": "text",
+            {"name": "rejectLabel", "type": "str", "required": False, "frontendType": "text",
             "description": t("Label für Ablehnen-Button"), "default": "Reject"},
         ],
         "inputs": 1,

@@ -25,7 +25,7 @@ REDMINE_NODES = [
         "description": t("Einzelnes Redmine-Ticket aus dem Mirror laden."),
         "parameters": [
             dict(_REDMINE_INSTANCE_PARAM),
-            {"name": "ticketId", "type": "number", "required": True, "frontendType": "number",
+            {"name": "ticketId", "type": "int", "required": True, "frontendType": "number",
             "description": t("Redmine-Ticket-ID")},
         ],
         "inputs": 1,
@@ -43,17 +43,17 @@ REDMINE_NODES = [
         "description": t("Tickets aus dem lokalen Mirror mit Filtern (Tracker, Status, Zeitraum, Zuweisung)."),
         "parameters": [
             dict(_REDMINE_INSTANCE_PARAM),
-            {"name": "trackerIds", "type": "string", "required": False, "frontendType": "text",
+            {"name": "trackerIds", "type": "str", "required": False, "frontendType": "text",
             "description": t("Tracker-IDs (Komma-separiert)"), "default": ""},
-            {"name": "status", "type": "string", "required": False, "frontendType": "text",
+            {"name": "status", "type": "str", "required": False, "frontendType": "text",
             "description": t("Status-Filter: open | closed | *"), "default": "*"},
-            {"name": "dateFrom", "type": "string", "required": False, "frontendType": "date",
+            {"name": "dateFrom", "type": "str", "required": False, "frontendType": "date",
             "description": t("Zeitraum ab (ISO-Datum)"), "default": ""},
-            {"name": "dateTo", "type": "string", "required": False, "frontendType": "date",
+            {"name": "dateTo", "type": "str", "required": False, "frontendType": "date",
             "description": t("Zeitraum bis (ISO-Datum)"), "default": ""},
-            {"name": "assignedToId", "type": "number", "required": False, "frontendType": "number",
+            {"name": "assignedToId", "type": "int", "required": False, "frontendType": "number",
             "description": t("Nur Tickets dieses Benutzers (ID)")},
-            {"name": "limit", "type": "number", "required": False, "frontendType": "number",
+            {"name": "limit", "type": "int", "required": False, "frontendType": "number",
             "description": t("Max. Anzahl Tickets (1-500)"), "default": 100},
         ],
         "inputs": 1,
@@ -71,21 +71,21 @@ REDMINE_NODES = [
         "description": t("Neues Ticket in Redmine anlegen. Mirror wird sofort aktualisiert."),
         "parameters": [
             dict(_REDMINE_INSTANCE_PARAM),
-            {"name": "subject", "type": "string", "required": True, "frontendType": "text",
+            {"name": "subject", "type": "str", "required": True, "frontendType": "text",
             "description": t("Ticket-Titel")},
-            {"name": "trackerId", "type": "number", "required": True, "frontendType": "number",
+            {"name": "trackerId", "type": "int", "required": True, "frontendType": "number",
             "description": t("Tracker-ID (Userstory, Feature, Task, ...)")},
-            {"name": "description", "type": "string", "required": False, "frontendType": "textarea",
+            {"name": "description", "type": "str", "required": False, "frontendType": "textarea",
             "description": t("Ticket-Beschreibung"), "default": ""},
-            {"name": "statusId", "type": "number", "required": False, "frontendType": "number",
+            {"name": "statusId", "type": "int", "required": False, "frontendType": "number",
             "description": t("Status-ID (optional)")},
-            {"name": "priorityId", "type": "number", "required": False, "frontendType": "number",
+            {"name": "priorityId", "type": "int", "required": False, "frontendType": "number",
             "description": t("Prioritaet-ID (optional)")},
-            {"name": "assignedToId", "type": "number", "required": False, "frontendType": "number",
+            {"name": "assignedToId", "type": "int", "required": False, "frontendType": "number",
             "description": t("Zugewiesene Benutzer-ID (optional)")},
-            {"name": "parentIssueId", "type": "number", "required": False, "frontendType": "number",
+            {"name": "parentIssueId", "type": "int", "required": False, "frontendType": "number",
             "description": t("Uebergeordnetes Ticket (optional)")},
-            {"name": "customFields", "type": "string", "required": False, "frontendType": "textarea",
+            {"name": "customFields", "type": "str", "required": False, "frontendType": "textarea",
             "description": t("Custom Fields als JSON {id: value}"), "default": ""},
         ],
         "inputs": 1,
@@ -103,25 +103,25 @@ REDMINE_NODES = [
         "description": t("Felder eines Redmine-Tickets aktualisieren. Nur gesetzte Felder werden uebertragen."),
         "parameters": [
             dict(_REDMINE_INSTANCE_PARAM),
-            {"name": "ticketId", "type": "number", "required": True, "frontendType": "number",
+            {"name": "ticketId", "type": "int", "required": True, "frontendType": "number",
             "description": t("Ticket-ID")},
-            {"name": "subject", "type": "string", "required": False, "frontendType": "text",
+            {"name": "subject", "type": "str", "required": False, "frontendType": "text",
             "description": t("Neuer Titel")},
-            {"name": "description", "type": "string", "required": False, "frontendType": "textarea",
+            {"name": "description", "type": "str", "required": False, "frontendType": "textarea",
             "description": t("Neue Beschreibung")},
-            {"name": "trackerId", "type": "number", "required": False, "frontendType": "number",
+            {"name": "trackerId", "type": "int", "required": False, "frontendType": "number",
             "description": t("Neuer Tracker")},
-            {"name": "statusId", "type": "number", "required": False, "frontendType": "number",
+            {"name": "statusId", "type": "int", "required": False, "frontendType": "number",
             "description": t("Neuer Status")},
-            {"name": "priorityId", "type": "number", "required": False, "frontendType": "number",
+            {"name": "priorityId", "type": "int", "required": False, "frontendType": "number",
             "description": t("Neue Prioritaet")},
-            {"name": "assignedToId", "type": "number", "required": False, "frontendType": "number",
+            {"name": "assignedToId", "type": "int", "required": False, "frontendType": "number",
             "description": t("Neue Zuweisung")},
-            {"name": "parentIssueId", "type": "number", "required": False, "frontendType": "number",
+            {"name": "parentIssueId", "type": "int", "required": False, "frontendType": "number",
             "description": t("Neues Parent-Ticket")},
-            {"name": "notes", "type": "string", "required": False, "frontendType": "textarea",
+            {"name": "notes", "type": "str", "required": False, "frontendType": "textarea",
             "description": t("Kommentar (Journal-Eintrag)"), "default": ""},
-            {"name": "customFields", "type": "string", "required": False, "frontendType": "textarea",
+            {"name": "customFields", "type": "str", "required": False, "frontendType": "textarea",
             "description": t("Custom Fields als JSON {id: value}"), "default": ""},
         ],
         "inputs": 1,
@@ -139,13 +139,13 @@ REDMINE_NODES = [
         "description": t("Aggregierte Kennzahlen (KPIs, Durchsatz, Status-Verteilung, Backlog) aus dem Mirror."),
         "parameters": [
             dict(_REDMINE_INSTANCE_PARAM),
-            {"name": "dateFrom", "type": "string", "required": False, "frontendType": "date",
+            {"name": "dateFrom", "type": "str", "required": False, "frontendType": "date",
             "description": t("Zeitraum ab")},
-            {"name": "dateTo", "type": "string", "required": False, "frontendType": "date",
+            {"name": "dateTo", "type": "str", "required": False, "frontendType": "date",
             "description": t("Zeitraum bis")},
-            {"name": "bucket", "type": "string", "required": False, "frontendType": "text",
+            {"name": "bucket", "type": "str", "required": False, "frontendType": "text",
             "description": t("Bucket: day | week | month"), "default": "week"},
-            {"name": "trackerIds", "type": "string", "required": False, "frontendType": "text",
+            {"name": "trackerIds", "type": "str", "required": False, "frontendType": "text",
             "description": t("Tracker-IDs (Komma-separiert)"), "default": ""},
         ],
         "inputs": 1,
@@ -163,7 +163,7 @@ REDMINE_NODES = [
         "description": t("Tickets und Beziehungen aus Redmine in den lokalen Mirror uebernehmen."),
         "parameters": [
             dict(_REDMINE_INSTANCE_PARAM),
-            {"name": "force", "type": "boolean", "required": False, "frontendType": "checkbox",
+            {"name": "force", "type": "bool", "required": False, "frontendType": "checkbox",
             "description": t("Vollsync erzwingen (ignoriert lastSyncAt)"), "default": False},
         ],
         "inputs": 1,

@@ -10,14 +10,14 @@ SHAREPOINT_NODES = [
         "label": t("Datei finden"),
         "description": t("Datei nach Pfad oder Suche finden"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "msft"},
             "description": t("SharePoint-Verbindung")},
-            {"name": "searchQuery", "type": "string", "required": True, "frontendType": "text",
+            {"name": "searchQuery", "type": "str", "required": True, "frontendType": "text",
             "description": t("Suchanfrage oder Pfad")},
-            {"name": "site", "type": "string", "required": False, "frontendType": "text",
+            {"name": "site", "type": "str", "required": False, "frontendType": "text",
             "description": t("Optionaler Site-Hinweis"), "default": ""},
-            {"name": "maxResults", "type": "number", "required": False, "frontendType": "number",
+            {"name": "maxResults", "type": "int", "required": False, "frontendType": "number",
             "description": t("Max Ergebnisse"), "default": 1000},
         ],
         "inputs": 1,
@@ -34,10 +34,10 @@ SHAREPOINT_NODES = [
         "label": t("Datei lesen"),
         "description": t("Inhalt aus Datei extrahieren"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "msft"},
             "description": t("SharePoint-Verbindung")},
-            {"name": "pathQuery", "type": "string", "required": True, "frontendType": "sharepointFile",
+            {"name": "pathQuery", "type": "str", "required": True, "frontendType": "sharepointFile",
             "frontendOptions": {"dependsOn": "connectionReference"},
             "description": t("Dateipfad")},
         ],
@@ -55,13 +55,13 @@ SHAREPOINT_NODES = [
         "label": t("Datei hochladen"),
         "description": t("Datei zu SharePoint hochladen"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "msft"},
             "description": t("SharePoint-Verbindung")},
-            {"name": "pathQuery", "type": "string", "required": True, "frontendType": "sharepointFolder",
+            {"name": "pathQuery", "type": "str", "required": True, "frontendType": "sharepointFolder",
             "frontendOptions": {"dependsOn": "connectionReference"},
             "description": t("Zielordner-Pfad")},
-            {"name": "content", "type": "string", "required": True, "frontendType": "hidden",
+            {"name": "content", "type": "str", "required": True, "frontendType": "hidden",
             "description": t("Datei-Inhalt aus Upstream-Node (via Wire oder DataRef)"), "default": ""},
         ],
         "inputs": 1,
@@ -78,10 +78,10 @@ SHAREPOINT_NODES = [
         "label": t("Dateien auflisten"),
         "description": t("Dateien in Ordner auflisten"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "msft"},
             "description": t("SharePoint-Verbindung")},
-            {"name": "pathQuery", "type": "string", "required": False, "frontendType": "sharepointFolder",
+            {"name": "pathQuery", "type": "str", "required": False, "frontendType": "sharepointFolder",
             "frontendOptions": {"dependsOn": "connectionReference"},
             "description": t("Ordnerpfad"), "default": "/"},
         ],
@@ -99,10 +99,10 @@ SHAREPOINT_NODES = [
         "label": t("Datei herunterladen"),
         "description": t("Datei vom Pfad herunterladen"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "msft"},
             "description": t("SharePoint-Verbindung")},
-            {"name": "pathQuery", "type": "string", "required": True, "frontendType": "sharepointFile",
+            {"name": "pathQuery", "type": "str", "required": True, "frontendType": "sharepointFile",
             "frontendOptions": {"dependsOn": "connectionReference"},
             "description": t("Vollständiger Dateipfad")},
         ],
@@ -120,13 +120,13 @@ SHAREPOINT_NODES = [
         "label": t("Datei kopieren"),
         "description": t("Datei an Ziel kopieren"),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": True, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": True, "frontendType": "userConnection",
             "frontendOptions": {"authority": "msft"},
             "description": t("SharePoint-Verbindung")},
-            {"name": "sourcePath", "type": "string", "required": True, "frontendType": "sharepointFile",
+            {"name": "sourcePath", "type": "str", "required": True, "frontendType": "sharepointFile",
             "frontendOptions": {"dependsOn": "connectionReference"},
             "description": t("Quelldatei-Pfad")},
-            {"name": "destPath", "type": "string", "required": True, "frontendType": "sharepointFolder",
+            {"name": "destPath", "type": "str", "required": True, "frontendType": "sharepointFolder",
             "frontendOptions": {"dependsOn": "connectionReference"},
             "description": t("Zielordner")},
         ],

@@ -46,7 +46,7 @@ TRIGGER_NODES = [
         "parameters": [
             {
                 "name": "cron",
-                "type": "string",
+                "type": "str",
                 "required": False,
                 "frontendType": "cron",
                 "description": t("Cron-Ausdruck"),

@@ -25,11 +25,11 @@ TRUSTEE_NODES = [
         "description": t("Buchhaltungsdaten aus externem System importieren/aktualisieren."),
         "parameters": [
             dict(_TRUSTEE_INSTANCE_PARAM),
-            {"name": "forceRefresh", "type": "boolean", "required": False, "frontendType": "checkbox",
+            {"name": "forceRefresh", "type": "bool", "required": False, "frontendType": "checkbox",
             "description": t("Import erzwingen"), "default": False},
-            {"name": "dateFrom", "type": "string", "required": False, "frontendType": "date",
+            {"name": "dateFrom", "type": "str", "required": False, "frontendType": "date",
             "description": t("Startdatum"), "default": ""},
-            {"name": "dateTo", "type": "string", "required": False, "frontendType": "date",
+            {"name": "dateTo", "type": "str", "required": False, "frontendType": "date",
             "description": t("Enddatum"), "default": ""},
         ],
         "inputs": 1,
@@ -46,14 +46,14 @@ TRUSTEE_NODES = [
         "label": t("Dokumente extrahieren"),
         "description": t("Dokumenttyp und Daten aus PDF/JPG per AI extrahieren."),
         "parameters": [
-            {"name": "connectionReference", "type": "string", "required": False, "frontendType": "userConnection",
+            {"name": "connectionReference", "type": "str", "required": False, "frontendType": "userConnection",
             "frontendOptions": {"authority": "msft"},
             "description": t("SharePoint-Verbindung"), "default": ""},
-            {"name": "sharepointFolder", "type": "string", "required": False, "frontendType": "sharepointFolder",
+            {"name": "sharepointFolder", "type": "str", "required": False, "frontendType": "sharepointFolder",
             "frontendOptions": {"dependsOn": "connectionReference"},
             "description": t("SharePoint-Ordnerpfad"), "default": ""},
             dict(_TRUSTEE_INSTANCE_PARAM),
-            {"name": "prompt", "type": "string", "required": False, "frontendType": "textarea",
+            {"name": "prompt", "type": "str", "required": False, "frontendType": "textarea",
             "description": t("AI-Prompt für Extraktion"), "default": ""},
         ],
         "inputs": 1,
@@ -77,7 +77,7 @@ TRUSTEE_NODES = [
             # is List[ActionDocument] (see datamodelChat.ActionResult). The
            # DataPicker uses this string to filter compatible upstream paths.
             {"name": "documentList", "type": "List[ActionDocument]", "required": True, "frontendType": "dataRef",
-            "description": t("Dokumentenliste — gebunden via DataRef.")},
+            "description": t("Dokumente aus vorherigen Schritten")},
             dict(_TRUSTEE_INSTANCE_PARAM),
         ],
         "inputs": 1,
@@ -95,7 +95,7 @@ TRUSTEE_NODES = [
         "description": t("Trustee-Positionen in Buchhaltungssystem übertragen."),
         "parameters": [
             {"name": "documentList", "type": "List[ActionDocument]", "required": True, "frontendType": "dataRef",
-            "description": t("Verarbeitete Dokumentenliste — gebunden via DataRef.")},
+            "description": t("Dokumente aus vorherigen Schritten")},
             dict(_TRUSTEE_INSTANCE_PARAM),
         ],
         "inputs": 1,
@@ -113,25 +113,25 @@ TRUSTEE_NODES = [
         "description": t("Daten aus der Trustee-DB lesen (Lookup, Aggregation, Roh-Export). Pendant zu refreshAccountingData ohne externen Sync."),
         "parameters": [
             dict(_TRUSTEE_INSTANCE_PARAM),
-            {"name": "mode", "type": "string", "required": True, "frontendType": "select",
+            {"name": "mode", "type": "str", "required": True, "frontendType": "select",
             "frontendOptions": {"options": ["lookup", "raw", "aggregate"]},
             "description": t("Abfragemodus"), "default": "lookup"},
-            {"name": "entity", "type": "string", "required": True, "frontendType": "select",
+            {"name": "entity", "type": "str", "required": True, "frontendType": "select",
             "frontendOptions": {"options": ["tenantWithRent", "contact", "journalLines", "accounts", "balances"]},
             "description": t("Entität, die gelesen werden soll"), "default": "tenantWithRent"},
-            {"name": "tenantNameRef", "type": "string", "required": False, "frontendType": "text",
+            {"name": "tenantNameRef", "type": "str", "required": False, "frontendType": "text",
             "frontendOptions": {"dependsOn": "entity", "showWhen": ["tenantWithRent", "contact"]},
             "description": t("Mietername (oder {{wire.feld}} aus Upstream)"), "default": ""},
-            {"name": "tenantAddressRef", "type": "string", "required": False, "frontendType": "text",
+            {"name": "tenantAddressRef", "type": "str", "required": False, "frontendType": "text",
             "frontendOptions": {"dependsOn": "entity", "showWhen": ["tenantWithRent", "contact"]},
             "description": t("Mieteradresse (Toleranz für Tippfehler)"), "default": ""},
-            {"name": "period", "type": "string", "required": False, "frontendType": "text",
+            {"name": "period", "type": "str", "required": False, "frontendType": "text",
             "frontendOptions": {"dependsOn": "entity", "showWhen": ["tenantWithRent", "journalLines", "balances"]},
             "description": t("Zeitraum (YYYY oder YYYY-MM-DD/YYYY-MM-DD)"), "default": ""},
-            {"name": "rentAccountPattern", "type": "string", "required": False, "frontendType": "text",
+            {"name": "rentAccountPattern", "type": "str", "required": False, "frontendType": "text",
             "frontendOptions": {"dependsOn": "entity", "showWhen": ["tenantWithRent"]},
             "description": t("Konto-Filter für Mietzins (z.B. '6000-6099' oder '6*')"), "default": ""},
-            {"name": "filterJson", "type": "string", "required": False, "frontendType": "textarea",
+            {"name": "filterJson", "type": "str", "required": False, "frontendType": "textarea",
             "frontendOptions": {"dependsOn": "mode", "showWhen": ["raw", "aggregate"]},
             "description": t("Optionaler JSON-Filter für mode=raw/aggregate"), "default": ""},
         ],

@@ -9,6 +9,7 @@ import logging
 from typing import Dict, List, Any, Optional

 from modules.features.graphicalEditor.nodeDefinitions import STATIC_NODE_TYPES
+from modules.features.graphicalEditor.nodeDefinitions.input import FORM_FIELD_TYPES
 from modules.features.graphicalEditor.nodeAdapter import bindsActionFromLegacy
 from modules.features.graphicalEditor.portTypes import PORT_TYPE_CATALOG, SYSTEM_VARIABLES
 from modules.shared.i18nRegistry import normalizePrimaryLanguageTag, resolveText
@@ -119,6 +120,7 @@ def getNodeTypesForApi(
         "categories": categories,
         "portTypeCatalog": catalogSerialized,
         "systemVariables": SYSTEM_VARIABLES,
+        "formFieldTypes": FORM_FIELD_TYPES,
     }

@@ -34,6 +34,8 @@ class PortField(BaseModel):
     # FeatureInstanceRef.featureCode). Pickers/validators use it to filter compatible
     # producers by sub-type. Type must be "str" when discriminator is True.
     discriminator: bool = False
+    # Surfaces this field at the top of the DataPicker list as the most common pick.
+    recommended: bool = False


 class PortSchema(BaseModel):
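
The new flag simply rides along on schema declarations; per the comment, a picker can sort on it to float the most common choice to the top. A small sketch under that assumption (field values are invented; the import path follows the module shown in this compare):

from modules.features.graphicalEditor.portTypes import PortField

fields = [
    PortField(name="connection", type="ConnectionRef", required=False, description="Verbindung"),
    PortField(name="response", type="str", description="Antworttext", recommended=True),
]
# recommended fields first; sorted() is stable, so the original order is kept otherwise
ordered = sorted(fields, key=lambda f: not f.recommended)
assert ordered[0].name == "response"
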
@@ -83,7 +85,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
         PortField(name="listId", type="str", description="ClickUp-Listen-ID"),
         PortField(name="name", type="str", required=False, description="Listenname"),
         PortField(name="spaceId", type="str", required=False, description="Space-ID"),
-        PortField(name="folderId", type="str", required=False, description="Ordner-ID"),
+        PortField(name="groupId", type="str", required=False, description="Gruppen-ID für die Gruppierungszuordnung"),
         PortField(name="connection", type="ConnectionRef", required=False,
                   description="ClickUp-Verbindung"),
     ]),
@@ -153,7 +155,7 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
     ]),
     "DocumentList": PortSchema(name="DocumentList", fields=[
         PortField(name="documents", type="List[Document]",
-                  description="Dokumentenliste"),
+                  description="Dokumente aus vorherigen Schritten", recommended=True),
         PortField(name="connection", type="ConnectionRef", required=False,
                   description="Verbindung, mit der die Liste erzeugt wurde"),
         PortField(name="source", type="SharePointFolderRef", required=False,
@@ -219,9 +221,9 @@ PORT_TYPE_CATALOG: Dict[str, PortSchema] = {
         PortField(name="prompt", type="str",
                   description="Prompt"),
         PortField(name="response", type="str",
-                  description="Antworttext"),
+                  description="Antworttext", recommended=True),
         PortField(name="responseData", type="Dict", required=False,
-                  description="Strukturierte Antwort"),
+                  description="Strukturierte Antwort (nur bei JSON-Ausgabe)"),
         PortField(name="context", type="str",
                   description="Kontext"),
         PortField(name="documents", type="List[Document]",
@@ -658,8 +660,11 @@ def normalizeToSchema(raw: Any, schemaName: str) -> Dict[str, Any]:
     if not schema or schemaName == "Transit":
         return result

+    # Only default **required** fields. Optional fields stay absent so DataRefs / context
+    # resolution never pick a synthetic `{}` or `[]` (e.g. AiResult.responseData when the
+    # model returned plain text only).
     for field in schema.fields:
-        if field.name not in result:
+        if field.name not in result and field.required:
             result[field.name] = _defaultForType(field.type)

     return result
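
A before/after illustration of the tightened defaulting, using the AiResult schema from the catalog above (``response`` is required, ``responseData`` is optional; the import path is assumed from this compare):

from modules.features.graphicalEditor.portTypes import normalizeToSchema

out = normalizeToSchema({"response": "plain text"}, "AiResult")
assert out["response"] == "plain text"
# Previously the optional Dict field would have been injected as {};
# now it stays absent, so DataRef resolution cannot pick a synthetic value.
assert "responseData" not in out
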
@@ -740,6 +745,9 @@ def _resolveTransitChain(

 def deriveFormPayloadSchemaFromParam(node: Dict[str, Any], param_key: str) -> Optional[PortSchema]:
     """Derive output schema from a field-builder JSON list (``fields``, ``formFields``, …)."""
+    from modules.features.graphicalEditor.nodeDefinitions.input import FORM_FIELD_TYPES
+    _FORM_TYPE_TO_PORT: Dict[str, str] = {f["id"]: f["portType"] for f in FORM_FIELD_TYPES}
+
     fields_param = (node.get("parameters") or {}).get(param_key)
     if not fields_param or not isinstance(fields_param, list):
         return None
@@ -749,9 +757,11 @@ def deriveFormPayloadSchemaFromParam(node: Dict[str, Any], param_key: str) -> Op
         _desc = resolveText(lab) if lab is not None else fname
         if not str(_desc).strip():
             _desc = fname
+        raw_type = str(ftype) if ftype is not None else "str"
+        port_type = _FORM_TYPE_TO_PORT.get(raw_type, raw_type)
         portFields.append(PortField(
             name=fname,
-            type=str(ftype) if ftype is not None else "str",
+            type=port_type,
             description=_desc,
             required=required,
         ))
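
An end-to-end sketch of the derivation with a made-up node payload; the per-field keys ``name``/``type``/``label``/``required`` are inferred from the loop body, which is only partially visible in this hunk:

node = {"parameters": {"formFields": [
    {"name": "amount", "type": "number", "label": "Betrag", "required": True},
    {"name": "note", "type": "textarea", "required": False},
]}}
schema = deriveFormPayloadSchemaFromParam(node, "formFields")
# "number" now resolves to the catalog primitive "int", "textarea" to "str"
assert [f.type for f in schema.fields] == ["int", "str"]
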
@@ -111,6 +111,44 @@ def _validateInstanceAccess(instanceId: str, context: RequestContext) -> str:
     return str(instance.mandateId) if instance.mandateId else ""


+def _validateTargetInstance(
+    workflowData: Dict[str, Any],
+    ownerInstanceId: str,
+    context: RequestContext,
+) -> None:
+    """Enforce targetFeatureInstanceId rules for non-template workflows.
+
+    - Templates (isTemplate=True) may omit targetFeatureInstanceId.
+    - Non-templates MUST have a non-empty targetFeatureInstanceId.
+    - If the targetFeatureInstanceId differs from the GE owner instance,
+      the user must also have FeatureAccess on that target instance.
+    """
+    if workflowData.get("isTemplate"):
+        return
+
+    targetId = workflowData.get("targetFeatureInstanceId")
+    if not targetId:
+        return
+
+    if targetId == ownerInstanceId:
+        return
+
+    from modules.interfaces.interfaceDbApp import getRootInterface
+    rootInterface = getRootInterface()
+    targetInstance = rootInterface.getFeatureInstance(targetId)
+    if not targetInstance:
+        raise HTTPException(
+            status_code=400,
+            detail=routeApiMsg("targetFeatureInstanceId refers to a non-existent feature instance"),
+        )
+    targetAccess = rootInterface.getFeatureAccess(str(context.user.id), targetId)
+    if not targetAccess or not targetAccess.enabled:
+        raise HTTPException(
+            status_code=403,
+            detail=routeApiMsg("Access denied to target feature instance"),
+        )
+
+
 @router.get("/{instanceId}/node-types")
 @limiter.limit("60/minute")
 def get_node_types(

@@ -318,9 +356,12 @@ async def post_execute(
     workflowId = body.get("workflowId")
     req_nodes = graph.get("nodes") or []
     workflow_for_envelope: Optional[Dict[str, Any]] = None
+    targetFeatureInstanceId: Optional[str] = None
     if workflowId and not str(workflowId).startswith("transient-"):
         iface = getGraphicalEditorInterface(context.user, mandateId, instanceId)
         workflow_for_envelope = iface.getWorkflow(workflowId)
+        if workflow_for_envelope:
+            targetFeatureInstanceId = workflow_for_envelope.get("targetFeatureInstanceId")
     if workflowId and len(req_nodes) == 0:
         iface = getGraphicalEditorInterface(context.user, mandateId, instanceId)
         wf = iface.getWorkflow(workflowId)
@@ -328,10 +369,18 @@ async def post_execute(
             graph = wf["graph"]
             logger.info("graphicalEditor execute: loaded graph from workflow %s", workflowId)
             workflow_for_envelope = wf
+            targetFeatureInstanceId = wf.get("targetFeatureInstanceId")
     if not workflowId:
         import uuid
         workflowId = f"transient-{uuid.uuid4().hex[:12]}"
         logger.info("graphicalEditor execute: using transient workflowId=%s", workflowId)

+    if targetFeatureInstanceId and targetFeatureInstanceId != instanceId:
+        _validateTargetInstance(
+            {"targetFeatureInstanceId": targetFeatureInstanceId},
+            instanceId,
+            context,
+        )
     nodes_count = len(graph.get("nodes") or [])
     connections_count = len(graph.get("connections") or [])
     logger.info(
@@ -363,6 +412,7 @@ async def post_execute(
         automation2_interface=ge_interface,
         run_envelope=run_env,
         label=_wfLabel,
+        targetFeatureInstanceId=targetFeatureInstanceId,
     )
     logger.info(
         "graphicalEditor execute result: success=%s error=%s nodeOutputs_keys=%s failedNode=%s paused=%s",

@@ -1371,6 +1421,7 @@ def create_workflow(
 ) -> dict:
     """Create a new workflow."""
     mandateId = _validateInstanceAccess(instanceId, context)
+    _validateTargetInstance(body, instanceId, context)
     iface = getGraphicalEditorInterface(context.user, mandateId, instanceId)
     created = iface.createWorkflow(body)
     return created
@@ -1388,6 +1439,11 @@ def update_workflow(
     """Update a workflow."""
     mandateId = _validateInstanceAccess(instanceId, context)
     iface = getGraphicalEditorInterface(context.user, mandateId, instanceId)
+    existing = iface.getWorkflow(workflowId)
+    if not existing:
+        raise HTTPException(status_code=404, detail=routeApiMsg("Workflow not found"))
+    merged = {**existing, **body}
+    _validateTargetInstance(merged, instanceId, context)
     updated = iface.updateWorkflow(workflowId, body)
     if not updated:
         raise HTTPException(status_code=404, detail=routeApiMsg("Workflow not found"))

@@ -361,6 +361,17 @@ QUICK_ACTIONS = [
 # The placeholder {{featureInstanceId}} is replaced by _copyTemplateWorkflows.
 # ---------------------------------------------------------------------------

+_FINANCE_STYLE_HINT = (
+    "\n\nWenn du ein Dokument erstellst, verwende einen professionellen Finanz-Stil:\n"
+    "- Schriftart: Calibri\n"
+    "- Primaerfarbe: #1F3864 (Dunkelblau)\n"
+    "- Akzentfarbe: #2980B9\n"
+    "- Tabellen mit dunklem Header (#1F3864, weisse Schrift)\n"
+    "- Konservatives, seriöses Layout\n"
+    "Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen."
+)
+
+
 def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]:
     """Build a standard analysis graph: trigger -> refreshAccountingData -> ai.prompt."""
     return {
@@ -370,8 +381,9 @@ def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]:
             "parameters": {"featureInstanceId": "{{featureInstanceId}}", "forceRefresh": False}, "position": {"x": 250, "y": 0}},
            {"id": "analyse", "type": "ai.prompt", "label": "Analyse", "_method": "ai", "_action": "process",
             "parameters": {
-                "aiPrompt": prompt,
+                "aiPrompt": prompt + _FINANCE_STYLE_HINT,
                 "context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]},
                 "requireNeutralization": False,
                 "simpleMode": False,
             }, "position": {"x": 500, "y": 0}},
         ],
@@ -440,15 +452,33 @@ TEMPLATE_WORKFLOWS = [
            {"id": "analyse", "type": "ai.prompt", "label": "Budget-Analyse", "_method": "ai", "_action": "process",
             "parameters": {
                 "aiPrompt": (
-                    "Fuehre einen Budget-Soll/Ist-Vergleich durch.\n"
-                    "Die Budget-Datei (Excel) wurde als Dokument uebergeben. "
-                    "Die aktuellen Buchhaltungsdaten sind im Kontext verfuegbar.\n"
-                    "1. Lies die Soll-Werte aus dem uebergebenen Budget-Dokument\n"
-                    "2. Vergleiche sie mit den Ist-Werten aus der Buchhaltung pro Konto\n"
-                    "3. Berechne die Abweichung (absolut und prozentual)\n"
-                    "4. Erstelle ein Abweichungs-Chart (Balkendiagramm: Soll vs. Ist pro Konto)\n"
-                    "5. Markiere kritische Abweichungen (>10%) und gib eine kurze Einschaetzung"
+                    "Fuehre einen Budget-Soll/Ist-Vergleich durch und liefere EIN Excel-Dokument "
+                    "mit folgender Struktur:\n\n"
+                    "1. Tabelle \"Konten-Vergleich\" -- EINE Tabelle, EINE Zeile pro Konto:\n"
+                    "   Spalten: Konto-Nr | Konto-Name | Soll | Ist | Abweichung absolut | "
+                    "Abweichung % | Status (OK / Warnung / Kritisch).\n"
+                    "2. EINE Visualisierung \"Soll vs. Ist gesamt\" -- ein einziges "
+                    "Balkendiagramm UNTER der Tabelle, das ALLE Konten in einer Grafik "
+                    "gegenueberstellt (gruppierte Balken: Soll und Ist je Konto).\n"
+                    "3. Kurzer Management-Summary-Absatz (3-5 Saetze) UNTER dem Chart "
+                    "mit den 3 groessten Abweichungen (>10%) und einer fachlichen "
+                    "Einschaetzung.\n\n"
+                    "Verwende die uebergebene Budget-Datei als Soll-Quelle und die im "
+                    "Kontext bereitgestellten Buchhaltungsdaten als Ist-Quelle.\n"
+                    "WICHTIG: Erstelle KEINEN separaten Chart pro Konto. Nur EIN "
+                    "Uebersichts-Chart ueber alle Konten ist gewuenscht.\n\n"
+                    "Hinweis: Das documentTheme ist 'finance'. Wenn du ein Dokument erstellst, "
+                    "verwende einen professionellen Finanz-Stil:\n"
+                    "- Schriftart: Calibri\n"
+                    "- Primaerfarbe: #1F3864 (Dunkelblau)\n"
+                    "- Akzentfarbe: #2980B9\n"
+                    "- Tabellen mit dunklem Header (#1F3864, weisse Schrift)\n"
+                    "- Konservatives, seriöses Layout\n"
+                    "Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen."
                 ),
+                "resultType": "xlsx",
+                "documentTheme": "finance",
                 "requireNeutralization": False,
                 "documentList": {"type": "ref", "nodeId": "trigger", "path": ["payload", "documentList"]},
                 "context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]},
                 "simpleMode": False,

@@ -2,8 +2,8 @@
 # All rights reserved.
 """Workspace feature data models — WorkspaceUserSettings."""

-from typing import Optional
-from pydantic import BaseModel, Field
+from typing import List, Optional
+from pydantic import Field
 from modules.datamodels.datamodelBase import PowerOnModel
 from modules.shared.i18nRegistry import i18nModel
 import uuid
@@ -52,3 +52,18 @@ class WorkspaceUserSettings(PowerOnModel):
         description="Max agent rounds override (None = instance default)",
         json_schema_extra={"label": "Max. Agenten-Runden", "frontend_type": "number", "frontend_readonly": False, "frontend_required": False},
     )
+    requireNeutralization: bool = Field(
+        default=False,
+        description="Default neutralization setting for this user",
+        json_schema_extra={"label": "Neutralisierung", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
+    )
+    allowedProviders: List[str] = Field(
+        default_factory=list,
+        description="Allowed AI providers (empty = all permitted by RBAC)",
+        json_schema_extra={"label": "Erlaubte Provider", "frontend_type": "multiselect", "frontend_readonly": False, "frontend_required": False},
+    )
+    allowedModels: List[str] = Field(
+        default_factory=list,
+        description="Allowed AI models (empty = all permitted)",
+        json_schema_extra={"label": "Erlaubte Modelle", "frontend_type": "modelMultiSelect", "frontend_readonly": False, "frontend_required": False},
+    )

@@ -110,6 +110,7 @@ class WorkspaceInputRequest(BaseModel):
     workflowId: Optional[str] = Field(default=None, description="Continue existing workflow")
     userLanguage: str = Field(default="en", description="User language code")
     allowedProviders: List[str] = Field(default_factory=list, description="Restrict AI to these providers")
+    allowedModels: List[str] = Field(default_factory=list, description="Restrict AI to these models")
     requireNeutralization: Optional[bool] = Field(default=None, description="Per-request neutralization override")

@@ -635,6 +636,7 @@ async def streamWorkspaceStart(
         userLanguage=userInput.userLanguage,
         instanceConfig=instanceConfig,
         allowedProviders=userInput.allowedProviders,
+        allowedModels=userInput.allowedModels,
         requireNeutralization=userInput.requireNeutralization,
         billingFeatureCode=wsBillingFeatureCode,
     )
@@ -692,6 +694,7 @@ async def _runWorkspaceAgent(
     userLanguage: str = "en",
     instanceConfig: Dict[str, Any] = None,
     allowedProviders: List[str] = None,
+    allowedModels: List[str] = None,
     requireNeutralization: Optional[bool] = None,
     billingFeatureCode: Optional[str] = None,
 ):
@@ -715,6 +718,9 @@ async def _runWorkspaceAgent(
         logger.info(f"Workspace agent: allowedProviders={allowedProviders}")
     else:
         logger.debug("Workspace agent: no allowedProviders in request")
+    if allowedModels:
+        aiService.services.allowedModels = allowedModels
+        logger.info(f"Workspace agent: allowedModels={allowedModels}")
     if requireNeutralization is not None:
         ctx.requireNeutralization = requireNeutralization

@@ -1202,7 +1208,7 @@ async def patchWorkspaceWorkflowAttachments(


 # ---------------------------------------------------------------------------
-# File and folder list endpoints
+# File endpoints
 # ---------------------------------------------------------------------------

 @router.get("/{instanceId}/files")
@@ -1210,7 +1216,6 @@ async def patchWorkspaceWorkflowAttachments(
 async def listWorkspaceFiles(
     request: Request,
     instanceId: str = Path(...),
-    folderId: Optional[str] = Query(None),
     tags: Optional[str] = Query(None),
     search: Optional[str] = Query(None),
     context: RequestContext = Depends(getRequestContext),
@@ -1265,30 +1270,6 @@ async def getFileContent(
     return Response(content=content, media_type=mimeType)


-@router.get("/{instanceId}/folders")
-@limiter.limit("300/minute")
-async def listWorkspaceFolders(
-    request: Request,
-    instanceId: str = Path(...),
-    parentId: Optional[str] = Query(None),
-    context: RequestContext = Depends(getRequestContext),
-):
-    _mandateId, _ = _validateInstanceAccess(instanceId, context)
-    try:
-        from modules.serviceCenter import getService
-        from modules.serviceCenter.context import ServiceCenterContext
-        ctx = ServiceCenterContext(
-            user=context.user,
-            mandate_id=_mandateId or "",
-            feature_instance_id=instanceId,
-        )
-        chatService = getService("chat", ctx)
-        folders = chatService.listFolders(parentId=parentId)
-        return JSONResponse({"folders": folders or []})
-    except Exception:
-        return JSONResponse({"folders": []})
-
-
 @router.get("/{instanceId}/datasources")
 @limiter.limit("300/minute")
 async def listWorkspaceDataSources(

@@ -2139,6 +2120,76 @@ async def updateGeneralSettings(
     return await getGeneralSettings(request, instanceId, context)


+# =========================================================================
+# User-level AI settings (neutralisation, providers, models)
+# =========================================================================
+
+@router.get("/{instanceId}/user-settings")
+@limiter.limit("120/minute")
+async def getWorkspaceUserSettings(
+    request: Request,
+    instanceId: str = Path(...),
+    context: RequestContext = Depends(getRequestContext),
+):
+    """Get the current user's workspace AI settings (auto-creates with defaults if not exists)."""
+    _mandateId, _ = _validateInstanceAccess(instanceId, context)
+    wsInterface = _getWorkspaceInterface(context, instanceId)
+    userId = str(context.user.id)
+
+    settings = wsInterface.getWorkspaceUserSettings(userId)
+    if settings:
+        return JSONResponse({
+            "requireNeutralization": settings.requireNeutralization,
+            "allowedProviders": settings.allowedProviders,
+            "allowedModels": settings.allowedModels,
+        })
+
+    data = {
+        "userId": userId,
+        "mandateId": str(context.mandateId) if context.mandateId else "",
+        "featureInstanceId": instanceId,
+    }
+    created = wsInterface.saveWorkspaceUserSettings(data)
+    return JSONResponse({
+        "requireNeutralization": created.requireNeutralization,
+        "allowedProviders": created.allowedProviders,
+        "allowedModels": created.allowedModels,
+    })
+
+
+@router.put("/{instanceId}/user-settings")
+@limiter.limit("120/minute")
+async def putWorkspaceUserSettings(
+    request: Request,
+    instanceId: str = Path(...),
+    body: dict = Body(...),
+    context: RequestContext = Depends(getRequestContext),
+):
+    """Save the current user's workspace AI settings."""
+    _mandateId, _ = _validateInstanceAccess(instanceId, context)
+    wsInterface = _getWorkspaceInterface(context, instanceId)
+    userId = str(context.user.id)
+
+    data = {
+        "userId": userId,
+        "mandateId": str(context.mandateId) if context.mandateId else "",
+        "featureInstanceId": instanceId,
+    }
+    if "requireNeutralization" in body:
+        data["requireNeutralization"] = bool(body["requireNeutralization"])
+    if "allowedProviders" in body:
+        data["allowedProviders"] = body["allowedProviders"]
+    if "allowedModels" in body:
+        data["allowedModels"] = body["allowedModels"]
+
+    saved = wsInterface.saveWorkspaceUserSettings(data)
+    return JSONResponse({
+        "requireNeutralization": saved.requireNeutralization,
+        "allowedProviders": saved.allowedProviders,
+        "allowedModels": saved.allowedModels,
+    })
+
+
 # =========================================================================
 # RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
 # =========================================================================

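
Client-side, the two new endpoints pair up as read-modify-write. A sketch with placeholder base URL, auth header, instance id, and model names — the exact route prefix depends on how this router is mounted:

import requests  # illustrative client only; response shapes taken from the handlers above

BASE = "https://example.invalid/api"           # placeholder
HEADERS = {"Authorization": "Bearer <token>"}  # placeholder auth scheme
instanceId = "<featureInstanceId>"

# First GET auto-creates a row with defaults and echoes it back:
settings = requests.get(f"{BASE}/{instanceId}/user-settings", headers=HEADERS).json()
# -> {"requireNeutralization": false, "allowedProviders": [], "allowedModels": []}

# PUT forwards only the keys present in the body to saveWorkspaceUserSettings:
requests.put(
    f"{BASE}/{instanceId}/user-settings",
    headers=HEADERS,
    json={"allowedModels": ["model-a", "model-b"]},  # placeholder model names
)
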
198
modules/interfaces/_legacyMigrationTelemetry.py
Normal file
198
modules/interfaces/_legacyMigrationTelemetry.py
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Lightweight Bootstrap-Telemetrie fuer entfernte Migrationsroutinen.
|
||||
|
||||
Wenn eine idempotente Bootstrap-Migration (z.B. ``_migrateAndDropSysAdminRole``)
|
||||
aus dem Boot-Pfad entfernt wird, koennte ein theoretischer Edge-Case (alte
|
||||
DB-Restore, manueller INSERT) wieder Legacy-Daten ins System bringen. Damit das
|
||||
nicht still bleibt, ruft ``initBootstrap`` nach Abschluss aller Init-Schritte
|
||||
einmalig ``runLegacyDataChecks`` auf -- das logged WARN bei Restbestand.
|
||||
|
||||
Designprinzipien:
|
||||
- KEINE Schreibzugriffe (rein lesend).
|
||||
- Process-lokal gecached (``_cache``), damit identische Boots/Reloads den Check
|
||||
nur einmal laufen lassen.
|
||||
- Pro Check eine Recordset-Abfrage; Ausnahmen werden als WARN geloggt, nicht
|
||||
re-raised, damit Telemetrie den Boot nie crasht.
|
||||
"""
|
||||
|
||||
modules/interfaces/_legacyMigrationTelemetry.py

from __future__ import annotations

import logging
from typing import Any

from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.datamodels.datamodelRbac import Role
from modules.datamodels.datamodelUam import Mandate
from modules.shared.mandateNameUtils import isValidMandateName

logger = logging.getLogger(__name__)

_alreadyRan: bool = False


def runLegacyDataChecks(db: DatabaseConnector) -> None:
    """Logs a WARN if legacy data still exists that used to be handled by
    the removed migration routines. Active only once per process.

    Called at the end of ``initBootstrap``.
    """
    global _alreadyRan
    if _alreadyRan:
        return
    _alreadyRan = True

    _checkMandateDescription(db)
    _checkMandateSlugRules(db)
    _checkLegacyRootMandate(db)
    _checkSysadminRole(db)
    _backfillTargetFeatureInstanceId()


def _safe(checkName: str, fn) -> Any:
    try:
        return fn()
    except Exception as exc:
        logger.warning(
            "Legacy-data telemetry check '%s' failed: %s: %s",
            checkName, type(exc).__name__, exc,
        )
        return None


def _checkMandateDescription(db: DatabaseConnector) -> None:
    def _do() -> None:
        rows = db.getRecordset(Mandate)
        bad = [
            r.get("id") for r in rows
            if r.get("description") and not r.get("label")
        ]
        if bad:
            logger.warning(
                "Legacy-data check: %d Mandate row(s) still have description "
                "but empty label (removed migration: _migrateMandateDescriptionToLabel). "
                "Run scripts/script_db_audit_legacy_state.py for details. IDs: %s",
                len(bad), bad[:5],
            )

    _safe("mandate-description", _do)


def _checkMandateSlugRules(db: DatabaseConnector) -> None:
    def _do() -> None:
        rows = db.getRecordset(Mandate)
        seen: set[str] = set()
        bad: list[str] = []
        for r in sorted(rows, key=lambda x: str(x.get("id", ""))):
            mid = r.get("id")
            if not mid:
                continue
            name = (r.get("name") or "").strip()
            labelRaw = r.get("label")
            labelEmpty = not (labelRaw or "").strip() if labelRaw is not None else True
            invalid = not isValidMandateName(name)
            collides = name in seen
            if not invalid and not collides:
                seen.add(name)
            if labelEmpty or invalid or collides:
                bad.append(str(mid))
        if bad:
            logger.warning(
                "Legacy-data check: %d Mandate row(s) violate slug/label rules "
                "(removed migration: _migrateMandateNameLabelSlugRules). "
                "Run scripts/script_db_audit_legacy_state.py for details. IDs: %s",
                len(bad), bad[:5],
            )

    _safe("mandate-slug-rules", _do)


def _checkLegacyRootMandate(db: DatabaseConnector) -> None:
    def _do() -> None:
        legacy = db.getRecordset(Mandate, recordFilter={"name": "Root"})
        rootRows = db.getRecordset(Mandate, recordFilter={"name": "root"})
        legacyByFlag = [r for r in rootRows if not r.get("isSystem")]
        all_ = list(legacy) + legacyByFlag
        if all_:
            logger.warning(
                "Legacy-data check: %d Root-Mandate row(s) still in legacy form "
                "(removed migration: initRootMandate-legacy-branch). IDs: %s",
                len(all_), [r.get("id") for r in all_][:5],
            )

    _safe("root-mandate-legacy", _do)


def _checkSysadminRole(db: DatabaseConnector) -> None:
    def _do() -> None:
        rootMandates = db.getRecordset(
            Mandate, recordFilter={"name": "root", "isSystem": True}
        )
        if not rootMandates:
            return
        rootId = str(rootMandates[0].get("id"))
        rows = db.getRecordset(
            Role,
            recordFilter={
                "roleLabel": "sysadmin",
                "mandateId": rootId,
                "featureInstanceId": None,
            },
        )
        if rows:
            logger.warning(
                "Legacy-data check: %d 'sysadmin' role(s) still present in root mandate "
                "(removed migration: _migrateAndDropSysAdminRole). "
                "Authority is now User.isPlatformAdmin -- migrate manually. IDs: %s",
                len(rows), [r.get("id") for r in rows],
            )

    _safe("sysadmin-role", _do)


def _backfillTargetFeatureInstanceId() -> None:
    """Idempotent backfill: set targetFeatureInstanceId = featureInstanceId
    for all non-template AutoWorkflow rows where it is still NULL.

    Connects to ``poweron_graphicaleditor`` independently.
    """
    def _do() -> None:
        from modules.shared.configuration import APP_CONFIG
        from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import AutoWorkflow

        dbHost = APP_CONFIG.get("DB_HOST", "localhost")
        dbUser = APP_CONFIG.get("DB_USER")
        dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD")
        dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
        geDb = DatabaseConnector(
            dbHost=dbHost,
            dbDatabase="poweron_graphicaleditor",
            dbUser=dbUser,
            dbPassword=dbPassword,
            dbPort=dbPort,
            userId=None,
        )
        if not geDb._ensureTableExists(AutoWorkflow):
            return

        rows = geDb.getRecordset(AutoWorkflow) or []
        backfilled = 0
        for r in rows:
            if r.get("isTemplate"):
                continue
            if r.get("targetFeatureInstanceId"):
                continue
            srcId = r.get("featureInstanceId")
            if not srcId:
                continue
            geDb.recordModify(AutoWorkflow, r["id"], {"targetFeatureInstanceId": srcId})
            backfilled += 1

        if backfilled:
            logger.info(
                "targetFeatureInstanceId backfill: set %d non-template AutoWorkflow row(s) "
                "to their featureInstanceId",
                backfilled,
            )

    _safe("backfill-targetFeatureInstanceId", _do)

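Side note on the guard semantics: a minimal usage sketch of the one-shot behaviour (only the import path and call are from this change; the comments are illustrative):

# runLegacyDataChecks executes its five checks at most once per process,
# no matter how often initBootstrap is re-entered.
from modules.interfaces._legacyMigrationTelemetry import runLegacyDataChecks

runLegacyDataChecks(db)  # first call: checks run, findings logged as WARN
runLegacyDataChecks(db)  # later calls: immediate no-op (_alreadyRan is True)
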
@@ -111,6 +111,19 @@ class AiObjects:
                processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1,
            )

        allowedModels = getattr(options, 'allowedModels', None) if options else None
        if allowedModels:
            filteredModels = [m for m in availableModels if m.name in allowedModels]
            if filteredModels:
                availableModels = filteredModels
            else:
                errorMsg = f"No models match allowedModels {allowedModels} (providers={allowedProviders}) for operation {options.operationType}"
                logger.error(errorMsg)
                return AiCallResponse(
                    content=errorMsg, modelName="error", priceCHF=0.0,
                    processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1,
                )

        failoverModelList = modelSelector.getFailoverModelList(prompt, context, options, availableModels)

        if not failoverModelList:

@@ -364,6 +377,19 @@ class AiObjects:
            )
            return

        allowedModels = getattr(options, 'allowedModels', None) if options else None
        if allowedModels:
            filtered = [m for m in availableModels if m.name in allowedModels]
            if filtered:
                availableModels = filtered
            else:
                yield AiCallResponse(
                    content=f"No models match allowedModels {allowedModels} (providers={allowedProviders}) for operation {options.operationType}",
                    modelName="error", priceCHF=0.0, processingTime=0.0,
                    bytesSent=0, bytesReceived=0, errorCount=1,
                )
                return

        failoverModelList = modelSelector.getFailoverModelList(
            request.prompt, request.context or "", options, availableModels
        )

@@ -516,6 +542,14 @@ class AiObjects:
        else:
            logger.warning(f"No embedding models match allowedProviders {allowedProviders}")

        allowedModels = getattr(options, 'allowedModels', None) if options else None
        if allowedModels:
            filtered = [m for m in availableModels if m.name in allowedModels]
            if filtered:
                availableModels = filtered
            else:
                logger.warning(f"No embedding models match allowedModels {allowedModels}")

        failoverModelList = modelSelector.getFailoverModelList(
            combinedText, "", options, availableModels
        )

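All three hunks apply the same narrowing rule; a standalone sketch of that filter (function name and signature are illustrative, not from the diff):

def filterByAllowedModels(availableModels: list, allowedModels: list[str] | None) -> list:
    # No allow-list means no restriction; an empty intersection is NOT
    # silently widened -- the callers above treat it as an error or warning.
    if not allowedModels:
        return availableModels
    return [m for m in availableModels if m.name in allowedModels]
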
@@ -56,14 +56,8 @@ def initBootstrap(db: DatabaseConnector) -> None:

    logger.info("Starting system bootstrap")

    # Initialize root mandate
    mandateId = initRootMandate(db)

    # Migrate existing mandate records: description -> label
    _migrateMandateDescriptionToLabel(db)
    _migrateMandateNameLabelSlugRules(db)

    # Clean up duplicate roles and fix corrupted templates FIRST
    _deduplicateRoles(db)

    # Initialize system role TEMPLATES (mandateId=None, isSystemRole=True)

@@ -76,14 +70,6 @@ def initBootstrap(db: DatabaseConnector) -> None:
    # This also serves as migration for existing mandates that don't have instance roles yet
    _ensureAllMandatesHaveSystemRoles(db)

    # Migration: eliminate the legacy ``sysadmin`` role in root mandate
    # (replaced by ``User.isPlatformAdmin`` flag — see
    # wiki/c-work/4-done/2026-04-sysadmin-authority-split.md).
    # Idempotent: noop after first successful run.
    if mandateId:
        _migrateAndDropSysAdminRole(db, mandateId)

    # Ensure UI rules for navigation items (admin/user/viewer roles)
    _ensureUiContextRules(db)

    # Initialize admin user

@@ -129,9 +115,22 @@ def initBootstrap(db: DatabaseConnector) -> None:
    # Bootstrap system workflow templates for graphical editor
    _bootstrapSystemTemplates(db)

    # Sync feature template workflows (update graph of existing instance workflows
    # whose templateSourceId matches a current code-defined template)
    _syncFeatureTemplateWorkflows()

    # Ensure billing settings and accounts exist for all mandates
    _bootstrapBilling()

    # Telemetry: warn if leftovers of the removed idempotent migrations
    # reappear (edge case: an old DB restore or similar).
    # Read-only; never fails the boot.
    try:
        from modules.interfaces._legacyMigrationTelemetry import runLegacyDataChecks
        runLegacyDataChecks(db)
    except Exception as e:
        logger.warning(f"Legacy-data telemetry skipped: {e}")


def _bootstrapBilling() -> None:
    """

@@ -195,6 +194,97 @@ def _bootstrapSystemTemplates(db: DatabaseConnector) -> None:
        logger.warning(f"System workflow template bootstrap failed: {e}")


def _syncFeatureTemplateWorkflows() -> None:
    """Sync existing instance-scoped workflows with current code-defined templates.

    For each feature that exposes getTemplateWorkflows(), find all AutoWorkflow
    rows whose templateSourceId matches a template ID and update their graph
    if the code-defined version has changed. Preserves instance-specific
    fields (label, tags, targetFeatureInstanceId, invocations, active).
    Idempotent, runs on every boot.
    """
    import json

    try:
        from modules.system.registry import loadFeatureMainModules
        from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import AutoWorkflow
        from modules.features.graphicalEditor.interfaceFeatureGraphicalEditor import graphicalEditorDatabase

        mainModules = loadFeatureMainModules()

        templatesBySourceId: dict = {}
        for featureCode, mod in mainModules.items():
            getTemplateWorkflows = getattr(mod, "getTemplateWorkflows", None)
            if not getTemplateWorkflows:
                continue
            try:
                templates = getTemplateWorkflows() or []
            except Exception:
                continue
            for tpl in templates:
                tplId = tpl.get("id")
                if tplId:
                    templatesBySourceId[tplId] = tpl

        if not templatesBySourceId:
            logger.info("_syncFeatureTemplateWorkflows: no templates found, skipping")
            return
        logger.info(f"_syncFeatureTemplateWorkflows: found {len(templatesBySourceId)} template(s): {list(templatesBySourceId.keys())}")

        greenfieldDb = DatabaseConnector(
            dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
            dbDatabase=graphicalEditorDatabase,
            dbUser=APP_CONFIG.get("DB_USER"),
            dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD"),
        )

        updated = 0
        for sourceId, tpl in templatesBySourceId.items():
            instances = greenfieldDb.getRecordset(AutoWorkflow, recordFilter={
                "templateSourceId": sourceId,
                "isTemplate": False,
            })
            if not instances:
                continue

            canonicalGraph = tpl.get("graph", {})

            for inst in instances:
                instId = inst.get("id") if isinstance(inst, dict) else getattr(inst, "id", None)
                targetInstanceId = (
                    inst.get("targetFeatureInstanceId") if isinstance(inst, dict)
                    else getattr(inst, "targetFeatureInstanceId", None)
                ) or ""

                graphJson = json.dumps(canonicalGraph)
                graphJson = graphJson.replace("{{featureInstanceId}}", targetInstanceId)
                newGraph = json.loads(graphJson)

                existingGraph = inst.get("graph") if isinstance(inst, dict) else getattr(inst, "graph", None)
                if isinstance(existingGraph, str):
                    try:
                        existingGraph = json.loads(existingGraph)
                    except Exception:
                        existingGraph = None

                if existingGraph == newGraph:
                    logger.debug(f"_syncFeatureTemplateWorkflows: graph unchanged for workflow {instId} (template={sourceId})")
                    continue
                logger.debug(f"_syncFeatureTemplateWorkflows: graph DIFFERS for workflow {instId} (template={sourceId}), updating")

                greenfieldDb.recordModify(AutoWorkflow, instId, {"graph": newGraph})
                updated += 1
                logger.info(f"_syncFeatureTemplateWorkflows: updated graph for workflow {instId} (template={sourceId})")

        if updated:
            logger.info(f"_syncFeatureTemplateWorkflows: synced {updated} workflow(s) with current templates")
        else:
            logger.info("_syncFeatureTemplateWorkflows: all instance graphs already match current templates")
        greenfieldDb.close()
    except Exception as e:
        logger.warning(f"Feature template workflow sync failed: {e}")


def _buildSystemTemplates():
    """Build the graph definitions for platform system templates."""
    return [

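The sync above rewrites the {{featureInstanceId}} placeholder through a JSON round-trip; a minimal self-contained sketch of that mechanism (the sample graph is illustrative):

import json

canonicalGraph = {"nodes": [{"config": {"instance": "{{featureInstanceId}}"}}]}
rendered = json.loads(
    json.dumps(canonicalGraph).replace("{{featureInstanceId}}", "inst-42")
)
assert rendered["nodes"][0]["config"]["instance"] == "inst-42"
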
@@ -396,21 +486,12 @@ def initRootMandate(db: DatabaseConnector) -> Optional[str]:
    Returns:
        Mandate ID if created or found, None otherwise
    """
    # Find existing root mandate by name AND isSystem flag
    existingMandates = db.getRecordset(Mandate, recordFilter={"name": "root", "isSystem": True})
    if existingMandates:
        mandateId = existingMandates[0].get("id")
        logger.info(f"Root mandate already exists with ID {mandateId}")
        return mandateId

    # Check for legacy root mandates (name="Root" without isSystem flag) and migrate
    legacyMandates = db.getRecordset(Mandate, recordFilter={"name": "Root"})
    if legacyMandates:
        mandateId = legacyMandates[0].get("id")
        logger.info(f"Migrating legacy Root mandate {mandateId}: setting name='root', isSystem=True")
        db.recordModify(Mandate, mandateId, {"name": "root", "isSystem": True})
        return mandateId


    logger.info("Creating Root mandate")
    rootMandate = Mandate(name="root", label="Root", isSystem=True, enabled=True)
    createdMandate = db.recordCreate(Mandate, rootMandate)

@@ -419,98 +500,6 @@ def initRootMandate(db: DatabaseConnector) -> Optional[str]:
    return mandateId


def _migrateMandateDescriptionToLabel(db: DatabaseConnector) -> None:
    """
    Migration: Rename 'description' field to 'label' in all Mandate records.
    Copies existing 'description' values to 'label' and removes the old field.
    Safe to run multiple times (idempotent).
    """
    allMandates = db.getRecordset(Mandate)
    migratedCount = 0
    for mandateRecord in allMandates:
        mandateId = mandateRecord.get("id")
        hasDescription = "description" in mandateRecord and mandateRecord.get("description") is not None
        hasLabel = "label" in mandateRecord and mandateRecord.get("label") is not None

        if hasDescription and not hasLabel:
            # Copy description to label
            updateData = {"label": mandateRecord["description"]}
            db.recordModify(Mandate, mandateId, updateData)
            migratedCount += 1
            logger.info(f"Migrated mandate {mandateId}: description -> label")

    if migratedCount > 0:
        logger.info(f"Migrated {migratedCount} mandate(s) from description to label")
    else:
        logger.debug("No mandate description->label migration needed")


def _migrateMandateNameLabelSlugRules(db: DatabaseConnector) -> None:
    """
    Migration: normalize Mandate.name to the slug rules ([a-z0-9-], length 2..32, single
    hyphen segments) and ensure Mandate.label is non-empty.

    Rules (see wiki/c-work/1-plan/2026-04-mandate-name-label-logic.md):
    1. If ``label`` is empty/None → set ``label := name`` (or "Mandate" when both empty).
    2. If ``name`` is not a valid slug, or collides with an earlier mandate in stable id
       order, allocate a unique slug from the (now non-empty) ``label`` using
       ``slugifyMandateName`` + ``allocateUniqueMandateSlug``.

    Idempotent: a second run is a no-op because all valid names stay valid and stay unique.
    Each rename and label fill-in is logged for audit.
    """
    from modules.shared.mandateNameUtils import (
        allocateUniqueMandateSlug,
        isValidMandateName,
        slugifyMandateName,
    )

    allRows = db.getRecordset(Mandate)
    if not allRows:
        return
    sortedRows = sorted(allRows, key=lambda r: str(r.get("id", "")))

    used: set[str] = set()
    labelFills = 0
    nameRenames: list[tuple[str, str, str]] = []

    for rec in sortedRows:
        mid = rec.get("id")
        if not mid:
            continue
        name = (rec.get("name") or "").strip()
        labelRaw = rec.get("label")
        label = (labelRaw or "").strip() if labelRaw is not None else ""

        if not label:
            label = name if name else "Mandate"
            db.recordModify(Mandate, mid, {"label": label})
            labelFills += 1
            logger.info(f"Mandate {mid}: filled empty label with '{label}'")

        nameFits = isValidMandateName(name)
        nameCollides = name in used
        if nameFits and not nameCollides:
            used.add(name)
            continue

        base = slugifyMandateName(label) or "mn"
        newName = allocateUniqueMandateSlug(base, used)
        used.add(newName)
        if newName != name:
            db.recordModify(Mandate, mid, {"name": newName})
            nameRenames.append((str(mid), name, newName))
            logger.info(f"Mandate {mid}: renamed name '{name}' -> '{newName}'")

    if labelFills or nameRenames:
        logger.info(
            "Mandate name/label slug migration: %d label fill-in(s), %d name rename(s)",
            labelFills, len(nameRenames),
        )
    else:
        logger.debug("No mandate name/label slug migration needed")


def initAdminUser(db: DatabaseConnector, mandateId: Optional[str]) -> Optional[str]:
    """
    Creates the Admin user if it doesn't exist.

@@ -837,101 +826,6 @@ def copySystemRolesToMandate(db: DatabaseConnector, mandateId: str) -> int:
    return copiedCount


def _migrateAndDropSysAdminRole(db: DatabaseConnector, mandateId: str) -> None:
    """
    One-shot migration: eliminate the legacy ``sysadmin`` role in the root mandate.

    Authority semantics moved to two orthogonal flags on User:
    - ``isSysAdmin`` → Infrastructure-Operator (RBAC bypass)
    - ``isPlatformAdmin`` → Cross-Mandate-Governance (no bypass)

    Migration steps (idempotent):
    1. Find sysadmin role(s) in root mandate. If none exist → done.
    2. For every UserMandateRole row referencing such a role: set
       ``user.isPlatformAdmin = True`` (preserves cross-mandate authority).
    3. Delete those UserMandateRole rows.
    4. Delete AccessRules attached to the sysadmin role.
    5. Delete the sysadmin Role record.

    Args:
        db: Database connector instance
        mandateId: Root mandate ID
    """
    sysadminRoles = db.getRecordset(
        Role,
        recordFilter={"roleLabel": "sysadmin", "mandateId": mandateId, "featureInstanceId": None},
    )
    if not sysadminRoles:
        logger.debug("Sysadmin role migration: no legacy sysadmin role present, nothing to do")
        return

    sysadminRoleIds = [str(r.get("id")) for r in sysadminRoles if r.get("id")]
    logger.warning(
        f"Sysadmin role migration: found {len(sysadminRoleIds)} legacy sysadmin role(s) "
        f"in root mandate, migrating to isPlatformAdmin flag"
    )

    # 1) Promote every holder to isPlatformAdmin=True
    promoted = 0
    for sysadminRoleId in sysadminRoleIds:
        umRoleRows = db.getRecordset(
            UserMandateRole, recordFilter={"roleId": sysadminRoleId}
        )
        userMandateIds = [str(r.get("userMandateId")) for r in umRoleRows if r.get("userMandateId")]
        if not userMandateIds:
            continue

        # Resolve userIds via UserMandate
        userIds = set()
        for umId in userMandateIds:
            ums = db.getRecordset(UserMandate, recordFilter={"id": umId})
            for um in ums:
                uid = um.get("userId") if isinstance(um, dict) else getattr(um, "userId", None)
                if uid:
                    userIds.add(str(uid))

        for userId in userIds:
            users = db.getRecordset(UserInDB, recordFilter={"id": userId})
            if not users:
                continue
            current = users[0].get("isPlatformAdmin", False)
            if not current:
                db.recordModify(UserInDB, userId, {"isPlatformAdmin": True})
                promoted += 1
                logger.warning(
                    f"Sysadmin role migration: granted isPlatformAdmin=True to user {userId}"
                )

        # 2) Delete UserMandateRole rows
        for umRow in umRoleRows:
            rowId = umRow.get("id") if isinstance(umRow, dict) else getattr(umRow, "id", None)
            if rowId:
                try:
                    db.recordDelete(UserMandateRole, str(rowId))
                except Exception as e:
                    logger.error(f"Sysadmin role migration: failed to drop UserMandateRole {rowId}: {e}")

        # 3) Delete AccessRules
        accessRules = db.getRecordset(AccessRule, recordFilter={"roleId": sysadminRoleId})
        for ar in accessRules:
            arId = ar.get("id") if isinstance(ar, dict) else getattr(ar, "id", None)
            if arId:
                try:
                    db.recordDelete(AccessRule, str(arId))
                except Exception as e:
                    logger.error(f"Sysadmin role migration: failed to drop AccessRule {arId}: {e}")

        # 4) Delete the Role
        try:
            db.recordDelete(Role, sysadminRoleId)
        except Exception as e:
            logger.error(f"Sysadmin role migration: failed to drop Role {sysadminRoleId}: {e}")

    logger.warning(
        f"Sysadmin role migration: completed; promoted {promoted} user(s) to isPlatformAdmin"
    )


def _getRoleId(db: DatabaseConnector, roleLabel: str) -> Optional[str]:
    """
    Get role ID by label, using cache or database lookup.

@@ -1268,19 +1268,7 @@ class AppObjects:
            result = []
            for conn_dict in connections:
                try:
                    # Create UserConnection object
                    connection = UserConnection(
                        id=conn_dict["id"],
                        userId=conn_dict["userId"],
                        authority=conn_dict.get("authority"),
                        externalId=conn_dict.get("externalId", ""),
                        externalUsername=conn_dict.get("externalUsername", ""),
                        externalEmail=conn_dict.get("externalEmail"),
                        status=conn_dict.get("status", "pending"),
                        connectedAt=conn_dict.get("connectedAt"),
                        lastChecked=conn_dict.get("lastChecked"),
                        expiresAt=conn_dict.get("expiresAt"),
                    )
                    connection = UserConnection.model_validate(conn_dict)
                    result.append(connection)
                except Exception as e:
                    logger.error(

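The hunk swaps field-by-field construction for Pydantic's model_validate; a minimal sketch of the difference (the model below is an illustrative stand-in, not the real UserConnection):

from pydantic import BaseModel

class Conn(BaseModel):              # illustrative stand-in
    id: str
    status: str = "pending"

row = {"id": "c1", "status": "active", "unknownColumn": "ignored by default"}
conn = Conn.model_validate(row)     # one call validates/coerces every declared field
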
@@ -1293,6 +1281,28 @@ class AppObjects:
            logger.error(f"Error getting user connections: {str(e)}")
            return []

    def getActiveKnowledgeConnections(self) -> List[UserConnection]:
        """Return all UserConnections with knowledgeIngestionEnabled=True and status=active.

        Used by the daily re-sync scheduler to determine which connections to re-index.
        """
        try:
            rows = self.db.getRecordset(
                UserConnection,
                recordFilter={"knowledgeIngestionEnabled": True, "status": ConnectionStatus.ACTIVE.value},
            )
            result = []
            for row in rows or []:
                try:
                    conn = UserConnection.model_validate(row) if isinstance(row, dict) else row
                    result.append(conn)
                except Exception as _e:
                    logger.warning(f"getActiveKnowledgeConnections: could not parse row: {_e}")
            return result
        except Exception as e:
            logger.error(f"getActiveKnowledgeConnections failed: {e}")
            return []

    def getUserConnectionById(self, connectionId: str) -> Optional[UserConnection]:
        """Get a single UserConnection by ID or by reference string (connection:authority:username)."""
        try:

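A hedged sketch of the daily re-sync loop the docstring mentions (the scheduler entry point is hypothetical, only the query method is from the diff):

for conn in appInterface.getActiveKnowledgeConnections():
    scheduleReindex(conn.id)   # hypothetical re-index hook, invoked once per connection
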
@@ -1317,18 +1327,21 @@ class AppObjects:

            if connections:
                conn_dict = connections[0]
                return UserConnection(
                    id=conn_dict["id"],
                    userId=conn_dict["userId"],
                    authority=conn_dict.get("authority"),
                    externalId=conn_dict.get("externalId", ""),
                    externalUsername=conn_dict.get("externalUsername", ""),
                    externalEmail=conn_dict.get("externalEmail"),
                    status=conn_dict.get("status", "pending"),
                    connectedAt=conn_dict.get("connectedAt"),
                    lastChecked=conn_dict.get("lastChecked"),
                    expiresAt=conn_dict.get("expiresAt"),
                )
                try:
                    return UserConnection.model_validate(conn_dict)
                except Exception:
                    return UserConnection(
                        id=conn_dict["id"],
                        userId=conn_dict["userId"],
                        authority=conn_dict.get("authority"),
                        externalId=conn_dict.get("externalId", ""),
                        externalUsername=conn_dict.get("externalUsername", ""),
                        externalEmail=conn_dict.get("externalEmail"),
                        status=conn_dict.get("status", "pending"),
                        connectedAt=conn_dict.get("connectedAt"),
                        lastChecked=conn_dict.get("lastChecked"),
                        expiresAt=conn_dict.get("expiresAt"),
                    )
            return None
        except Exception as e:
            logger.error(f"Error getting user connection by ID: {str(e)}")

@@ -4014,6 +4027,59 @@ class AppObjects:
            logger.error(f"Error deleting role {roleId}: {str(e)}")
            raise

    # -------------------------------------------------------------------------
    # Table Grouping (user-defined groups for FormGeneratorTable instances)
    # -------------------------------------------------------------------------

    def getTableGrouping(self, contextKey: str):
        """
        Load the group tree for the current user and the given contextKey.

        Returns a TableGrouping instance or None if no grouping has been saved yet.
        contextKey identifies the table instance, e.g. "connections", "prompts",
        "admin/users", "trustee/{instanceId}/documents".
        """
        from modules.datamodels.datamodelPagination import TableGrouping
        try:
            records = self.db.getRecordset(
                TableGrouping,
                recordFilter={"userId": str(self.userId), "contextKey": contextKey},
            )
            if not records:
                return None
            row = records[0]
            return TableGrouping.model_validate(row) if isinstance(row, dict) else row
        except Exception as e:
            logger.error(f"getTableGrouping failed for user={self.userId} key={contextKey}: {e}")
            return None

    def upsertTableGrouping(self, contextKey: str, rootGroups: list):
        """
        Create or replace the group tree for the current user and contextKey.

        rootGroups is a list of TableGroupNode-compatible dicts (the full tree).
        Returns the saved TableGrouping instance.
        """
        from modules.datamodels.datamodelPagination import TableGrouping
        from modules.shared.timeUtils import getUtcTimestamp
        try:
            existing = self.getTableGrouping(contextKey)
            data = {
                "id": existing.id if existing else str(uuid.uuid4()),
                "userId": str(self.userId),
                "contextKey": contextKey,
                "rootGroups": rootGroups,
                "updatedAt": getUtcTimestamp(),
            }
            if existing:
                self.db.recordModify(TableGrouping, existing.id, data)
            else:
                self.db.recordCreate(TableGrouping, data)
            return TableGrouping.model_validate(data)
        except Exception as e:
            logger.error(f"upsertTableGrouping failed for user={self.userId} key={contextKey}: {e}")
            raise


    # Public Methods

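A short usage sketch of the new grouping API (the group payload and the `appInterface` handle are illustrative; the methods and node shape are from the diff):

appInterface.upsertTableGrouping("connections", [
    {"id": "g1", "name": "OAuth", "itemIds": ["conn-1"], "subGroups": [], "meta": {}},
])
grouping = appInterface.getTableGrouping("connections")  # -> TableGrouping or None
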
@@ -93,6 +93,46 @@ class KnowledgeObjects:
        self.db.recordModify(FileContentIndex, fileId, {"status": status})
        return True

    def deleteFileContentIndexByConnectionId(self, connectionId: str) -> Dict[str, int]:
        """Delete all FileContentIndex rows (and their ContentChunks) for a connection.

        Used when a UserConnection is revoked / disconnected so the knowledge corpus
        no longer references data the user no longer grants access to. Returns a dict
        with counts to support observability logs.
        """
        if not connectionId:
            return {"indexRows": 0, "chunks": 0}

        rows = self.db.getRecordset(
            FileContentIndex, recordFilter={"connectionId": connectionId}
        )
        mandateIds: set = set()
        chunkCount = 0
        indexCount = 0
        for row in rows:
            fid = row.get("id") if isinstance(row, dict) else getattr(row, "id", None)
            mid = row.get("mandateId") if isinstance(row, dict) else getattr(row, "mandateId", "")
            if not fid:
                continue
            chunks = self.db.getRecordset(ContentChunk, recordFilter={"fileId": fid})
            for chunk in chunks:
                if self.db.recordDelete(ContentChunk, chunk["id"]):
                    chunkCount += 1
            if self.db.recordDelete(FileContentIndex, fid):
                indexCount += 1
            if mid:
                mandateIds.add(str(mid))

        for mid in mandateIds:
            try:
                from modules.interfaces.interfaceDbBilling import _getRootInterface

                _getRootInterface().reconcileMandateStorageBilling(mid)
            except Exception as ex:
                logger.warning("reconcileMandateStorageBilling after connection purge failed: %s", ex)

        return {"indexRows": indexCount, "chunks": chunkCount}

    def deleteFileContentIndex(self, fileId: str) -> bool:
        """Delete a FileContentIndex and all associated ContentChunks."""
        existing = self.getFileContentIndex(fileId)

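A hedged sketch of the disconnect path this method enables (the interface handle and event wiring are illustrative; the method name and return shape are from the diff):

counts = knowledgeInterface.deleteFileContentIndexByConnectionId(connection.id)
logger.info("purged %d index row(s), %d chunk(s)", counts["indexRows"], counts["chunks"])
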
@@ -603,41 +643,10 @@ def aggregateMandateRagTotalBytes(mandateId: str) -> int:
        if rid and str(rid) not in byId:
            byId[str(rid)] = row

    # DEPRECATED: file-ID-correlation fallback from poweron_management.
    # Only needed for pre-migration data where mandateId/featureInstanceId on the
    # FileContentIndex are empty. Safe to remove once all environments are migrated.
    _fallbackCount = 0
    try:
        from modules.datamodels.datamodelFiles import FileItem
        from modules.interfaces.interfaceDbManagement import ComponentObjects
        mgmtDb = ComponentObjects().db
        knowledgeIf = getInterface(None)

        fileIds: set = set()
        for f in mgmtDb.getRecordset(FileItem, recordFilter={"mandateId": mandateId}):
            fid = f.get("id") if isinstance(f, dict) else getattr(f, "id", None)
            if fid:
                fileIds.add(str(fid))
        for instId in instIds:
            for f in mgmtDb.getRecordset(FileItem, recordFilter={"featureInstanceId": instId}):
                fid = f.get("id") if isinstance(f, dict) else getattr(f, "id", None)
                if fid:
                    fileIds.add(str(fid))

        for fid in fileIds:
            if fid in byId:
                continue
            row = knowledgeIf.getFileContentIndex(fid)
            if row:
                byId[fid] = row
                _fallbackCount += 1
    except Exception as e:
        logger.warning("aggregateMandateRagTotalBytes fallback failed: %s", e)

    total = sum(int(r.get("totalSize") or 0) for r in byId.values())
    logger.info(
        "aggregateMandateRagTotalBytes(%s): %d indexes, %d bytes (fallback: %d)",
        mandateId, len(byId), total, _fallbackCount,
        "aggregateMandateRagTotalBytes(%s): %d indexes, %d bytes",
        mandateId, len(byId), total,
    )
    return total

@@ -20,7 +20,6 @@ from modules.security.rbac import RbacClass
from modules.datamodels.datamodelRbac import AccessRuleContext
from modules.datamodels.datamodelUam import AccessLevel
from modules.datamodels.datamodelFiles import FilePreview, FileItem, FileData
from modules.datamodels.datamodelFileFolder import FileFolder
from modules.datamodels.datamodelUtils import Prompt
from modules.datamodels.datamodelMessaging import (
    MessagingSubscription,

@@ -115,7 +114,15 @@ class ComponentObjects:

        # Update database context
        self.db.updateContext(self.userId)


    def _effective_user_id(self) -> Optional[str]:
        """User id for audit + FileData writes; singleton hub may unset userId but keep currentUser."""
        if self.userId:
            return self.userId
        if self.currentUser is not None:
            return getattr(self.currentUser, "id", None)
        return None

    def __del__(self):
        """Cleanup method to close database connection."""
        if hasattr(self, 'db') and self.db is not None:

@@ -1103,15 +1110,12 @@ class ComponentObjects:
                return newfileName
            counter += 1

    def createFile(self, name: str, mimeType: str, content: bytes, folderId: Optional[str] = None) -> FileItem:
    def createFile(self, name: str, mimeType: str, content: bytes) -> FileItem:
        """Creates a new file entry if user has permission. Computes fileHash and fileSize from content.

        Duplicate check: if a file with the same user + fileHash + fileName already exists,
        the existing file is returned instead of creating a new one.
        Same hash with different name is allowed (intentional copy by user).

        Args:
            folderId: Optional parent folder ID. None/empty means the root folder.
        """
        if not self.checkRbacPermission(FileItem, "create"):
            raise PermissionError("No permission to create files")

@@ -1139,11 +1143,6 @@ class ComponentObjects:
        else:
            scope = "personal"

        # Normalize folderId: treat empty string as "no folder" (= root) – NULL in DB
        normalizedFolderId: Optional[str] = folderId
        if isinstance(normalizedFolderId, str) and not normalizedFolderId.strip():
            normalizedFolderId = None

        fileItem = FileItem(
            mandateId=mandateId,
            featureInstanceId=featureInstanceId,

@@ -1152,12 +1151,32 @@ class ComponentObjects:
            mimeType=mimeType,
            fileSize=fileSize,
            fileHash=fileHash,
            folderId=normalizedFolderId,
        )

        # Ensure audit user is always stored: workflow/singleton contexts sometimes leave
        # the connector without _current_user_id, so _saveRecord skips sysCreatedBy →
        # getFile/createFileData RBAC then breaks (None != self.userId).
        uid = self._effective_user_id()
        if uid:
            fileItem = fileItem.model_copy(update={"sysCreatedBy": str(uid)})

        # Store in database
        self.db.recordCreate(FileItem, fileItem)

        verify = self.db.getRecordset(FileItem, recordFilter={"id": fileItem.id})
        verify_creator = (verify[0].get("sysCreatedBy") if verify else None)
        logger.info(
            "createFile: id=%s name=%s scope=%s model_sysCreatedBy=%r db_sysCreatedBy=%r mandateId=%r featureInstanceId=%r "
            "verify_rows=%s db=%s",
            fileItem.id,
            uniqueName,
            fileItem.scope,
            getattr(fileItem, "sysCreatedBy", None),
            verify_creator,
            mandateId or None,
            featureInstanceId if featureInstanceId else None,
            len(verify) if verify else 0,
            getattr(self.db, "dbDatabase", "?"),
        )

        return fileItem

    def _isFileOwner(self, file) -> bool:

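The audit fix stamps the creator via Pydantic's model_copy(update=...) before persisting; a minimal sketch (the model is an illustrative stand-in for FileItem):

from typing import Optional
from pydantic import BaseModel

class Item(BaseModel):              # illustrative stand-in
    id: str
    sysCreatedBy: Optional[str] = None

item = Item(id="f1")
item = item.model_copy(update={"sysCreatedBy": "user-123"})  # copy-with-update; original stays unchanged
assert item.sysCreatedBy == "user-123"
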
@@ -1277,382 +1296,47 @@ class ComponentObjects:
            self.db.connection.rollback()
            raise FileDeletionError(f"Error deleting files in batch: {str(e)}")

    # ---- Folder methods ----

    _RESERVED_FOLDER_NAMES = {"(Global)"}

    def _validateFolderName(self, name: str, parentId: Optional[str], excludeFolderId: Optional[str] = None):
        """Ensures folder name is not reserved and is unique within parent."""
        if name in self._RESERVED_FOLDER_NAMES:
            raise ValueError(f"Folder name '{name}' is reserved")
        if not name or not name.strip():
            raise ValueError("Folder name cannot be empty")
        existingFolders = self.db.getRecordset(FileFolder, recordFilter={"parentId": parentId or ""})
        for f in existingFolders:
            if f.get("name") == name and f.get("id") != excludeFolderId:
                raise ValueError(f"Folder '{name}' already exists in this directory")

    def _isDescendantOf(self, folderId: str, ancestorId: str) -> bool:
        """Checks if folderId is a descendant of ancestorId (circular reference check)."""
        visited = set()
        currentId = folderId
        while currentId:
            if currentId == ancestorId:
                return True
            if currentId in visited:
                break
            visited.add(currentId)
            folders = self.db.getRecordset(FileFolder, recordFilter={"id": currentId})
            if not folders:
                break
            currentId = folders[0].get("parentId")
        return False

    def _ensureFeatureInstanceFolder(self, featureInstanceId: str, mandateId: str = "") -> Optional[str]:
        """Return the folder ID for a feature instance, creating it on first use.
        The folder is named after the feature instance label."""
        existing = self.db.getRecordset(
            FileFolder,
            recordFilter={
                "featureInstanceId": featureInstanceId,
                "sysCreatedBy": self.userId or "",
            },
        )
        if existing:
            return existing[0].get("id")

        # Resolve the instance label for the folder name
        folderName = featureInstanceId[:8]
    def _ensureFeatureInstanceGroup(self, featureInstanceId: str, contextKey: str = "files/list") -> Optional[str]:
        """Return the groupId of the default group for a feature instance.
        Creates the group if it doesn't exist yet."""
        try:
            from modules.datamodels.datamodelFeatures import FeatureInstance
            from modules.security.rootAccess import getRootDbAppConnector
            dbApp = getRootDbAppConnector()
            instances = dbApp.getRecordset(FeatureInstance, recordFilter={"id": featureInstanceId})
            if instances:
                folderName = instances[0].get("label") or folderName
            import modules.interfaces.interfaceDbApp as _appIface
            appInterface = _appIface.getInterface(self._currentUser)
            existing = appInterface.getTableGrouping(contextKey)
            nodes = [n.model_dump() if hasattr(n, 'model_dump') else (n if isinstance(n, dict) else vars(n)) for n in (existing.rootGroups if existing else [])]
            # Look for group with name matching featureInstanceId
            def _find(nds):
                for nd in nds:
                    nid = nd.get("id") if isinstance(nd, dict) else getattr(nd, "id", None)
                    nmeta = nd.get("meta", {}) if isinstance(nd, dict) else getattr(nd, "meta", {})
                    if (nmeta or {}).get("featureInstanceId") == featureInstanceId:
                        return nid
                    subs = nd.get("subGroups", []) if isinstance(nd, dict) else getattr(nd, "subGroups", [])
                    result = _find(subs)
                    if result:
                        return result
                return None
            found = _find(nodes)
            if found:
                return found
            # Create new group
            import uuid
            newId = str(uuid.uuid4())
            newGroup = {
                "id": newId,
                "name": featureInstanceId,
                "itemIds": [],
                "subGroups": [],
                "meta": {"featureInstanceId": featureInstanceId},
            }
            nodes.append(newGroup)
            appInterface.upsertTableGrouping(contextKey, nodes)
            return newId
        except Exception as e:
            logger.warning(f"Could not resolve feature instance label: {e}")
            logger.error(f"_ensureFeatureInstanceGroup failed: {e}")
            return None

        folder = FileFolder(
            name=folderName,
            parentId=None,
            mandateId=mandateId,
            featureInstanceId=featureInstanceId,
        )
        created = self.db.recordCreate(FileFolder, folder)
        return created.get("id") if isinstance(created, dict) else getattr(created, "id", None)

    def getFolder(self, folderId: str) -> Optional[Dict[str, Any]]:
        """Returns a folder by ID if it belongs to the current user."""
        folders = self.db.getRecordset(FileFolder, recordFilter={"id": folderId, "sysCreatedBy": self.userId or ""})
        return folders[0] if folders else None

    def listFolders(self, parentId: Optional[str] = None) -> List[Dict[str, Any]]:
        """List folders visible to the current user.
        Own folders are always returned. Other users' folders are only
        returned when they contain files visible to the current user.
        Each folder is enriched with ``fileCount``."""
        recordFilter = {}
        if parentId is not None:
            recordFilter["parentId"] = parentId
        folders = self.db.getRecordset(FileFolder, recordFilter=recordFilter if recordFilter else None)

        if not folders:
            return folders

        folderIds = [f["id"] for f in folders if f.get("id")]
        fileCounts: Dict[str, int] = {}
        try:
            from modules.interfaces.interfaceRbac import buildFilesScopeWhereClause
            scopeClause = buildFilesScopeWhereClause(
                self.currentUser, "FileItem", self.db,
                self.mandateId, self.featureInstanceId,
                [], [],
            )

            self.db._ensure_connection()
            with self.db.connection.cursor() as cursor:
                baseQuery = (
                    'SELECT "folderId", COUNT(*) AS cnt '
                    'FROM "FileItem" '
                    'WHERE "folderId" = ANY(%s)'
                )
                queryValues: list = [folderIds]

                if scopeClause:
                    baseQuery += ' AND (' + scopeClause["condition"] + ')'
                    queryValues.extend(scopeClause["values"])

                baseQuery += ' GROUP BY "folderId"'
                cursor.execute(baseQuery, queryValues)
                for row in cursor.fetchall():
                    fileCounts[row["folderId"]] = row["cnt"]
        except Exception as e:
            logger.warning(f"Could not count files per folder: {e}")

        userId = self.userId or ""
        result = []
        for folder in folders:
            fc = fileCounts.get(folder.get("id", ""), 0)
            folder["fileCount"] = fc
            isOwn = folder.get("sysCreatedBy") == userId
            if isOwn or fc > 0:
                result.append(folder)

        return result

    def createFolder(self, name: str, parentId: Optional[str] = None) -> Dict[str, Any]:
        """Create a new folder with unique name validation."""
        self._validateFolderName(name, parentId)
        folder = FileFolder(
            name=name,
            parentId=parentId,
            mandateId=self.mandateId or "",
            featureInstanceId=self.featureInstanceId or "",
        )
        return self.db.recordCreate(FileFolder, folder)

    def renameFolder(self, folderId: str, newName: str) -> bool:
        """Rename a folder with unique name validation."""
        folder = self.getFolder(folderId)
        if not folder:
            raise FileNotFoundError(f"Folder {folderId} not found")
        self._validateFolderName(newName, folder.get("parentId"), excludeFolderId=folderId)
        return self.db.recordModify(FileFolder, folderId, {"name": newName})

    def updateFolder(self, folderId: str, updateData: Dict[str, Any]) -> bool:
        """
        Update folder metadata (e.g. ``scope``, ``neutralize``). Owner-only,
        same access model as renameFolder/moveFolder. Use ``renameFolder`` for
        ``name`` changes (uniqueness validation) and ``moveFolder`` for
        ``parentId`` changes (cycle/uniqueness validation).
        """
        if not updateData:
            return True
        folder = self.getFolder(folderId)
        if not folder:
            raise FileNotFoundError(f"Folder {folderId} not found")
        forbiddenKeys = {"id", "sysCreatedBy", "sysCreatedAt", "sysUpdatedAt"}
        cleaned: Dict[str, Any] = {k: v for k, v in updateData.items() if k not in forbiddenKeys}
        if "name" in cleaned:
            self._validateFolderName(cleaned["name"], folder.get("parentId"), excludeFolderId=folderId)
        return self.db.recordModify(FileFolder, folderId, cleaned)

    def moveFolder(self, folderId: str, targetParentId: Optional[str] = None) -> bool:
        """Move a folder to a new parent, with circular reference and unique name checks."""
        folder = self.getFolder(folderId)
        if not folder:
            raise FileNotFoundError(f"Folder {folderId} not found")
        if targetParentId and self._isDescendantOf(targetParentId, folderId):
            raise ValueError("Cannot move folder into its own subtree")
        self._validateFolderName(folder.get("name", ""), targetParentId, excludeFolderId=folderId)
        return self.db.recordModify(FileFolder, folderId, {"parentId": targetParentId})

    def moveFilesBatch(self, fileIds: List[str], targetFolderId: Optional[str] = None) -> Dict[str, Any]:
        """Move multiple files with one SQL update.
        Owner can always move; non-owners need RBAC ALL level."""
        uniqueIds = [str(fid) for fid in dict.fromkeys(fileIds or []) if fid]
        if not uniqueIds:
            return {"movedFiles": 0}

        if targetFolderId:
            targetFolder = self.getFolder(targetFolderId)
            if not targetFolder:
                raise FileNotFoundError(f"Target folder {targetFolderId} not found")

        try:
            self.db._ensure_connection()
            with self.db.connection.cursor() as cursor:
                cursor.execute(
                    'SELECT "id", "sysCreatedBy" FROM "FileItem" WHERE "id" = ANY(%s)',
                    (uniqueIds,),
                )
                rows = cursor.fetchall()
                foundIds = {row["id"] for row in rows}
                missing = sorted(set(uniqueIds) - foundIds)
                if missing:
                    raise FileNotFoundError(f"Files not found: {missing}")

                for row in rows:
                    self._requireFileWriteAccess(row, row["id"], "update")

                accessibleIds = [row["id"] for row in rows]
                cursor.execute(
                    'UPDATE "FileItem" SET "folderId" = %s, "sysModifiedAt" = %s, "sysModifiedBy" = %s '
                    'WHERE "id" = ANY(%s)',
                    (targetFolderId, getUtcTimestamp(), self.userId or "", accessibleIds),
                )
                movedFiles = cursor.rowcount

            self.db.connection.commit()
            return {"movedFiles": movedFiles}
        except Exception as e:
            logger.error(f"Error moving files in batch: {e}")
            self.db.connection.rollback()
            raise FileError(f"Error moving files in batch: {str(e)}")

    def moveFoldersBatch(self, folderIds: List[str], targetParentId: Optional[str] = None) -> Dict[str, Any]:
        """Move multiple folders with one SQL update after validation."""
        uniqueIds = [str(fid) for fid in dict.fromkeys(folderIds or []) if fid]
        if not uniqueIds:
            return {"movedFolders": 0}

        foldersToMove: List[Dict[str, Any]] = []
        for folderId in uniqueIds:
            folder = self.getFolder(folderId)
            if not folder:
                raise FileNotFoundError(f"Folder {folderId} not found")
            if targetParentId and self._isDescendantOf(targetParentId, folderId):
                raise ValueError("Cannot move folder into its own subtree")
            foldersToMove.append(folder)

        existingInTarget = self.db.getRecordset(
            FileFolder,
            recordFilter={"parentId": targetParentId or "", "sysCreatedBy": self.userId or ""},
        )
        existingNames = {f.get("name"): f.get("id") for f in existingInTarget}
        movingNames: Dict[str, str] = {}
        movingIds = set(uniqueIds)

        for folder in foldersToMove:
            name = folder.get("name", "")
            folderId = folder.get("id")
            if name in movingNames and movingNames[name] != folderId:
                raise ValueError(f"Folder '{name}' already exists in this move batch")
            movingNames[name] = folderId

            existingId = existingNames.get(name)
            if existingId and existingId not in movingIds:
                raise ValueError(f"Folder '{name}' already exists in target directory")

        try:
            self.db._ensure_connection()
            with self.db.connection.cursor() as cursor:
                cursor.execute(
                    'UPDATE "FileFolder" SET "parentId" = %s, "sysModifiedAt" = %s, "sysModifiedBy" = %s '
                    'WHERE "id" = ANY(%s) AND "sysCreatedBy" = %s',
                    (targetParentId, getUtcTimestamp(), self.userId or "", uniqueIds, self.userId or ""),
                )
                movedFolders = cursor.rowcount

            self.db.connection.commit()
            return {"movedFolders": movedFolders}
        except Exception as e:
            logger.error(f"Error moving folders in batch: {e}")
            self.db.connection.rollback()
            raise FileError(f"Error moving folders in batch: {str(e)}")

    def deleteFolder(self, folderId: str, recursive: bool = False) -> Dict[str, Any]:
        """Delete a folder. If recursive, deletes all contents. Returns summary of deletions."""
        folder = self.getFolder(folderId)
        if not folder:
            raise FileNotFoundError(f"Folder {folderId} not found")

        childFolders = self.db.getRecordset(FileFolder, recordFilter={"parentId": folderId, "sysCreatedBy": self.userId or ""})
        childFiles = self._getFilesByCurrentUser(recordFilter={"folderId": folderId})

        if not recursive and (childFolders or childFiles):
            raise ValueError(
                f"Folder '{folder.get('name')}' is not empty "
                f"({len(childFiles)} files, {len(childFolders)} subfolders). "
                f"Use recursive=true to delete contents."
            )

        deletedFiles = 0
        deletedFolders = 0

        if recursive:
            for subFolder in childFolders:
                subResult = self.deleteFolder(subFolder["id"], recursive=True)
                deletedFiles += subResult.get("deletedFiles", 0)
                deletedFolders += subResult.get("deletedFolders", 0)
            for childFile in childFiles:
                try:
                    self.deleteFile(childFile["id"])
                    deletedFiles += 1
                except Exception as e:
                    logger.warning(f"Failed to delete file {childFile['id']} during folder deletion: {e}")

        self.db.recordDelete(FileFolder, folderId)
        deletedFolders += 1

        return {"deletedFiles": deletedFiles, "deletedFolders": deletedFolders}

    def deleteFoldersBatch(self, folderIds: List[str], recursive: bool = True) -> Dict[str, Any]:
        """Delete multiple folders and their content in batched SQL calls."""
        uniqueIds = [str(fid) for fid in dict.fromkeys(folderIds or []) if fid]
        if not uniqueIds:
            return {"deletedFiles": 0, "deletedFolders": 0}

        if not recursive:
            deletedFiles = 0
            deletedFolders = 0
            for folderId in uniqueIds:
                result = self.deleteFolder(folderId, recursive=False)
                deletedFiles += result.get("deletedFiles", 0)
                deletedFolders += result.get("deletedFolders", 0)
            return {"deletedFiles": deletedFiles, "deletedFolders": deletedFolders}

        try:
            self.db._ensure_connection()
            with self.db.connection.cursor() as cursor:
                cursor.execute(
                    'SELECT "id" FROM "FileFolder" WHERE "id" = ANY(%s) AND "sysCreatedBy" = %s',
                    (uniqueIds, self.userId or ""),
                )
                rootAccessibleIds = [row["id"] for row in cursor.fetchall()]
                if len(rootAccessibleIds) != len(uniqueIds):
                    missingIds = sorted(set(uniqueIds) - set(rootAccessibleIds))
                    raise FileNotFoundError(f"Folders not found or not accessible: {missingIds}")

                cursor.execute(
                    """
                    WITH RECURSIVE folder_tree AS (
                        SELECT "id"
                        FROM "FileFolder"
                        WHERE "id" = ANY(%s) AND "sysCreatedBy" = %s
                        UNION ALL
                        SELECT child."id"
                        FROM "FileFolder" child
                        INNER JOIN folder_tree ft ON child."parentId" = ft."id"
                        WHERE child."sysCreatedBy" = %s
                    )
                    SELECT DISTINCT "id" FROM folder_tree
                    """,
                    (rootAccessibleIds, self.userId or "", self.userId or ""),
                )
                allFolderIds = [row["id"] for row in cursor.fetchall()]

                cursor.execute(
                    'SELECT "id" FROM "FileItem" WHERE "folderId" = ANY(%s) AND "sysCreatedBy" = %s',
                    (allFolderIds, self.userId or ""),
                )
                allFileIds = [row["id"] for row in cursor.fetchall()]

                if allFileIds:
                    cursor.execute('DELETE FROM "FileData" WHERE "id" = ANY(%s)', (allFileIds,))
                    cursor.execute(
                        'DELETE FROM "FileItem" WHERE "id" = ANY(%s) AND "sysCreatedBy" = %s',
                        (allFileIds, self.userId or ""),
                    )
                    deletedFiles = cursor.rowcount
                else:
                    deletedFiles = 0

                cursor.execute(
                    'DELETE FROM "FileFolder" WHERE "id" = ANY(%s) AND "sysCreatedBy" = %s',
                    (allFolderIds, self.userId or ""),
                )
                deletedFolders = cursor.rowcount

            self.db.connection.commit()
            return {"deletedFiles": deletedFiles, "deletedFolders": deletedFolders}
        except Exception as e:
            logger.error(f"Error deleting folders in batch: {e}")
            self.db.connection.rollback()
            raise FileDeletionError(f"Error deleting folders in batch: {str(e)}")

    def copyFile(self, sourceFileId: str, targetFolderId: Optional[str] = None, newFileName: Optional[str] = None) -> FileItem:
    def copyFile(self, sourceFileId: str, newFileName: Optional[str] = None) -> FileItem:
        """Create a full duplicate of a file (FileItem + FileData)."""
        sourceFile = self.getFile(sourceFileId)
        if not sourceFile:

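The removed batch delete expanded the folder subtree with a recursive CTE before deleting children ahead of parents; a self-contained sketch of the pattern (table and columns mirror the SQL above; the open connection and dict-row cursor are assumed):

subtreeSql = '''
WITH RECURSIVE folder_tree AS (
    SELECT "id" FROM "FileFolder" WHERE "id" = ANY(%s)
    UNION ALL
    SELECT child."id"
    FROM "FileFolder" child
    JOIN folder_tree ft ON child."parentId" = ft."id"
)
SELECT DISTINCT "id" FROM folder_tree
'''
with connection.cursor() as cursor:  # assumed open psycopg connection returning dict rows
    cursor.execute(subtreeSql, (["root-folder-id"],))
    allFolderIds = [row["id"] for row in cursor.fetchall()]
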
@@ -1665,11 +1349,6 @@ class ComponentObjects:
        fileName = newFileName or sourceFile.fileName
        copiedFile = self.createFile(fileName, sourceFile.mimeType, sourceData)

        if targetFolderId:
            self.updateFile(copiedFile.id, {"folderId": targetFolderId})
        elif sourceFile.folderId:
            self.updateFile(copiedFile.id, {"folderId": sourceFile.folderId})

        self.createFileData(copiedFile.id, sourceData)
        return copiedFile

@ -1694,14 +1373,134 @@ class ComponentObjects:
|
|||
return success
|
||||
|
||||
# FileData methods - data operations
|
||||
|
||||
|
||||
def _getFileItemForDataWrite(self, fileId: str) -> Optional[FileItem]:
|
||||
"""Resolve FileItem for storing FileData: RBAC-aware getFile, then same-user row fallback.
|
||||
|
||||
createFile() can insert a row that getFile() still hides (e.g. scope NULL vs GROUP rules,
|
||||
or connector / context edge cases). The creator must still be allowed to attach blob data.
|
||||
"""
|
||||
logger.info(
|
||||
"[FileData] resolve start fileId=%s iface_userId=%r effective_uid=%r mandateId=%r featureInstanceId=%r db=%s",
|
||||
fileId,
|
||||
self.userId,
|
||||
self._effective_user_id(),
|
||||
self.mandateId,
|
||||
self.featureInstanceId,
|
||||
getattr(self.db, "dbDatabase", "?"),
|
||||
)
|
||||
file = self.getFile(fileId)
|
||||
if file:
|
||||
logger.info("[FileData] getFile OK fileId=%s", fileId)
|
||||
return file
|
||||
uid = self._effective_user_id()
|
||||
if not uid:
|
||||
logger.error(
|
||||
"[FileData] FAIL no user id fileId=%s userId=%r hasCurrentUser=%s",
|
||||
fileId,
|
||||
self.userId,
|
||||
self.currentUser is not None,
|
||||
)
|
||||
return None
|
||||
uid_s = str(uid)
|
||||
rows = self.db.getRecordset(FileItem, recordFilter={"id": fileId})
|
||||
if not rows:
|
||||
logger.error(
|
||||
"[FileData] FAIL no FileItem row fileId=%s (createFile committed to same db? db=%s)",
|
||||
fileId,
|
||||
getattr(self.db, "dbDatabase", "?"),
|
||||
)
|
||||
return None
|
||||
row = dict(rows[0])
|
||||
creator = row.get("sysCreatedBy")
|
||||
creator_s = str(creator) if creator is not None else None
|
||||
if creator_s != uid_s:
|
||||
if not creator_s:
|
||||
try:
|
||||
self.db.recordModify(FileItem, fileId, {"sysCreatedBy": uid_s})
|
||||
row["sysCreatedBy"] = uid_s
|
||||
logger.warning(
|
||||
"[FileData] patched NULL sysCreatedBy fileId=%s -> %s",
|
||||
fileId,
|
||||
uid_s,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"[FileData] FAIL patch sysCreatedBy fileId=%s: %s",
|
||||
fileId,
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
return None
|
||||
else:
|
||||
# _saveRecord used to overwrite explicit creators with contextvar "system"
|
||||
if creator_s == "system":
|
||||
try:
|
||||
self.db.recordModify(FileItem, fileId, {"sysCreatedBy": uid_s})
|
||||
row["sysCreatedBy"] = uid_s
|
||||
logger.warning(
|
||||
"[FileData] patched sysCreatedBy system→user fileId=%s -> %s",
|
||||
fileId,
|
||||
uid_s,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"[FileData] FAIL patch system sysCreatedBy fileId=%s: %s",
|
||||
fileId,
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
return None
|
||||
else:
|
||||
logger.error(
|
||||
"[FileData] FAIL creator mismatch fileId=%s row.sysCreatedBy=%r (%s) effective_uid=%r (%s) scope=%r",
|
||||
fileId,
|
||||
creator,
|
||||
type(creator).__name__,
|
||||
uid,
|
||||
type(uid).__name__,
|
||||
row.get("scope"),
|
||||
)
|
||||
return None
|
||||
logger.info(
|
||||
"[FileData] RBAC miss, owner fallback OK fileId=%s scope=%r sysCreatedBy=%r",
|
||||
fileId,
|
||||
row.get("scope"),
|
||||
row.get("sysCreatedBy"),
|
||||
)
|
||||
try:
|
||||
if row.get("sysCreatedAt") is None or row.get("sysCreatedAt") in (0, 0.0):
|
||||
row["sysCreatedAt"] = getUtcTimestamp()
|
||||
if row.get("scope") is None:
|
||||
row["scope"] = "personal"
|
||||
if row.get("neutralize") is None:
|
||||
row["neutralize"] = False
|
||||
return FileItem(**row)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"[FileData] FAIL FileItem(**row) fileId=%s keys=%s err=%s",
|
||||
fileId,
|
||||
list(row.keys()),
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
return None
|
||||
|
||||
def createFileData(self, fileId: str, data: bytes) -> bool:
|
||||
"""Stores the binary data of a file in the database."""
|
||||
try:
|
||||
logger.info(
|
||||
"[FileData] createFileData enter fileId=%s bytes=%s",
|
||||
fileId,
|
||||
len(data) if data is not None else 0,
|
||||
)
|
||||
# Check file access
|
||||
file = self.getFile(fileId)
|
||||
file = self._getFileItemForDataWrite(fileId)
|
||||
if not file:
|
||||
logger.error(f"File with ID {fileId} not found when storing data")
|
||||
logger.error(
|
||||
"[FileData] FAIL _getFileItemForDataWrite returned None fileId=%s",
|
||||
fileId,
|
||||
)
|
||||
return False
|
||||
|
||||
# Determine if this is a text-based format

@ -1745,13 +1544,11 @@ class ComponentObjects:
                }

            self.db.recordCreate(FileData, fileDataObj)

            # Clear cache to ensure fresh data
            logger.debug(f"Successfully stored data for file {fileId} (base64Encoded: {base64Encoded})")
            logger.info("[FileData] recordCreate OK fileId=%s base64Encoded=%s", fileId, base64Encoded)
            return True
        except Exception as e:
            logger.error(f"Error storing data for file {fileId}: {str(e)}")
            logger.error("Error storing data for file %s: %s", fileId, e, exc_info=True)
            return False

    def getFileData(self, fileId: str) -> Optional[bytes]:

@ -1884,18 +1681,14 @@ class ComponentObjects:
            logger.error(f"Error getting file content: {str(e)}")
            return None

    def saveUploadedFile(self, fileContent: bytes, fileName: str, folderId: Optional[str] = None) -> tuple[FileItem, str]:
        """Saves an uploaded file if user has permission.

        Args:
            folderId: Optional parent folder ID. None means root folder.
        """
    def saveUploadedFile(self, fileContent: bytes, fileName: str) -> tuple[FileItem, str]:
        """Saves an uploaded file if user has permission."""
        try:
            # Check file creation permission
            if not self.checkRbacPermission(FileItem, "create"):
                raise PermissionError("No permission to upload files")

            logger.debug(f"Starting upload process for file: {fileName} (folderId={folderId!r})")
            logger.debug(f"Starting upload process for file: {fileName}")

            if not isinstance(fileContent, bytes):
                logger.error(f"Invalid fileContent type: {type(fileContent)}")

@ -1921,7 +1714,6 @@ class ComponentObjects:
                name=fileName,
                mimeType=mimeType,
                content=fileContent,
                folderId=folderId,
            )

            # Save binary data

@ -347,6 +347,7 @@ class FeatureInterface:
                    "templateSourceId": templateId,
                    "templateScope": "instance",
                    "active": True,
                    "targetFeatureInstanceId": instanceId,
                })
                copied += 1
            except Exception as e:

@ -204,7 +204,6 @@ TABLE_NAMESPACE = {
    # Files - user-owned
    "FileItem": "files",
    "FileData": "files",
    "FileFolder": "files",
    # Automation - user-owned
    "AutomationDefinition": "automation",
    "AutomationTemplate": "automation",

@ -529,8 +528,7 @@ def getRecordsetPaginatedWithRBAC(
            if val is None:
                # val=None in pagination.filters means "match empty/null"
                # (same convention as connectorDbPostgre._buildPaginationClauses).
                # Covers both historical empty-string values and true NULLs
                # e.g. root-folder files where folderId may be "" or NULL.
                # Covers both historical empty-string values and true NULLs.
                whereConditions.append(f'("{key}" IS NULL OR "{key}"::TEXT = \'\')')
                continue
            if isinstance(val, dict):
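
Aside (not part of the diff): a minimal sketch of what the null-filter convention above emits, assuming whereConditions is later AND-joined into the final SQL:

whereConditions: list[str] = []
key, val = "folderId", None  # val=None means "match empty/null"
if val is None:
    whereConditions.append(f'("{key}" IS NULL OR "{key}"::TEXT = \'\')')
# whereConditions[0] -> ("folderId" IS NULL OR "folderId"::TEXT = '')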

@ -689,8 +687,7 @@ def getDistinctColumnValuesWithRBAC(
            if val is None:
                # val=None in pagination.filters means "match empty/null"
                # (same convention as connectorDbPostgre._buildPaginationClauses).
                # Covers both historical empty-string values and true NULLs
                # e.g. root-folder files where folderId may be "" or NULL.
                # Covers both historical empty-string values and true NULLs.
                whereConditions.append(f'("{key}" IS NULL OR "{key}"::TEXT = \'\')')
                continue
            if isinstance(val, dict):

@ -749,6 +746,7 @@ def buildFilesScopeWhereClause(
    Only own files: sysCreatedBy = currentUser

    WITH instance context (instance pages):
    - scope = 'personal' AND sysCreatedBy = me (creator's personal files; e.g. workflow outputs)
    - sysCreatedBy = me AND featureInstanceId = X (own personal files of this instance)
    - scope = 'featureInstance' AND featureInstanceId = X
    - scope = 'mandate' AND mandateId = M (M = mandate of the instance)

@ -782,6 +780,15 @@ def buildFilesScopeWhereClause(
    scopeParts: List[str] = []
    scopeValues: List = []

    # Personal files created by this user must remain visible even when the request
    # carries mandate/instance context (GROUP reads use this clause). Otherwise
    # createFile → createFileData → getFile fails and workflow outputs vanish from /files.
    # Also treat scope IS NULL as legacy/personal for the owner (column default not applied).
    scopeParts.append(
        '(("scope" = \'personal\' OR "scope" IS NULL) AND "sysCreatedBy" = %s)'
    )
    scopeValues.append(currentUser.id)

    if featureInstanceId:
        # 1) Own personal files of this specific instance
        scopeParts.append('("sysCreatedBy" = %s AND "featureInstanceId" = %s)')

0
modules/migrations/__init__.py
Normal file

240
modules/migrations/migrate_folders_to_groups.py
Normal file

@ -0,0 +1,240 @@
"""
One-time migration: Convert FileFolder tree + FileItem.folderId → table_groupings.

Run this BEFORE dropping the physical FileFolder table and FileItem.folderId column
from the database (those are separate Alembic/SQL steps).

Usage:
    python -m modules.migrations.migrate_folders_to_groups [--dry-run] [--verbose]

Steps:
    1. For each distinct (userId, mandateId) combination that has FileFolder records:
       a. Build the full folder tree (recursive)
       b. Write it as a TableGroupNode tree into table_groupings (contextKey='files/list')
          – merges with any existing groups rather than overwriting
       c. For each FileItem with a folderId that maps into this tree,
          add its id to the matching group's itemIds
    2. Print a summary (rows migrated, groups created, files assigned)
    3. If not --dry-run: commit the inserts/updates

NOTE: Schema changes (ALTER TABLE DROP COLUMN, DROP TABLE) are intentionally
NOT performed by this script. Run the corresponding Alembic migration
(migrations/versions/xxxx_drop_folder_columns.py) afterwards.
"""

import argparse
import json
import logging
import uuid
from typing import Optional

logger = logging.getLogger(__name__)


# ── Helpers ──────────────────────────────────────────────────────────────────

def _build_tree(folders: list, parent_id: Optional[str]) -> list:
    """Recursively build TableGroupNode-compatible dicts from a flat folder list."""
    children = [f for f in folders if f.get("parentId") == parent_id]
    result = []
    for folder in children:
        node = {
            "id": str(uuid.uuid4()),
            "name": folder["name"],
            "itemIds": [],
            "subGroups": _build_tree(folders, folder["id"]),
            "meta": {"migratedFromFolderId": folder["id"]},
        }
        result.append(node)
    return result
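
# Example (illustrative, not part of the migration): one root folder
#   {"id": "f1", "name": "Reports", "parentId": None}
# becomes a single TableGroupNode-shaped dict:
#   {"id": "<uuid4>", "name": "Reports", "itemIds": [],
#    "subGroups": [], "meta": {"migratedFromFolderId": "f1"}}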


def _assign_files_to_nodes(nodes: list, files_by_folder: dict) -> list:
    """Recursively assign file IDs to group nodes based on folder mapping."""
    for node in nodes:
        folder_id = (node.get("meta") or {}).get("migratedFromFolderId")
        if folder_id and folder_id in files_by_folder:
            node["itemIds"] = list(files_by_folder[folder_id])
        node["subGroups"] = _assign_files_to_nodes(node.get("subGroups", []), files_by_folder)
    return nodes


def _count_items(nodes: list) -> int:
    total = 0
    for node in nodes:
        total += len(node.get("itemIds", []))
        total += _count_items(node.get("subGroups", []))
    return total


def _now_ts() -> str:
    from modules.shared.timeUtils import getUtcTimestamp
    return getUtcTimestamp()


# ── Main migration ────────────────────────────────────────────────────────────

def run_migration(dry_run: bool = True, verbose: bool = False):
    """Main migration entry point."""
    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
    logger.info(f"Starting folder→group migration (dry_run={dry_run})")

    from modules.connectors.connectorDbPostgre import getCachedConnector

    connector = getCachedConnector()
    if not connector or not connector.connection:
        logger.error("Could not obtain a DB connection. Aborting.")
        return

    conn = connector.connection
    cur = conn.cursor()

    # ── 1. Check that the source tables still exist ───────────────────────────
    cur.execute("""
        SELECT EXISTS (
            SELECT 1 FROM information_schema.tables
            WHERE table_name = 'FileFolder'
        )
    """)
    folder_table_exists = cur.fetchone()[0]

    cur.execute("""
        SELECT EXISTS (
            SELECT 1 FROM information_schema.columns
            WHERE table_name = 'FileItem' AND column_name = 'folderId'
        )
    """)
    folder_column_exists = cur.fetchone()[0]

    if not folder_table_exists and not folder_column_exists:
        logger.info("FileFolder table and FileItem.folderId column not found — migration already applied or not needed.")
        return

    if not folder_table_exists:
        logger.warning("FileFolder table missing but FileItem.folderId column still present. Only file assignments will be migrated.")
    if not folder_column_exists:
        logger.warning("FileItem.folderId column missing but FileFolder table still present. Only group tree structure will be migrated.")

    # ── 2. Load all folders ───────────────────────────────────────────────────
    folders_by_user: dict = {}
    if folder_table_exists:
        cur.execute('SELECT "id", "name", "parentId", "sysCreatedBy", "mandateId" FROM "FileFolder"')
        for row in cur.fetchall():
            fid, fname, parent_id, user_id, mandate_id = row
            key = (str(user_id), str(mandate_id) if mandate_id else "")
            folders_by_user.setdefault(key, []).append({
                "id": fid, "name": fname, "parentId": parent_id,
            })
        logger.info(f"Loaded folders for {len(folders_by_user)} (user, mandate) combinations")

    # ── 3. Load file→folder assignments ──────────────────────────────────────
    files_by_key: dict = {}
    if folder_column_exists:
        cur.execute(
            'SELECT "id", "folderId", "sysCreatedBy", "mandateId" FROM "FileItem" WHERE "folderId" IS NOT NULL AND "folderId" != \'\''
        )
        for row in cur.fetchall():
            file_id, folder_id, user_id, mandate_id = row
            key = (str(user_id), str(mandate_id) if mandate_id else "")
            files_by_key.setdefault(key, {}).setdefault(folder_id, []).append(file_id)
        total_files = sum(
            sum(len(v) for v in d.values()) for d in files_by_key.values()
        )
        logger.info(f"Found {total_files} file→folder assignments across {len(files_by_key)} (user, mandate) combos")

    # ── 4. Combine and upsert groupings ──────────────────────────────────────
    all_keys = set(folders_by_user.keys()) | set(files_by_key.keys())
    stats = {"groups_created": 0, "groupings_upserted": 0, "files_assigned": 0}

    for key in all_keys:
        user_id, mandate_id = key
        folders = folders_by_user.get(key, [])
        files_by_folder = files_by_key.get(key, {})

        # Build tree
        roots = _build_tree(folders, None)
        roots = _assign_files_to_nodes(roots, files_by_folder)

        # Handle files in unknown folders (folder no longer in tree)
        known_folder_ids = {f["id"] for f in folders}
        for folder_id, file_ids in files_by_folder.items():
            if folder_id not in known_folder_ids:
                # Orphaned files: put them in an "Orphaned" group
                roots.append({
                    "id": str(uuid.uuid4()),
                    "name": f"Orphaned (folder {folder_id[:8]}…)",
                    "itemIds": file_ids,
                    "subGroups": [],
                    "meta": {"migratedFromFolderId": folder_id, "orphaned": True},
                })

        if not roots:
            continue

        n_items = _count_items(roots)
        stats["groups_created"] += len(roots)
        stats["files_assigned"] += n_items

        context_key = "files/list"
        if verbose:
            logger.debug(f"  user={user_id} mandate={mandate_id}: {len(roots)} root groups, {n_items} files")

        if not dry_run:
            # Check for existing grouping
            cur.execute(
                'SELECT "id", "rootGroups" FROM "TableGrouping" WHERE "userId" = %s AND "contextKey" = %s',
                (user_id, context_key),
            )
            existing_row = cur.fetchone()

            if existing_row:
                existing_id, existing_raw = existing_row
                existing_roots = json.loads(existing_raw) if isinstance(existing_raw, str) else (existing_raw or [])
                # Merge: append migrated groups (avoid duplicates by migratedFromFolderId)
                existing_meta_ids = {
                    (n.get("meta") or {}).get("migratedFromFolderId")
                    for n in existing_roots
                    if (n.get("meta") or {}).get("migratedFromFolderId")
                }
                new_roots = existing_roots + [
                    r for r in roots
                    if (r.get("meta") or {}).get("migratedFromFolderId") not in existing_meta_ids
                ]
                cur.execute(
                    'UPDATE "TableGrouping" SET "rootGroups" = %s, "updatedAt" = %s WHERE "id" = %s',
                    (json.dumps(new_roots), _now_ts(), existing_id),
                )
            else:
                new_id = str(uuid.uuid4())
                cur.execute(
                    'INSERT INTO "TableGrouping" ("id", "userId", "contextKey", "rootGroups", "updatedAt") VALUES (%s, %s, %s, %s, %s)',
                    (new_id, user_id, context_key, json.dumps(roots), _now_ts()),
                )
            stats["groupings_upserted"] += 1

    # ── 5. Summary ────────────────────────────────────────────────────────────
    if not dry_run:
        conn.commit()
        logger.info("Migration committed.")
    else:
        logger.info("DRY RUN — no changes written.")

    logger.info(
        f"Summary: groupings_upserted={stats['groupings_upserted']}, "
        f"groups_created={stats['groups_created']}, "
        f"files_assigned={stats['files_assigned']}"
    )
    logger.info(
        "Next steps (run after verifying data):\n"
        "  1. Run Alembic migration to DROP COLUMN FileItem.folderId\n"
        "  2. Run Alembic migration to DROP TABLE FileFolder"
    )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Migrate FileFolder tree to table_groupings")
    parser.add_argument("--dry-run", action="store_true", default=True, help="Preview only, no DB writes (default)")
    parser.add_argument("--execute", action="store_true", help="Actually write to DB (disables dry-run)")
    parser.add_argument("--verbose", action="store_true", help="Show per-user details")
    args = parser.parse_args()
    dry_run = not args.execute
    run_migration(dry_run=dry_run, verbose=args.verbose)
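
# Typical invocations (illustrative; flags as defined above):
#   python -m modules.migrations.migrate_folders_to_groups                      # dry run (default)
#   python -m modules.migrations.migrate_folders_to_groups --execute --verbose  # write + per-user details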

305
modules/routes/routeAutomationWorkspace.py
Normal file

@ -0,0 +1,305 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
User-facing Automation Workspace API.

Lists workflow runs the user can access (via FeatureAccess on
targetFeatureInstanceId) and provides detail views with step logs
and linked files. Designed for the "Workspace" tab under
Nutzung > Automation.
"""

import logging
import math
from typing import Optional

from fastapi import APIRouter, Depends, Request, Query, Path, HTTPException
from slowapi import Limiter
from slowapi.util import get_remote_address

from modules.auth.authentication import getRequestContext, RequestContext
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.configuration import APP_CONFIG
from modules.features.graphicalEditor.datamodelFeatureGraphicalEditor import (
    AutoRun,
    AutoStepLog,
    AutoWorkflow,
)
from modules.features.graphicalEditor.interfaceFeatureGraphicalEditor import graphicalEditorDatabase
from modules.shared.i18nRegistry import apiRouteContext

routeApiMsg = apiRouteContext("routeAutomationWorkspace")
logger = logging.getLogger(__name__)
limiter = Limiter(key_func=get_remote_address)

router = APIRouter(prefix="/api/automations/runs", tags=["AutomationWorkspace"])


def _getDb() -> DatabaseConnector:
    return DatabaseConnector(
        dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
        dbDatabase=graphicalEditorDatabase,
        dbUser=APP_CONFIG.get("DB_USER"),
        dbPassword=APP_CONFIG.get("DB_PASSWORD_SECRET") or APP_CONFIG.get("DB_PASSWORD"),
        dbPort=int(APP_CONFIG.get("DB_PORT", 5432)),
        userId=None,
    )


def _getUserAccessibleInstanceIds(userId: str) -> list[str]:
    """Return all featureInstanceIds the user has enabled FeatureAccess for."""
    from modules.interfaces.interfaceDbApp import getRootInterface
    rootIface = getRootInterface()
    allAccess = rootIface.getFeatureAccessesForUser(userId) or []
    return [
        a.featureInstanceId
        for a in allAccess
        if a.featureInstanceId and a.enabled
    ]


_FILE_REF_KEYS = ("fileId", "documentId", "fileIds", "documents")


def _extractFileIdsFromValue(value, accumulator: set[str]) -> None:
    """Recursively scan a value (dict/list/str) for file id references."""
    if isinstance(value, dict):
        for key, sub in value.items():
            if key in _FILE_REF_KEYS:
                _collectFileIdsFromRef(sub, accumulator)
            else:
                _extractFileIdsFromValue(sub, accumulator)
    elif isinstance(value, list):
        for item in value:
            _extractFileIdsFromValue(item, accumulator)


def _collectFileIdsFromRef(val, accumulator: set[str]) -> None:
    """Add file ids from a value located under a known file-reference key."""
    if isinstance(val, str) and val:
        accumulator.add(val)
    elif isinstance(val, list):
        for v in val:
            if isinstance(v, str) and v:
                accumulator.add(v)
            elif isinstance(v, dict) and v.get("id"):
                accumulator.add(v["id"])
    elif isinstance(val, dict) and val.get("id"):
        accumulator.add(val["id"])


@router.get("")
@limiter.limit("60/minute")
def listWorkspaceRuns(
    request: Request,
    scope: str = Query("mine", description="mine = own runs, mandate = all accessible"),
    status: Optional[str] = Query(None, description="Filter by run status"),
    targetInstanceId: Optional[str] = Query(None, description="Filter by targetFeatureInstanceId"),
    workflowId: Optional[str] = Query(None, description="Filter by workflow"),
    limit: int = Query(50, ge=1, le=200),
    offset: int = Query(0, ge=0),
    context: RequestContext = Depends(getRequestContext),
) -> dict:
    """List workflow runs visible to the user.

    scope=mine: only runs owned by the user.
    scope=mandate: all runs where the user has FeatureAccess on the
    workflow's targetFeatureInstanceId.
    """
    db = _getDb()
    if not db._ensureTableExists(AutoRun):
        return {"runs": [], "total": 0, "limit": limit, "offset": offset}

    userId = str(context.user.id) if context.user else None
    if not userId:
        raise HTTPException(status_code=401, detail=routeApiMsg("Authentication required"))

    accessibleInstanceIds = _getUserAccessibleInstanceIds(userId)
    if not accessibleInstanceIds:
        return {"runs": [], "total": 0, "limit": limit, "offset": offset}

    if not db._ensureTableExists(AutoWorkflow):
        return {"runs": [], "total": 0, "limit": limit, "offset": offset}

    wfFilter: dict = {}
    if targetInstanceId:
        if targetInstanceId not in accessibleInstanceIds:
            raise HTTPException(status_code=403, detail=routeApiMsg("Access denied to target instance"))
        wfFilter["targetFeatureInstanceId"] = targetInstanceId
    workflows = db.getRecordset(AutoWorkflow, recordFilter=wfFilter or None) or []

    visibleWfIds: set[str] = set()
    wfMap: dict = {}
    for wf in workflows:
        wfDict = dict(wf)
        tid = wfDict.get("targetFeatureInstanceId") or wfDict.get("featureInstanceId")
        if tid and tid in accessibleInstanceIds:
            wfId = wfDict.get("id")
            if wfId:
                visibleWfIds.add(wfId)
                wfMap[wfId] = wfDict

    if workflowId:
        if workflowId not in visibleWfIds:
            return {"runs": [], "total": 0, "limit": limit, "offset": offset}
        visibleWfIds = {workflowId}

    if not visibleWfIds:
        return {"runs": [], "total": 0, "limit": limit, "offset": offset}

    allRuns = db.getRecordset(AutoRun, recordFilter={}) or []
    filtered = []
    for r in allRuns:
        row = dict(r)
        if row.get("workflowId") not in visibleWfIds:
            continue
        if scope == "mine" and row.get("ownerId") != userId:
            continue
        if status and row.get("status") != status:
            continue
        filtered.append(row)

    filtered.sort(
        key=lambda x: x.get("startedAt") or x.get("sysCreatedAt") or 0,
        reverse=True,
    )
    total = len(filtered)
    page = filtered[offset: offset + limit]

    from modules.routes.routeHelpers import enrichRowsWithFkLabels, resolveMandateLabels, resolveInstanceLabels

    for row in page:
        wf = wfMap.get(row.get("workflowId"), {})
        row["workflowLabel"] = row.get("label") or wf.get("label") or row.get("workflowId", "")
        row["targetFeatureInstanceId"] = wf.get("targetFeatureInstanceId") or wf.get("featureInstanceId")

    enrichRowsWithFkLabels(
        page,
        labelResolvers={
            "mandateId": resolveMandateLabels,
            "targetFeatureInstanceId": resolveInstanceLabels,
        },
    )
    for row in page:
        row["targetInstanceLabel"] = row.pop("targetFeatureInstanceIdLabel", None)
        row["mandateLabel"] = row.pop("mandateIdLabel", None)

    return {"runs": page, "total": total, "limit": limit, "offset": offset}


@router.get("/{runId}/detail")
@limiter.limit("60/minute")
def getWorkspaceRunDetail(
    request: Request,
    runId: str = Path(..., description="Run ID"),
    context: RequestContext = Depends(getRequestContext),
) -> dict:
    """Get full detail for a single run: metadata, step logs, linked files."""
    db = _getDb()
    userId = str(context.user.id) if context.user else None
    if not userId:
        raise HTTPException(status_code=401, detail=routeApiMsg("Authentication required"))

    if not db._ensureTableExists(AutoRun):
        raise HTTPException(status_code=404, detail=routeApiMsg("Run not found"))

    runs = db.getRecordset(AutoRun, recordFilter={"id": runId})
    if not runs:
        raise HTTPException(status_code=404, detail=routeApiMsg("Run not found"))
    run = dict(runs[0])

    wfId = run.get("workflowId")
    workflow: dict = {}
    if wfId and db._ensureTableExists(AutoWorkflow):
        wfs = db.getRecordset(AutoWorkflow, recordFilter={"id": wfId})
        if wfs:
            workflow = dict(wfs[0])

    tid = workflow.get("targetFeatureInstanceId") or workflow.get("featureInstanceId")
    accessibleIds = _getUserAccessibleInstanceIds(userId)
    isOwner = run.get("ownerId") == userId

    if not isOwner and (not tid or tid not in accessibleIds) and not context.isPlatformAdmin:
        raise HTTPException(status_code=403, detail=routeApiMsg("Access denied"))

    steps: list = []
    if db._ensureTableExists(AutoStepLog):
        stepRecords = db.getRecordset(AutoStepLog, recordFilter={"runId": runId}) or []
        steps = [dict(s) for s in stepRecords]
        steps.sort(key=lambda s: s.get("startedAt") or 0)

    allFileIds: set[str] = set()
    perStepFileIds: list[tuple[set[str], set[str]]] = []
    for step in steps:
        inputIds: set[str] = set()
        outputIds: set[str] = set()
        _extractFileIdsFromValue(step.get("inputSnapshot") or {}, inputIds)
        _extractFileIdsFromValue(step.get("output") or {}, outputIds)
        perStepFileIds.append((inputIds, outputIds))
        allFileIds.update(inputIds)
        allFileIds.update(outputIds)

    nodeOutputs = run.get("nodeOutputs") or {}
    runLevelIds: set[str] = set()
    _extractFileIdsFromValue(nodeOutputs, runLevelIds)
    allFileIds.update(runLevelIds)

    fileMetaById: dict[str, dict] = {}
    try:
        from modules.datamodels.datamodelFiles import FileItem
        from modules.interfaces.interfaceDbManagement import ComponentObjects
        mgmtDb = ComponentObjects().db
        if mgmtDb._ensureTableExists(FileItem):
            for fid in allFileIds:
                try:
                    rec = mgmtDb.getRecord(FileItem, fid)
                    if rec:
                        recDict = dict(rec)
                        fileMetaById[fid] = {
                            "id": fid,
                            "fileName": recDict.get("fileName") or recDict.get("name"),
                        }
                except Exception:
                    pass
    except Exception as e:
        logger.warning("getWorkspaceRunDetail: file lookup failed: %s", e)

    def _resolveFileList(ids: set[str]) -> list[dict]:
        return [fileMetaById[fid] for fid in ids if fid in fileMetaById]

    assignedFileIds: set[str] = set()
    for step, (inputIds, outputIds) in zip(steps, perStepFileIds):
        step["inputFiles"] = _resolveFileList(inputIds)
        step["outputFiles"] = _resolveFileList(outputIds)
        assignedFileIds.update(inputIds)
        assignedFileIds.update(outputIds)

    unassignedFiles = _resolveFileList(allFileIds - assignedFileIds)
    allFiles = _resolveFileList(allFileIds)

    run["workflowLabel"] = run.get("label") or workflow.get("label") or wfId
    run["targetFeatureInstanceId"] = tid

    targetInstanceLabel = None
    if tid:
        try:
            from modules.routes.routeHelpers import resolveInstanceLabels
            labelMap = resolveInstanceLabels([tid])
            targetInstanceLabel = labelMap.get(tid)
        except Exception:
            pass
    run["targetInstanceLabel"] = targetInstanceLabel

    return {
        "run": run,
        "workflow": {
            "id": workflow.get("id"),
            "label": workflow.get("label"),
            "targetFeatureInstanceId": tid,
            "featureInstanceId": workflow.get("featureInstanceId"),
            "tags": workflow.get("tags", []),
        } if workflow else None,
        "steps": steps,
        "files": allFiles,
        "unassignedFiles": unassignedFiles,
    }
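
# Response shape (illustrative):
#   {"run": {...}, "workflow": {"id", "label", ...} | None,
#    "steps": [{..., "inputFiles": [...], "outputFiles": [...]}],
#    "files": [...], "unassignedFiles": [...]}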

@ -152,10 +152,28 @@ async def get_connections(
    - GET /api/connections/?mode=filterValues&column=status
    - GET /api/connections/?mode=ids
    """
    from modules.routes.routeHelpers import handleFilterValuesInMemory, handleIdsInMemory, enrichRowsWithFkLabels
    from modules.routes.routeHelpers import (
        handleFilterValuesInMemory, handleIdsInMemory, enrichRowsWithFkLabels,
        handleGroupingInRequest, applyGroupScopeFilter,
    )

    CONTEXT_KEY = "connections"

    # Parse pagination params early — needed for grouping in all modes
    paginationParams = None
    if pagination:
        try:
            paginationDict = json.loads(pagination)
            if paginationDict:
                paginationDict = normalize_pagination_dict(paginationDict)
                paginationParams = PaginationParams(**paginationDict)
        except (json.JSONDecodeError, ValueError) as e:
            raise HTTPException(status_code=400, detail=f"Invalid pagination parameter: {str(e)}")

    interface = getInterface(currentUser)
    groupCtx = handleGroupingInRequest(paginationParams, interface, CONTEXT_KEY)

    def _buildEnhancedItems():
        interface = getInterface(currentUser)
        connections = interface.getUserConnections(currentUser.id)
        items = []
        for connection in connections:

@ -182,6 +200,7 @@ async def get_connections(
        try:
            items = _buildEnhancedItems()
            enrichRowsWithFkLabels(items, UserConnection)
            items = applyGroupScopeFilter(items, groupCtx.itemIds)
            return handleFilterValuesInMemory(items, column, pagination)
        except Exception as e:
            logger.error(f"Error getting filter values for connections: {str(e)}")

@ -189,63 +208,40 @@ async def get_connections(

    if mode == "ids":
        try:
            return handleIdsInMemory(_buildEnhancedItems(), pagination)
            items = applyGroupScopeFilter(_buildEnhancedItems(), groupCtx.itemIds)
            return handleIdsInMemory(items, pagination)
        except Exception as e:
            logger.error(f"Error getting IDs for connections: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))

    try:
        interface = getInterface(currentUser)

        # NOTE: Cannot use db.getRecordsetPaginated() here because each connection
        # is enriched with computed tokenStatus/tokenExpiresAt (requires per-row DB lookup).
        # Token refresh may also trigger a re-fetch. Connections per user are typically < 10,
        # so in-memory pagination is acceptable.

        # Parse pagination parameter
        paginationParams = None
        if pagination:
            try:
                paginationDict = json.loads(pagination)
                if paginationDict:
                    # Normalize pagination dict (handles top-level "search" field)
                    paginationDict = normalize_pagination_dict(paginationDict)
                    paginationParams = PaginationParams(**paginationDict)
            except (json.JSONDecodeError, ValueError) as e:
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid pagination parameter: {str(e)}"
                )

        # SECURITY FIX: All users (including admins) can only see their own connections
        # This prevents admins from seeing other users' connections and causing confusion
        connections = interface.getUserConnections(currentUser.id)

        # Perform silent token refresh for expired OAuth connections
        try:
            refresh_result = await token_refresh_service.refresh_expired_tokens(currentUser.id)
            if refresh_result.get("refreshed", 0) > 0:
                logger.info(f"Silently refreshed {refresh_result['refreshed']} tokens for user {currentUser.id}")
                # Re-fetch connections to get updated token status
                connections = interface.getUserConnections(currentUser.id)
        except Exception as e:
            logger.warning(f"Silent token refresh failed for user {currentUser.id}: {str(e)}")
            # Continue with original connections even if refresh fails

        # Enhance each connection with token status information and convert to dict
        enhanced_connections_dict = []
        for connection in connections:
            # Get token status for this connection
            tokenStatus, tokenExpiresAt = getTokenStatusForConnection(interface, connection.id)

            # Convert to dict for filtering/sorting
            connection_dict = {
                "id": connection.id,
                "userId": connection.userId,
                "authority": connection.authority.value if hasattr(connection.authority, 'value') else str(connection.authority),
                "externalId": connection.externalId,
                "externalUsername": connection.externalUsername or "",
                "externalEmail": connection.externalEmail,  # Keep None instead of converting to empty string
                "externalEmail": connection.externalEmail,
                "status": connection.status.value if hasattr(connection.status, 'value') else str(connection.status),
                "connectedAt": connection.connectedAt,
                "lastChecked": connection.lastChecked,

@ -254,24 +250,26 @@ async def get_connections(
                "tokenExpiresAt": tokenExpiresAt
            }
            enhanced_connections_dict.append(connection_dict)

        enrichRowsWithFkLabels(enhanced_connections_dict, UserConnection)
        enhanced_connections_dict = applyGroupScopeFilter(enhanced_connections_dict, groupCtx.itemIds)

        if paginationParams is None:
            return {
                "items": enhanced_connections_dict,
                "pagination": None,
                "groupTree": groupCtx.groupTree,
            }

        # Apply filtering if provided
        if paginationParams.filters:
            component_interface = ComponentObjects()
            component_interface.setUserContext(currentUser)
            enhanced_connections_dict = component_interface._applyFilters(
                enhanced_connections_dict,
                enhanced_connections_dict,
                paginationParams.filters
            )

        # Apply sorting if provided
        if paginationParams.sort:
            component_interface = ComponentObjects()

@ -280,14 +278,14 @@ async def get_connections(
                enhanced_connections_dict,
                paginationParams.sort
            )

        totalItems = len(enhanced_connections_dict)
        totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0

        startIdx = (paginationParams.page - 1) * paginationParams.pageSize
        endIdx = startIdx + paginationParams.pageSize
        paged_connections = enhanced_connections_dict[startIdx:endIdx]
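        # e.g. page=2, pageSize=25 -> startIdx=25, endIdx=50 -> items 26..50 (illustrative)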

        return {
            "items": paged_connections,
            "pagination": PaginationMetadata(

@ -298,6 +296,7 @@ async def get_connections(
                sort=paginationParams.sort,
                filters=paginationParams.filters
            ).model_dump(),
            "groupTree": groupCtx.groupTree,
        }

    except HTTPException:

@ -351,11 +350,18 @@ def create_connection(
            externalUsername="",  # Will be set after OAuth
            status=ConnectionStatus.PENDING  # Start with PENDING status
        )

        # Apply knowledge consent + preferences from request body before persisting
        knowledge_enabled = connection_data.get("knowledgeIngestionEnabled")
        if isinstance(knowledge_enabled, bool):
            connection.knowledgeIngestionEnabled = knowledge_enabled
        knowledge_prefs = connection_data.get("knowledgePreferences")
        if isinstance(knowledge_prefs, dict):
            connection.knowledgePreferences = knowledge_prefs

        # Save connection record - models now handle timestamp serialization automatically
        interface.db.recordModify(UserConnection, connection.id, connection.model_dump())

        return connection

    except HTTPException:

@ -586,8 +592,25 @@ def disconnect_service(
                detail=routeApiMsg("Connection not found")
            )

        # Update connection status
        connection.status = ConnectionStatus.INACTIVE
        # Fire revoked event BEFORE DB status change so knowledge purge and
        # status mutation form one logical step; subscribers see the
        # connection as it was. INACTIVE does not exist on the enum — REVOKED
        # is the correct terminal-but-retained state (deleted rows are
        # handled in DELETE /{id}).
        try:
            from modules.shared.callbackRegistry import callbackRegistry

            callbackRegistry.trigger(
                "connection.revoked",
                connectionId=connectionId,
                authority=str(getattr(connection.authority, "value", connection.authority) or ""),
                userId=str(currentUser.id),
                reason="disconnected",
            )
        except Exception as _cbErr:
            logger.warning("connection.revoked callback failed for %s: %s", connectionId, _cbErr)

        connection.status = ConnectionStatus.REVOKED
        connection.lastChecked = getUtcTimestamp()

        # Update connection record - models now handle timestamp serialization automatically
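
Aside (not part of the diff): a minimal consumer sketch for the "connection.revoked" event fired above. Only callbackRegistry.trigger appears in this diff; the registration call and the purge helper below are assumptions, named here purely for illustration:

from modules.shared.callbackRegistry import callbackRegistry

def onConnectionRevoked(connectionId: str, authority: str, userId: str, reason: str) -> None:
    # reason is "disconnected" (status flip) or "deleted" (row removal, next hunk)
    purgeKnowledgeForConnection(connectionId)  # hypothetical purge entry point

callbackRegistry.register("connection.revoked", onConnectionRevoked)  # method name assumed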

@ -636,6 +659,23 @@ def delete_connection(
                detail=routeApiMsg("Connection not found")
            )

        # Fire revoked event BEFORE the row disappears so consumers still
        # have authority/connection context for observability; purge itself
        # targets FileContentIndex rows by connectionId which are unaffected
        # by the UserConnection delete.
        try:
            from modules.shared.callbackRegistry import callbackRegistry

            callbackRegistry.trigger(
                "connection.revoked",
                connectionId=connectionId,
                authority=str(getattr(connection.authority, "value", connection.authority) or ""),
                userId=str(currentUser.id),
                reason="deleted",
            )
        except Exception as _cbErr:
            logger.warning("connection.revoked callback failed for %s: %s", connectionId, _cbErr)

        # Remove the connection - only need connectionId since permissions are verified
        interface.removeUserConnection(connectionId)

@ -12,7 +12,6 @@ from modules.auth import limiter, getCurrentUser, getRequestContext, RequestCont
# Import interfaces
import modules.interfaces.interfaceDbManagement as interfaceDbManagement
from modules.datamodels.datamodelFiles import FileItem, FilePreview
from modules.datamodels.datamodelFileFolder import FileFolder
from modules.shared.attributeUtils import getModelAttributeDefinitions
from modules.datamodels.datamodelUam import User
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata, normalize_pagination_dict

@ -77,7 +76,7 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
    """Background task: pre-scan + extraction + knowledge indexing.
    Step 1: Structure Pre-Scan (AI-free) -> FileContentIndex (persisted)
    Step 2: Content extraction via runExtraction -> ContentParts
    Step 3: KnowledgeService.indexFile -> chunking + embedding -> Knowledge Store"""
    Step 3: KnowledgeService.requestIngestion -> idempotent chunking + embedding -> Knowledge Store"""
    userId = user.id if hasattr(user, "id") else str(user)
    try:
        mgmtInterface = interfaceDbManagement.getInterface(user)

@ -122,9 +121,30 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
            f"{contentIndex.totalObjects} objects"
        )

        # Persist FileContentIndex immediately
        # Persist FileContentIndex immediately.
        # IMPORTANT: preserve `_ingestion` metadata and `status="indexed"` from any
        # prior successful run — otherwise this upsert wipes the idempotency cache
        # and requestIngestion cannot detect duplicates (AC4 breaks).
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        knowledgeDb = getKnowledgeInterface()
        try:
            _existing = knowledgeDb.getFileContentIndex(fileId)
        except Exception:
            _existing = None
        if _existing:
            _existingStruct = (
                _existing.get("structure") if isinstance(_existing, dict)
                else getattr(_existing, "structure", {})
            ) or {}
            _existingStatus = (
                _existing.get("status") if isinstance(_existing, dict)
                else getattr(_existing, "status", "")
            ) or ""
            if "_ingestion" in _existingStruct:
                contentIndex.structure = dict(contentIndex.structure or {})
                contentIndex.structure["_ingestion"] = _existingStruct["_ingestion"]
            if _existingStatus == "indexed":
                contentIndex.status = "indexed"
        knowledgeDb.upsertFileContentIndex(contentIndex)

        # Step 2: Content extraction (AI-free, produces ContentParts)

@ -134,7 +154,10 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):

        extractorRegistry = ExtractorRegistry()
        chunkerRegistry = ChunkerRegistry()
        options = ExtractionOptions()
        # mergeStrategy=None: keep per-page / per-section granularity for RAG ingestion.
        # The default MergeStrategy concatenates all text parts into a single blob, which
        # collapses a 500-page PDF into one ContentChunk and destroys semantic retrieval.
        options = ExtractionOptions(mergeStrategy=None)

        extracted = runExtraction(
            extractorRegistry, chunkerRegistry,

@ -181,15 +204,21 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
        )
        knowledgeService = getService("knowledge", ctx)

        await knowledgeService.indexFile(
            fileId=fileId,
            fileName=fileName,
            mimeType=mimeType,
            userId=userId,
            featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
            mandateId=str(mandate_id) if mandate_id else "",
            contentObjects=contentObjects,
            structure=contentIndex.structure,
        from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

        await knowledgeService.requestIngestion(
            IngestionJob(
                sourceKind="file",
                sourceId=fileId,
                fileName=fileName,
                mimeType=mimeType,
                userId=userId,
                featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
                mandateId=str(mandate_id) if mandate_id else "",
                contentObjects=contentObjects,
                structure=contentIndex.structure,
                provenance={"lane": "upload", "route": "routeDataFiles._autoIndexFile"},
            )
        )

        # Re-acquire interface after await to avoid stale user context from the singleton

@ -249,7 +278,6 @@ def get_files(
        try:
            paginationDict = json.loads(pagination)
            if paginationDict:
                # Normalize pagination dict (handles top-level "search" field)
                paginationDict = normalize_pagination_dict(paginationDict)
                paginationParams = PaginationParams(**paginationDict)
        except (json.JSONDecodeError, ValueError) as e:

@ -257,51 +285,43 @@ def get_files(
                status_code=400,
                detail=f"Invalid pagination parameter: {str(e)}"
            )

        from modules.routes.routeHelpers import (
            handleIdsMode,
            handleFilterValuesInMemory,
            handleGroupingInRequest, applyGroupScopeFilter,
        )
        import modules.interfaces.interfaceDbApp as _appIface

        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None
        )
        appInterface = _appIface.getInterface(currentUser)
        groupCtx = handleGroupingInRequest(paginationParams, appInterface, "files/list")

        def _filesToDicts(fileItems):
            return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems]

        if mode == "filterValues":
            if not column:
                raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
            allFiles = managementInterface.getAllFiles()
            items = allFiles if isinstance(allFiles, list) else (allFiles.items if hasattr(allFiles, "items") else [])
            itemDicts = [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in items]
            itemDicts = _filesToDicts(items)
            enrichRowsWithFkLabels(itemDicts, FileItem)
            itemDicts = applyGroupScopeFilter(itemDicts, groupCtx.itemIds)
            return handleFilterValuesInMemory(itemDicts, column, pagination)

        if mode == "ids":
            recordFilter = {"sysCreatedBy": managementInterface.userId}
            return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)

        recordFilter = None
        if paginationParams and paginationParams.filters and "folderId" in paginationParams.filters:
            fVal = paginationParams.filters.get("folderId")
            # For a concrete folderId we use recordFilter (exact equality).
            # For null / empty (= "root") we keep it in pagination.filters so the
            # connector applies `IS NULL OR = ''` – files predating the folderId
            # fix were stored with an empty string instead of NULL.
            if fVal is None or (isinstance(fVal, str) and fVal.strip() == ""):
                paginationParams.filters["folderId"] = None
            else:
                paginationParams.filters.pop("folderId")
                recordFilter = {"folderId": fVal}

        result = managementInterface.getAllFiles(pagination=paginationParams, recordFilter=recordFilter)

        def _filesToDicts(items):
            return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in items]
        result = managementInterface.getAllFiles(pagination=paginationParams)

        if paginationParams:
            enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem)
            enriched = applyGroupScopeFilter(enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem), groupCtx.itemIds)
            return {
                "items": enriched,
                "pagination": PaginationMetadata(

@ -312,11 +332,12 @@ def get_files(
                    sort=paginationParams.sort,
                    filters=paginationParams.filters
                ).model_dump(),
                "groupTree": groupCtx.groupTree,
            }
        else:
            items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
            enriched = enrichRowsWithFkLabels(_filesToDicts(items), FileItem)
            return {"items": enriched, "pagination": None}
            enriched = applyGroupScopeFilter(enrichRowsWithFkLabels(_filesToDicts(items), FileItem), groupCtx.itemIds)
            return {"items": enriched, "pagination": None, "groupTree": groupCtx.groupTree}
    except HTTPException:
        raise
    except Exception as e:

@ -327,6 +348,36 @@ def get_files(
        )


def _addFileToGroup(appInterface, fileId: str, groupId: str, contextKey: str = "files/list"):
    """Add a file to a group in the persisted groupTree (upsert)."""
    from modules.routes.routeHelpers import _collectItemIds
    try:
        existing = appInterface.getTableGrouping(contextKey)
        if not existing:
            return
        nodes = [n.model_dump() if hasattr(n, 'model_dump') else n for n in existing.rootGroups]
        def _add(nds):
            for nd in nds:
                nid = nd.get("id") if isinstance(nd, dict) else getattr(nd, "id", None)
                if nid == groupId:
                    itemIds = list(nd.get("itemIds", []) if isinstance(nd, dict) else getattr(nd, "itemIds", []))
                    if fileId not in itemIds:
                        itemIds.append(fileId)
                    if isinstance(nd, dict):
                        nd["itemIds"] = itemIds
                    else:
                        nd.itemIds = itemIds
                    return True
                subs = nd.get("subGroups", []) if isinstance(nd, dict) else getattr(nd, "subGroups", [])
                if _add(subs):
                    return True
            return False
        _add(nodes)
        appInterface.upsertTableGrouping(contextKey, nodes)
    except Exception as e:
        logger.warning(f"_addFileToGroup failed: {e}")


@router.post("/upload", status_code=status.HTTP_201_CREATED)
@limiter.limit("10/minute")
async def upload_file(

@ -334,7 +385,7 @@ async def upload_file(
    file: UploadFile = File(...),
    workflowId: Optional[str] = Form(None),
    featureInstanceId: Optional[str] = Form(None),
    folderId: Optional[str] = Form(None),
    groupId: Optional[str] = Form(None),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
) -> JSONResponse:

@ -358,31 +409,22 @@ async def upload_file(
                status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                detail=f"File too large. Maximum size: {interfaceDbManagement.APP_CONFIG.get('File_Management_MAX_UPLOAD_SIZE_MB')}MB"
            )

        # Normalize folderId: empty string / "null" / "root" → None (root folder)
        normalizedFolderId: Optional[str] = folderId
        if isinstance(normalizedFolderId, str):
            trimmed = normalizedFolderId.strip()
            if not trimmed or trimmed.lower() in {"null", "none", "root"}:
                normalizedFolderId = None
            else:
                normalizedFolderId = trimmed

        # Save file via LucyDOM interface in the database
        fileItem, duplicateType = managementInterface.saveUploadedFile(
            fileContent, file.filename, folderId=normalizedFolderId
            fileContent, file.filename
        )

        if featureInstanceId and not fileItem.featureInstanceId:
            managementInterface.updateFile(fileItem.id, {"featureInstanceId": featureInstanceId})
            fileItem.featureInstanceId = featureInstanceId

        # For exact duplicates we keep the existing record, but move it into the
        # target folder so the user actually sees their upload land where they expect.
        if duplicateType == "exact_duplicate" and normalizedFolderId != getattr(fileItem, "folderId", None):
            managementInterface.updateFile(fileItem.id, {"folderId": normalizedFolderId})
            fileItem.folderId = normalizedFolderId

        # Add to group if groupId was provided
        if groupId:
            import modules.interfaces.interfaceDbApp as _appIface
            appInterface = _appIface.getInterface(currentUser)
            _addFileToGroup(appInterface, fileItem.id, groupId)

        # Determine response message based on duplicate type
        if duplicateType == "exact_duplicate":
            message = f"File '{file.filename}' already exists with identical content. Reusing existing file."

@ -447,347 +489,6 @@ async def upload_file(
            detail=f"Error during file upload: {str(e)}"
        )

# ── Folder endpoints (MUST be before /{fileId} catch-all) ─────────────────────

@router.get("/folders", response_model=List[Dict[str, Any]])
@limiter.limit("30/minute")
def list_folders(
    request: Request,
    parentId: Optional[str] = Query(None, description="Parent folder ID (omit for all folders)"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> List[Dict[str, Any]]:
    """List folders for the current user."""
    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        if parentId is not None:
            return mgmt.listFolders(parentId=parentId)
        return mgmt.listFolders()
    except Exception as e:
        logger.error(f"Error listing folders: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/folders", status_code=status.HTTP_201_CREATED)
@limiter.limit("10/minute")
def create_folder(
    request: Request,
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Create a new folder."""
    name = body.get("name", "")
    parentId = body.get("parentId")
    if not name:
        raise HTTPException(status_code=400, detail=routeApiMsg("name is required"))
    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        return mgmt.createFolder(name=name, parentId=parentId)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error creating folder: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.put("/folders/{folderId}")
@limiter.limit("10/minute")
def rename_folder(
    request: Request,
    folderId: str = Path(...),
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Rename a folder."""
    newName = body.get("name", "")
    if not newName:
        raise HTTPException(status_code=400, detail=routeApiMsg("name is required"))
    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        mgmt.renameFolder(folderId, newName)
        return {"success": True, "folderId": folderId, "name": newName}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error renaming folder: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.delete("/folders/{folderId}")
@limiter.limit("10/minute")
def delete_folder(
    request: Request,
    folderId: str = Path(...),
    recursive: bool = Query(False, description="Delete folder contents recursively"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Delete a folder. Use recursive=true to delete non-empty folders."""
    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        return mgmt.deleteFolder(folderId, recursive=recursive)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error deleting folder: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/folders/{folderId}/move")
@limiter.limit("10/minute")
def move_folder(
    request: Request,
    folderId: str = Path(...),
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Move a folder to a new parent."""
    targetParentId = body.get("targetParentId")
    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        mgmt.moveFolder(folderId, targetParentId)
        return {"success": True, "folderId": folderId, "parentId": targetParentId}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Error moving folder: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.patch("/folders/{folderId}/scope")
@limiter.limit("10/minute")
def _updateFolderScope(
    request: Request,
    folderId: str = Path(..., description="ID of the folder"),
    scope: str = Body(..., embed=True),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Update the scope of a folder. Propagates to all files inside (recursively). Global scope requires sysAdmin."""
    validScopes = {"personal", "featureInstance", "mandate", "global"}
    if scope not in validScopes:
        raise HTTPException(status_code=400, detail=f"Invalid scope: {scope}. Must be one of {validScopes}")
    if scope == "global" and not context.isSysAdmin:
        raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
    try:
        mgmt = interfaceDbManagement.getInterface(
            context.user,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        folder = mgmt.getFolder(folderId)
        if not folder:
            raise HTTPException(status_code=404, detail=routeApiMsg("Folder not found"))
        mgmt.updateFolder(folderId, {"scope": scope})
        fileIds = _collectFolderFileIds(mgmt, folderId)
        for fid in fileIds:
            try:
                mgmt.updateFile(fid, {"scope": scope})
            except Exception as e:
                logger.error("Folder scope propagation: failed to update file %s: %s", fid, e)
        logger.info("Updated scope=%s for folder %s: %d files affected", scope, folderId, len(fileIds))
        return {"folderId": folderId, "scope": scope, "filesUpdated": len(fileIds)}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating folder scope: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.patch("/folders/{folderId}/neutralize")
@limiter.limit("10/minute")
def updateFolderNeutralize(
    request: Request,
    background_tasks: BackgroundTasks,
    folderId: str = Path(..., description="ID of the folder"),
    neutralize: bool = Body(..., embed=True),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Toggle neutralization on a folder. Propagates to all files inside (recursively).

    When turning ON: all files in the folder get ``neutralize=True``, their
    knowledge indexes are purged synchronously, and background re-indexing
    is triggered.
    When turning OFF: files revert to ``neutralize=False`` unless they were
    individually marked (not implemented yet -- all are reverted).
    """
    try:
        mgmt = interfaceDbManagement.getInterface(
            context.user,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )

        folder = mgmt.getFolder(folderId)
        if not folder:
            raise HTTPException(status_code=404, detail=routeApiMsg("Folder not found"))

        mgmt.updateFolder(folderId, {"neutralize": neutralize})

        fileIds = _collectFolderFileIds(mgmt, folderId)
        logger.info("Folder neutralize toggle %s for folder %s: %d files affected", neutralize, folderId, len(fileIds))

        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        knowledgeDb = getKnowledgeInterface()

        for fid in fileIds:
            try:
                mgmt.updateFile(fid, {"neutralize": neutralize})
                if neutralize:
                    try:
                        knowledgeDb.deleteFileContentIndex(fid)
                    except Exception as e:
                        logger.warning("Folder neutralize: failed to purge index for file %s: %s", fid, e)
                else:
                    try:
                        from modules.datamodels.datamodelKnowledge import FileContentIndex
                        indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fid})
                        for idx in indices:
                            idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
                            if idxId:
                                knowledgeDb.db.recordModify(FileContentIndex, idxId, {
                                    "neutralizationStatus": "original",
                                    "isNeutralized": False,
                                })
                    except Exception as e:
                        logger.warning("Folder neutralize OFF: metadata update failed for %s: %s", fid, e)
            except Exception as e:
                logger.error("Folder neutralize: failed to update file %s: %s", fid, e)

        for fid in fileIds:
            fileMeta = mgmt.getFile(fid)
            if fileMeta:
                fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
                mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")

                async def _reindex(fileId=fid, fileName=fn, mimeType=mt):
                    try:
                        await _autoIndexFile(fileId=fileId, fileName=fileName, mimeType=mimeType, user=context.user)
|
||||
except Exception as ex:
|
||||
logger.error("Folder neutralize re-index failed for %s: %s", fileId, ex)
|
||||
|
||||
background_tasks.add_task(_reindex)
|
||||
|
||||
return {"folderId": folderId, "neutralize": neutralize, "filesUpdated": len(fileIds)}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating folder neutralize flag: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
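Aside: the endpoint above splits correctness-critical work from slow work. The index purge runs synchronously (a neutralized file must never serve stale chunks), while re-indexing is queued on BackgroundTasks and runs only after the response is sent. A minimal, self-contained sketch of that split, with stub functions standing in for knowledgeDb.deleteFileContentIndex and _autoIndexFile (both stubs are hypothetical, not project APIs):

# Sketch of the purge-now / reindex-later split used by the neutralize
# endpoint above; purge_index and reindex are invented stand-ins.
from fastapi import BackgroundTasks, FastAPI

app = FastAPI()

def purge_index(file_id: str) -> None:
    print(f"purged index for {file_id}")          # synchronous: stale data gone before returning

async def reindex(file_id: str) -> None:
    print(f"re-indexed {file_id}")                # slow path: runs after the response is sent

@app.patch("/demo/{file_id}/neutralize")
async def toggle_neutralize(file_id: str, background_tasks: BackgroundTasks):
    purge_index(file_id)
    background_tasks.add_task(reindex, file_id)   # FastAPI runs async tasks post-response
    return {"fileId": file_id, "neutralize": True}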
def _collectFolderFileIds(mgmt, folderId: str) -> List[str]:
    """Recursively collect all file IDs in a folder and its sub-folders."""
    fileIds = []
    try:
        files = mgmt.listFiles(folderId=folderId)
        if isinstance(files, dict):
            files = files.get("files", [])
        for f in (files or []):
            fid = f.get("id") if isinstance(f, dict) else getattr(f, "id", None)
            if fid:
                fileIds.append(fid)
    except Exception as e:
        logger.warning("_collectFolderFileIds: listFiles failed for folder %s: %s", folderId, e)

    try:
        subFolders = mgmt.listFolders(parentId=folderId)
        for sf in (subFolders or []):
            sfId = sf.get("id") if isinstance(sf, dict) else getattr(sf, "id", None)
            if sfId:
                fileIds.extend(_collectFolderFileIds(mgmt, sfId))
    except Exception as e:
        logger.warning("_collectFolderFileIds: listFolders failed for folder %s: %s", folderId, e)

    return fileIds
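The helper above recurses depth-first: each folder contributes its direct files, then each sub-folder's subtree. A self-contained sketch of the same traversal against invented in-memory data:

# Invented in-memory stand-in for mgmt.listFiles/listFolders, showing the
# traversal order of _collectFolderFileIds.
FILES = {"root": ["f1"], "a": ["f2", "f3"]}     # folderId -> file ids
FOLDERS = {"root": ["a"], "a": []}              # folderId -> child folder ids

def collect(folder_id: str) -> list:
    ids = list(FILES.get(folder_id, []))        # direct files first
    for sub in FOLDERS.get(folder_id, []):      # then each sub-folder's subtree
        ids.extend(collect(sub))
    return ids

assert collect("root") == ["f1", "f2", "f3"]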
@router.get("/folders/{folderId}/download")
@limiter.limit("10/minute")
def download_folder(
    request: Request,
    folderId: str = Path(..., description="ID of the folder to download as ZIP"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Response:
    """Download a folder (including subfolders) as a ZIP archive."""
    import io
    import zipfile
    import urllib.parse

    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )

        folder = mgmt.getFolder(folderId)
        if not folder:
            raise HTTPException(status_code=404, detail=f"Folder {folderId} not found")

        folderName = folder.get("name", "download")

        def _collectFiles(parentId: str, pathPrefix: str):
            """Recursively collect (zipPath, fileId) tuples."""
            entries = []
            for f in mgmt._getFilesByCurrentUser(recordFilter={"folderId": parentId}):
                fname = f.get("fileName") or f.get("name") or f.get("id", "file")
                entries.append((f"{pathPrefix}{fname}", f["id"]))
            for sub in mgmt.listFolders(parentId=parentId):
                subName = sub.get("name", sub["id"])
                entries.extend(_collectFiles(sub["id"], f"{pathPrefix}{subName}/"))
            return entries

        fileEntries = _collectFiles(folderId, "")
        if not fileEntries:
            raise HTTPException(status_code=404, detail=routeApiMsg("Folder is empty"))

        buf = io.BytesIO()
        with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
            for zipPath, fileId in fileEntries:
                data = mgmt.getFileData(fileId)
                if data:
                    zf.writestr(zipPath, data)

        buf.seek(0)
        zipBytes = buf.getvalue()
        encodedName = urllib.parse.quote(f"{folderName}.zip")

        return Response(
            content=zipBytes,
            media_type="application/zip",
            headers={
                "Content-Disposition": f"attachment; filename*=UTF-8''{encodedName}"
            }
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error downloading folder as ZIP: {e}")
        raise HTTPException(status_code=500, detail=f"Error downloading folder: {str(e)}")
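The Content-Disposition value above uses the RFC 5987 filename* form so that non-ASCII folder names survive the download header. What urllib.parse.quote produces for an invented name:

# Standard-library check of the RFC 5987 header built in download_folder
# (the folder name here is made up).
import urllib.parse

encoded = urllib.parse.quote("Berichte März.zip")
print(f"attachment; filename*=UTF-8''{encoded}")
# -> attachment; filename*=UTF-8''Berichte%20M%C3%A4rz.zip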
@router.post("/batch-delete")

@@ -798,13 +499,11 @@ def batch_delete_items(
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Batch delete files/folders with a single SQL-backed operation per type."""
    """Batch delete files."""
    fileIds = body.get("fileIds") or []
    folderIds = body.get("folderIds") or []
    recursiveFolders = bool(body.get("recursiveFolders", True))

    if not isinstance(fileIds, list) or not isinstance(folderIds, list):
        raise HTTPException(status_code=400, detail=routeApiMsg("fileIds and folderIds must be arrays"))
    if not isinstance(fileIds, list):
        raise HTTPException(status_code=400, detail=routeApiMsg("fileIds must be an array"))

    try:
        mgmt = interfaceDbManagement.getInterface(

@@ -813,17 +512,12 @@ def batch_delete_items(
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )

        result = {"deletedFiles": 0, "deletedFolders": 0}
        result = {"deletedFiles": 0}

        if fileIds:
            fileResult = mgmt.deleteFilesBatch(fileIds)
            result["deletedFiles"] += fileResult.get("deletedFiles", 0)

        if folderIds:
            folderResult = mgmt.deleteFoldersBatch(folderIds, recursive=recursiveFolders)
            result["deletedFiles"] += folderResult.get("deletedFiles", 0)
            result["deletedFolders"] += folderResult.get("deletedFolders", 0)

        return result
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

@@ -832,45 +526,189 @@ def batch_delete_items(
        raise HTTPException(status_code=500, detail=str(e))
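After this change the endpoint's contract is files-only; folders are deleted through DELETE /folders/{folderId} instead. An illustrative request against the new shape (the "requests" library, host, and mount path are assumptions here, and the IDs are made up):

# Illustrative call against the files-only contract of /batch-delete.
import requests

resp = requests.post(
    "http://localhost:8000/files/batch-delete",   # assumed router prefix
    json={"fileIds": ["f1", "f2"]},               # folderIds is no longer handled
    headers={"Authorization": "Bearer <token>"},
)
print(resp.json())                                # e.g. {"deletedFiles": 2}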
@router.post("/batch-move")
@limiter.limit("10/minute")
def batch_move_items(
    request: Request,
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Batch move files/folders with a single SQL-backed operation per type."""
    fileIds = body.get("fileIds") or []
    folderIds = body.get("folderIds") or []
    targetFolderId = body.get("targetFolderId")
    targetParentId = body.get("targetParentId")

    if not isinstance(fileIds, list) or not isinstance(folderIds, list):
        raise HTTPException(status_code=400, detail=routeApiMsg("fileIds and folderIds must be arrays"))
# ── Group bulk endpoints ──────────────────────────────────────────────────────

def _get_group_item_ids(contextKey: str, groupId: str, appInterface) -> set:
    """Collect all file IDs in a group and its sub-groups from the stored groupTree."""
    from modules.routes.routeHelpers import _collectItemIds
    try:
        mgmt = interfaceDbManagement.getInterface(
        existing = appInterface.getTableGrouping(contextKey)
        if not existing:
            return set()
        nodes = [n.model_dump() if hasattr(n, 'model_dump') else n for n in existing.rootGroups]
        result = _collectItemIds(nodes, groupId)
        return result or set()
    except Exception as e:
        logger.error(f"_get_group_item_ids failed for groupId={groupId}: {e}")
        return set()


@router.patch("/groups/{groupId}/scope")
@limiter.limit("60/minute")
def patch_group_scope(
    request: Request,
    groupId: str = Path(..., description="Group ID"),
    body: dict = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Set scope for all files in a group (recursive)."""
    scope = body.get("scope")
    if not scope:
        raise HTTPException(status_code=400, detail="scope is required")
    try:
        import modules.interfaces.interfaceDbApp as _appIface
        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )

        result = {"movedFiles": 0, "movedFolders": 0}

        if fileIds:
            fileResult = mgmt.moveFilesBatch(fileIds, targetFolderId=targetFolderId)
            result["movedFiles"] += fileResult.get("movedFiles", 0)

        if folderIds:
            folderResult = mgmt.moveFoldersBatch(folderIds, targetParentId=targetParentId)
            result["movedFolders"] += folderResult.get("movedFolders", 0)

        return result
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
        appInterface = _appIface.getInterface(currentUser)
        fileIds = _get_group_item_ids("files/list", groupId, appInterface)
        updated = 0
        for fid in fileIds:
            try:
                managementInterface.updateFile(fid, {"scope": scope})
                updated += 1
            except Exception as e:
                logger.error(f"patch_group_scope: failed to update file {fid}: {e}")
        return {"groupId": groupId, "scope": scope, "filesUpdated": updated}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in batch move: {e}")
        logger.error(f"patch_group_scope error: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.patch("/groups/{groupId}/neutralize")
@limiter.limit("60/minute")
def patch_group_neutralize(
    request: Request,
    groupId: str = Path(..., description="Group ID"),
    body: dict = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Toggle neutralize for all files in a group (recursive, incl. knowledge purge/reindex)."""
    neutralize = body.get("neutralize")
    if neutralize is None:
        raise HTTPException(status_code=400, detail="neutralize is required")
    try:
        import modules.interfaces.interfaceDbApp as _appIface
        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        appInterface = _appIface.getInterface(currentUser)
        fileIds = _get_group_item_ids("files/list", groupId, appInterface)
        updated = 0
        for fid in fileIds:
            try:
                managementInterface.updateFile(fid, {"neutralize": neutralize})
                if not neutralize:
                    try:
                        from modules.interfaces import interfaceDbKnowledge
                        kIface = interfaceDbKnowledge.getInterface(currentUser)
                        kIface.purgeFileKnowledge(fid)
                    except Exception as ke:
                        logger.warning(f"patch_group_neutralize: knowledge purge failed for {fid}: {ke}")
                updated += 1
            except Exception as e:
                logger.error(f"patch_group_neutralize: failed for file {fid}: {e}")
        return {"groupId": groupId, "neutralize": neutralize, "filesUpdated": updated}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"patch_group_neutralize error: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/groups/{groupId}/download")
@limiter.limit("20/minute")
async def download_group_zip(
    request: Request,
    groupId: str = Path(..., description="Group ID"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Download all files in a group as a ZIP archive."""
    import io, zipfile
    try:
        import modules.interfaces.interfaceDbApp as _appIface
        managementInterface = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        appInterface = _appIface.getInterface(currentUser)
        fileIds = _get_group_item_ids("files/list", groupId, appInterface)
        if not fileIds:
            raise HTTPException(status_code=404, detail="Group not found or empty")
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
            for fid in fileIds:
                try:
                    fileMeta = managementInterface.getFile(fid)
                    fileData = managementInterface.getFileData(fid)
                    if fileMeta and fileData:
                        name = (fileMeta.get("fileName") if isinstance(fileMeta, dict) else getattr(fileMeta, "fileName", fid)) or fid
                        zf.writestr(name, fileData)
                except Exception as fe:
                    logger.warning(f"download_group_zip: skipping file {fid}: {fe}")
        buf.seek(0)
        from fastapi.responses import StreamingResponse
        return StreamingResponse(
            buf,
            media_type="application/zip",
            headers={"Content-Disposition": f'attachment; filename="group-{groupId}.zip"'},
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"download_group_zip error: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.delete("/groups/{groupId}")
@limiter.limit("30/minute")
def delete_group(
    request: Request,
    groupId: str = Path(..., description="Group ID"),
    deleteItems: bool = Query(False, description="If true, also delete all files in the group"),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext),
):
    """Remove a group from the groupTree. Optionally delete all its files."""
    try:
        import modules.interfaces.interfaceDbApp as _appIface
        appInterface = _appIface.getInterface(currentUser)
        fileIds = _get_group_item_ids("files/list", groupId, appInterface)
        # Remove group from tree
        existing = appInterface.getTableGrouping("files/list")
        if existing:
            from modules.routes.routeHelpers import _removeGroupFromTree
            newRoots = _removeGroupFromTree([n.model_dump() if hasattr(n, 'model_dump') else n for n in existing.rootGroups], groupId)
            appInterface.upsertTableGrouping("files/list", newRoots)
        # Optionally delete files
        deletedFiles = 0
        if deleteItems:
            managementInterface = interfaceDbManagement.getInterface(
                currentUser,
                mandateId=str(context.mandateId) if context.mandateId else None,
                featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
            )
            for fid in fileIds:
                try:
                    managementInterface.deleteFile(fid)
                    deletedFiles += 1
                except Exception as e:
                    logger.error(f"delete_group: failed to delete file {fid}: {e}")
        return {"groupId": groupId, "deletedFiles": deletedFiles}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"delete_group error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
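The groupTree these endpoints read is a persisted list of nested nodes keyed by a context such as "files/list". A plausible instance (field names match the helpers in routeHelpers; the values are invented):

# Invented example of the persisted tree that _get_group_item_ids walks.
tree = [
    {"id": "g1", "name": "Reports", "itemIds": ["f1"], "subGroups": [
        {"id": "g2", "name": "2024", "itemIds": ["f2", "f3"], "subGroups": []},
    ]},
]
# Resolving groupId="g1" yields {"f1", "f2", "f3"}: the node's own itemIds
# plus those of every descendant subGroup.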
@@ -1071,7 +909,7 @@ def update_file(
) -> FileItem:
    """Update file info"""
    try:
        _EDITABLE_FIELDS = {"fileName", "scope", "tags", "description", "folderId", "neutralize"}
        _EDITABLE_FIELDS = {"fileName", "scope", "tags", "description", "neutralize"}
        safeData = {k: v for k, v in file_info.items() if k in _EDITABLE_FIELDS}
        if not safeData:
            raise HTTPException(status_code=400, detail=routeApiMsg("No editable fields provided"))
@@ -1226,37 +1064,3 @@ def preview_file(
    )


@router.post("/{fileId}/move")
@limiter.limit("10/minute")
def move_file(
    request: Request,
    fileId: str = Path(...),
    body: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Move a file to a different folder."""
    targetFolderId = body.get("targetFolderId")
    try:
        mgmt = interfaceDbManagement.getInterface(
            currentUser,
            mandateId=str(context.mandateId) if context.mandateId else None,
            featureInstanceId=str(context.featureInstanceId) if context.featureInstanceId else None,
        )
        mgmt.updateFile(fileId, {"folderId": targetFolderId})

        if targetFolderId:
            try:
                targetFolder = mgmt.getFolder(targetFolderId)
                folderNeut = (targetFolder.get("neutralize") if isinstance(targetFolder, dict)
                              else getattr(targetFolder, "neutralize", False)) if targetFolder else False
                if folderNeut:
                    mgmt.updateFile(fileId, {"neutralize": True})
                    logger.info("File %s moved to neutralized folder %s — inherited neutralize=True", fileId, targetFolderId)
            except Exception as e:
                logger.warning("File move: folder neutralize inheritance check failed for %s: %s", fileId, e)

        return {"success": True, "fileId": fileId, "folderId": targetFolderId}
    except Exception as e:
        logger.error(f"Error moving file: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@@ -112,8 +112,8 @@ def get_mandates(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=routeApiMsg("Admin role required")
        )

    # Parse pagination parameter

    # Parse pagination parameter early — needed for grouping in all modes
    paginationParams = None
    if pagination:
        try:

@@ -126,14 +126,24 @@ def get_mandates(
                status_code=400,
                detail=f"Invalid pagination parameter: {str(e)}"
            )

    from modules.routes.routeHelpers import (
        handleFilterValuesInMemory, handleIdsInMemory,
        handleFilterValuesMode, handleIdsMode,
        parseCrossFilterPagination,
        handleGroupingInRequest, applyGroupScopeFilter,
    )

    appInterface = interfaceDbApp.getRootInterface()
    groupCtx = handleGroupingInRequest(paginationParams, appInterface, "mandates")

    def _mandateItemsForAdmin():
        items = []
        for mid in adminMandateIds:
            m = appInterface.getMandate(mid)
            if m and getattr(m, "enabled", True):
                items.append(m.model_dump() if hasattr(m, 'model_dump') else m if isinstance(m, dict) else vars(m))
        return items

    if mode == "filterValues":
        if not column:

@@ -144,54 +154,42 @@ def get_mandates(
            values = appInterface.db.getDistinctColumnValues(Mandate, column, crossPagination)
            return JSONResponse(content=sorted(values, key=lambda v: str(v).lower()))
        else:
            mandateItems = []
            for mid in adminMandateIds:
                m = appInterface.getMandate(mid)
                if m and getattr(m, "enabled", True):
                    mandateItems.append(m.model_dump() if hasattr(m, 'model_dump') else m if isinstance(m, dict) else vars(m))
            mandateItems = applyGroupScopeFilter(_mandateItemsForAdmin(), groupCtx.itemIds)
            return handleFilterValuesInMemory(mandateItems, column, pagination)

    if mode == "ids":
        if isPlatformAdmin:
            return handleIdsMode(appInterface.db, Mandate, pagination)
        else:
            mandateItems = []
            for mid in adminMandateIds:
                m = appInterface.getMandate(mid)
                if m and getattr(m, "enabled", True):
                    mandateItems.append(m.model_dump() if hasattr(m, 'model_dump') else m if isinstance(m, dict) else vars(m))
            mandateItems = applyGroupScopeFilter(_mandateItemsForAdmin(), groupCtx.itemIds)
            return handleIdsInMemory(mandateItems, pagination)

    if isPlatformAdmin:
        result = appInterface.getAllMandates(pagination=paginationParams)
    else:
        allMandates = []
        for mandateId in adminMandateIds:
            mandate = appInterface.getMandate(mandateId)
            if mandate and getattr(mandate, "enabled", True):
                mandateDict = mandate if isinstance(mandate, dict) else mandate.model_dump() if hasattr(mandate, 'model_dump') else vars(mandate)
                allMandates.append(mandateDict)
        result = allMandates
        paginationParams = None

    if paginationParams and hasattr(result, 'items'):
        return PaginatedResponse(
            items=result.items,
            pagination=PaginationMetadata(
                currentPage=paginationParams.page,
                pageSize=paginationParams.pageSize,
                totalItems=result.totalItems,
                totalPages=result.totalPages,
                sort=paginationParams.sort,
                filters=paginationParams.filters
        items = result.items if hasattr(result, 'items') else (result if isinstance(result, list) else [])
        items = applyGroupScopeFilter(
            [i.model_dump() if hasattr(i, 'model_dump') else (i if isinstance(i, dict) else vars(i)) for i in items],
            groupCtx.itemIds,
        )
        if paginationParams and hasattr(result, 'items'):
            return PaginatedResponse(
                items=items,
                pagination=PaginationMetadata(
                    currentPage=paginationParams.page,
                    pageSize=paginationParams.pageSize,
                    totalItems=result.totalItems,
                    totalPages=result.totalPages,
                    sort=paginationParams.sort,
                    filters=paginationParams.filters
                ),
                groupTree=groupCtx.groupTree,
            )
        )
        else:
            return PaginatedResponse(items=items, pagination=None, groupTree=groupCtx.groupTree)
    else:
        items = result if isinstance(result, list) else (result.items if hasattr(result, 'items') else result)
        return PaginatedResponse(
            items=items,
            pagination=None
        )
        mandateItems = applyGroupScopeFilter(_mandateItemsForAdmin(), groupCtx.itemIds)
        return PaginatedResponse(items=mandateItems, pagination=None, groupTree=groupCtx.groupTree)

    except HTTPException:
        raise
    except Exception as e:
@@ -44,27 +44,15 @@ def get_prompts(
    - filterValues: distinct values for a column (cross-filtered)
    - ids: all IDs matching current filters
    """
    from modules.routes.routeHelpers import handleFilterValuesInMemory, handleIdsInMemory, enrichRowsWithFkLabels
    from modules.routes.routeHelpers import (
        handleFilterValuesInMemory, handleIdsInMemory, enrichRowsWithFkLabels,
        handleGroupingInRequest, applyGroupScopeFilter,
    )
    from modules.interfaces.interfaceDbApp import getInterface as getAppInterface

    def _promptsToEnrichedDicts(promptItems):
        dicts = [r.model_dump() if hasattr(r, 'model_dump') else (dict(r) if not isinstance(r, dict) else r) for r in promptItems]
        enrichRowsWithFkLabels(dicts, Prompt)
        return dicts

    if mode == "filterValues":
        if not column:
            raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
        managementInterface = interfaceDbManagement.getInterface(currentUser)
        result = managementInterface.getAllPrompts(pagination=None)
        items = _promptsToEnrichedDicts(result)
        return handleFilterValuesInMemory(items, column, pagination)

    if mode == "ids":
        managementInterface = interfaceDbManagement.getInterface(currentUser)
        result = managementInterface.getAllPrompts(pagination=None)
        items = _promptsToEnrichedDicts(result)
        return handleIdsInMemory(items, pagination)
    CONTEXT_KEY = "prompts"

    # Parse pagination params early — needed for grouping in all modes
    paginationParams = None
    if pagination:
        try:

@@ -74,12 +62,35 @@ def get_prompts(
            paginationParams = PaginationParams(**paginationDict)
        except (json.JSONDecodeError, ValueError) as e:
            raise HTTPException(status_code=400, detail=f"Invalid pagination parameter: {str(e)}")


    appInterface = getAppInterface(currentUser)
    groupCtx = handleGroupingInRequest(paginationParams, appInterface, CONTEXT_KEY)

    def _promptsToEnrichedDicts(promptItems):
        dicts = [r.model_dump() if hasattr(r, 'model_dump') else (dict(r) if not isinstance(r, dict) else r) for r in promptItems]
        enrichRowsWithFkLabels(dicts, Prompt)
        return dicts

    managementInterface = interfaceDbManagement.getInterface(currentUser)

    if mode == "filterValues":
        if not column:
            raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")
        result = managementInterface.getAllPrompts(pagination=None)
        items = _promptsToEnrichedDicts(result)
        items = applyGroupScopeFilter(items, groupCtx.itemIds)
        return handleFilterValuesInMemory(items, column, pagination)

    if mode == "ids":
        result = managementInterface.getAllPrompts(pagination=None)
        items = _promptsToEnrichedDicts(result)
        items = applyGroupScopeFilter(items, groupCtx.itemIds)
        return handleIdsInMemory(items, pagination)

    result = managementInterface.getAllPrompts(pagination=paginationParams)


    if paginationParams:
        items = _promptsToEnrichedDicts(result.items)
        items = applyGroupScopeFilter(_promptsToEnrichedDicts(result.items), groupCtx.itemIds)
        return {
            "items": items,
            "pagination": PaginationMetadata(

@@ -90,12 +101,14 @@ def get_prompts(
                sort=paginationParams.sort,
                filters=paginationParams.filters
            ).model_dump(),
            "groupTree": groupCtx.groupTree,
        }
    else:
        items = _promptsToEnrichedDicts(result)
        items = applyGroupScopeFilter(_promptsToEnrichedDicts(result), groupCtx.itemIds)
        return {
            "items": items,
            "pagination": None,
            "groupTree": groupCtx.groupTree,
        }
@@ -208,6 +208,21 @@ def get_users(
    - GET /api/users/ (no pagination - returns all users in mandate)
    - GET /api/users/?pagination={"page":1,"pageSize":10,"sort":[]}
    """
    # Parse pagination early — needed for grouping in all modes
    _paginationParams = None
    if pagination:
        try:
            _pd = json.loads(pagination)
            if _pd:
                _pd = normalize_pagination_dict(_pd)
                _paginationParams = PaginationParams(**_pd)
        except (json.JSONDecodeError, ValueError) as e:
            raise HTTPException(status_code=400, detail=f"Invalid pagination parameter: {str(e)}")

    from modules.routes.routeHelpers import handleGroupingInRequest as _handleGrouping, applyGroupScopeFilter as _applyGroupScope
    _appInterfaceForGrouping = interfaceDbApp.getInterface(context.user, mandateId=context.mandateId)
    _groupCtx = _handleGrouping(_paginationParams, _appInterfaceForGrouping, "users")

    if mode == "filterValues":
        if not column:
            raise HTTPException(status_code=400, detail="column parameter required for mode=filterValues")

@@ -217,27 +232,15 @@ def get_users(
        return _getUserFilterOrIds(context, pagination, idsMode=True)

    try:
        paginationParams = None
        if pagination:
            try:
                paginationDict = json.loads(pagination)
                if paginationDict:
                    paginationDict = normalize_pagination_dict(paginationDict)
                    paginationParams = PaginationParams(**paginationDict)
            except (json.JSONDecodeError, ValueError) as e:
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid pagination parameter: {str(e)}"
                )

        appInterface = interfaceDbApp.getInterface(context.user, mandateId=context.mandateId)
        paginationParams = _paginationParams
        appInterface = _appInterfaceForGrouping

        if context.mandateId:
            # Get users for specific mandate using getUsersByMandate
            result = appInterface.getUsersByMandate(str(context.mandateId), paginationParams)

            if paginationParams and hasattr(result, 'items'):
                enriched = enrichRowsWithFkLabels(_usersToDicts(result.items), User)
                enriched = _applyGroupScope(enrichRowsWithFkLabels(_usersToDicts(result.items), User), _groupCtx.itemIds)
                return {
                    "items": enriched,
                    "pagination": PaginationMetadata(

@@ -248,17 +251,18 @@ def get_users(
                        sort=paginationParams.sort,
                        filters=paginationParams.filters
                    ).model_dump(),
                    "groupTree": _groupCtx.groupTree,
                }
            else:
                users = result if isinstance(result, list) else result.items if hasattr(result, 'items') else []
                enriched = enrichRowsWithFkLabels(_usersToDicts(users), User)
                return {"items": enriched, "pagination": None}
                enriched = _applyGroupScope(enrichRowsWithFkLabels(_usersToDicts(users), User), _groupCtx.itemIds)
                return {"items": enriched, "pagination": None, "groupTree": _groupCtx.groupTree}
        elif context.isPlatformAdmin:
            # PlatformAdmin without mandateId — DB-level pagination via interface
            result = appInterface.getAllUsers(paginationParams)

            if paginationParams and hasattr(result, 'items'):
                enriched = enrichRowsWithFkLabels(_usersToDicts(result.items), User)
                enriched = _applyGroupScope(enrichRowsWithFkLabels(_usersToDicts(result.items), User), _groupCtx.itemIds)
                return {
                    "items": enriched,
                    "pagination": PaginationMetadata(

@@ -269,11 +273,12 @@ def get_users(
                        sort=paginationParams.sort,
                        filters=paginationParams.filters
                    ).model_dump(),
                    "groupTree": _groupCtx.groupTree,
                }
            else:
                users = result if isinstance(result, list) else (result.items if hasattr(result, 'items') else [])
                enriched = enrichRowsWithFkLabels(_usersToDicts(users), User)
                return {"items": enriched, "pagination": None}
                enriched = _applyGroupScope(enrichRowsWithFkLabels(_usersToDicts(users), User), _groupCtx.itemIds)
                return {"items": enriched, "pagination": None, "groupTree": _groupCtx.groupTree}
        else:
            # Non-SysAdmin without mandateId: aggregate users across all admin mandates
            rootInterface = getRootInterface()

@@ -313,16 +318,16 @@ def get_users(
            ]

            from modules.routes.routeHelpers import applyFiltersAndSort as _applyFiltersAndSortHelper
            filteredUsers = _applyFiltersAndSortHelper(allUsers, paginationParams)
            filteredUsers = _applyGroupScope(_applyFiltersAndSortHelper(allUsers, paginationParams), _groupCtx.itemIds)
            enriched = enrichRowsWithFkLabels(filteredUsers, User)

            if paginationParams:
                import math
                totalItems = len(enriched)
                totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
                startIdx = (paginationParams.page - 1) * paginationParams.pageSize
                endIdx = startIdx + paginationParams.pageSize

                return {
                    "items": enriched[startIdx:endIdx],
                    "pagination": PaginationMetadata(

@@ -333,9 +338,10 @@ def get_users(
                        sort=paginationParams.sort,
                        filters=paginationParams.filters
                    ).model_dump(),
                    "groupTree": _groupCtx.groupTree,
                }
            else:
                return {"items": enriched, "pagination": None}
                return {"items": enriched, "pagination": None, "groupTree": _groupCtx.groupTree}
    except HTTPException:
        raise
    except Exception as e:
@@ -701,3 +701,157 @@ def paginateInMemory(
    offset = (paginationParams.page - 1) * paginationParams.pageSize
    pageItems = items[offset:offset + paginationParams.pageSize]
    return pageItems, totalItems


# ---------------------------------------------------------------------------
# Table Grouping helpers
# ---------------------------------------------------------------------------

from dataclasses import dataclass, field as dc_field


@dataclass
class GroupingContext:
    """
    Result of handleGroupingInRequest.
    Carries the group tree for the response and the resolved item-ID set for
    group-scope filtering (None = no active group scope).
    """
    groupTree: Optional[list]  # List[TableGroupNode] serialised as dicts — for response
    itemIds: Optional[set]     # Set[str] when groupId was set, else None


def _collectItemIds(nodes: list, groupId: str) -> Optional[set]:
    """
    Recursively search *nodes* for a node whose id == groupId and collect
    all itemIds from it and all its descendant subGroups.
    Returns None if the group is not found.
    """
    for node in nodes:
        nodeId = node.get("id") if isinstance(node, dict) else getattr(node, "id", None)
        if nodeId == groupId:
            ids: set = set()
            _collectAllIds(node, ids)
            return ids
        subGroups = node.get("subGroups", []) if isinstance(node, dict) else getattr(node, "subGroups", [])
        result = _collectItemIds(subGroups, groupId)
        if result is not None:
            return result
    return None


def _collectAllIds(node, ids: set) -> None:
    """Collect itemIds from a node and all its descendants into ids."""
    nodeItemIds = node.get("itemIds", []) if isinstance(node, dict) else getattr(node, "itemIds", [])
    for iid in nodeItemIds:
        ids.add(str(iid))
    subGroups = node.get("subGroups", []) if isinstance(node, dict) else getattr(node, "subGroups", [])
    for child in subGroups:
        _collectAllIds(child, ids)


def _removeGroupFromTree(nodes: list, groupId: str) -> list:
    """Remove a group node (and all descendants) from the tree by id."""
    result = []
    for node in nodes:
        nodeId = node.get("id") if isinstance(node, dict) else getattr(node, "id", None)
        if nodeId == groupId:
            continue  # skip this node (remove it)
        subGroups = node.get("subGroups", []) if isinstance(node, dict) else getattr(node, "subGroups", [])
        filtered_sub = _removeGroupFromTree(subGroups, groupId)
        if isinstance(node, dict):
            node = {**node, "subGroups": filtered_sub}
        result.append(node)
    return result


def handleGroupingInRequest(
    paginationParams: Optional[PaginationParams],
    interface,
    contextKey: str,
) -> GroupingContext:
    """
    Central grouping handler — call at the start of every list route that
    supports table grouping.

    Steps (in order):
    1. If paginationParams.saveGroupTree is set:
       persist the new tree via interface.upsertTableGrouping, then clear
       saveGroupTree from paginationParams so it is not treated as a filter.
    2. Load the current group tree from the DB (used in step 3 and response).
    3. If paginationParams.groupId is set:
       resolve it to a Set[str] of itemIds (including all sub-groups),
       then clear groupId from paginationParams so it is not treated as a
       normal filter field.
    4. Return a GroupingContext with groupTree (for the response) and itemIds
       (for applyGroupScopeFilter).

    The caller does NOT need to handle any grouping logic itself — just call
    applyGroupScopeFilter(items, groupCtx.itemIds) and embed groupCtx.groupTree
    in the response dict.
    """
    from modules.datamodels.datamodelPagination import TableGroupNode

    groupTree = None
    itemIds = None

    if paginationParams is None:
        try:
            existing = interface.getTableGrouping(contextKey)
            if existing:
                groupTree = [n.model_dump() if hasattr(n, "model_dump") else n for n in existing.rootGroups]
        except Exception as e:
            logger.warning(f"handleGroupingInRequest: getTableGrouping failed: {e}")
        return GroupingContext(groupTree=groupTree, itemIds=None)

    # Step 1: persist saveGroupTree if present
    if paginationParams.saveGroupTree is not None:
        try:
            saved = interface.upsertTableGrouping(contextKey, paginationParams.saveGroupTree)
            groupTree = [n.model_dump() if hasattr(n, "model_dump") else n for n in saved.rootGroups]
        except Exception as e:
            logger.error(f"handleGroupingInRequest: upsertTableGrouping failed: {e}")
        paginationParams.saveGroupTree = None

    # Step 2: load current tree (only if not already set from save above)
    if groupTree is None:
        try:
            existing = interface.getTableGrouping(contextKey)
            if existing:
                groupTree = [n.model_dump() if hasattr(n, "model_dump") else n for n in existing.rootGroups]
        except Exception as e:
            logger.warning(f"handleGroupingInRequest: getTableGrouping failed: {e}")

    # Step 3: resolve groupId to itemIds set
    if paginationParams.groupId is not None:
        targetGroupId = paginationParams.groupId
        paginationParams.groupId = None  # remove so it is not treated as a normal filter
        if groupTree:
            itemIds = _collectItemIds(groupTree, targetGroupId)
            if itemIds is None:
                logger.warning(
                    f"handleGroupingInRequest: groupId={targetGroupId!r} not found in tree "
                    f"for contextKey={contextKey!r} — returning empty set"
                )
                itemIds = set()  # unknown group → show nothing rather than everything
        else:
            # groupId sent but no tree saved yet → return empty (nothing belongs to any group)
            logger.warning(
                f"handleGroupingInRequest: groupId={targetGroupId!r} set but no tree exists "
                f"for contextKey={contextKey!r} — returning empty set"
            )
            itemIds = set()

    return GroupingContext(groupTree=groupTree, itemIds=itemIds)


def applyGroupScopeFilter(items: List[Dict[str, Any]], itemIds: Optional[set]) -> List[Dict[str, Any]]:
    """
    Filter items to those whose "id" field is in itemIds.
    Returns items unchanged when itemIds is None (no active group scope).
    Works for both normal list items and for mode=ids / mode=filterValues flows
    — call it before handleIdsInMemory / handleFilterValuesInMemory.
    """
    if itemIds is None:
        return items
    return [item for item in items if str(item.get("id", "")) in itemIds]
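To make the calling convention concrete, a stripped-down list route against a stub interface (DemoInterface and the data are invented; only the call order follows the docstring above):

# Sketch of the three-line contract every grouped list route follows.
class DemoInterface:
    def getTableGrouping(self, contextKey):
        return None                              # nothing persisted yet

def demo_list_route(items):
    groupCtx = handleGroupingInRequest(None, DemoInterface(), "demo")
    items = applyGroupScopeFilter(items, groupCtx.itemIds)    # no-op here: itemIds is None
    return {"items": items, "pagination": None, "groupTree": groupCtx.groupTree}

print(demo_list_route([{"id": "x"}]))            # items pass through, groupTree is None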
@@ -241,6 +241,29 @@ async def auth_connect_callback(
    )
    interface.saveConnectionToken(token)

    try:
        from modules.shared.callbackRegistry import callbackRegistry

        if connection.knowledgeIngestionEnabled:
            callbackRegistry.trigger(
                "connection.established",
                connectionId=connection.id,
                authority=str(getattr(connection.authority, "value", connection.authority) or "clickup"),
                userId=str(user.id),
            )
        else:
            logger.info(
                "ingestion.connection.bootstrap.skipped — knowledge ingestion disabled by user",
                extra={
                    "event": "ingestion.connection.bootstrap.skipped",
                    "connectionId": connection.id,
                    "authority": "clickup",
                    "reason": "consent_disabled",
                },
            )
    except Exception as _cbErr:
        logger.warning("connection.established callback failed for %s: %s", connection.id, _cbErr)

    return HTMLResponse(
        content=f"""
        <html>


@@ -479,6 +479,29 @@ async def auth_connect_callback(
    )
    interface.saveConnectionToken(token)

    try:
        from modules.shared.callbackRegistry import callbackRegistry

        if connection.knowledgeIngestionEnabled:
            callbackRegistry.trigger(
                "connection.established",
                connectionId=connection.id,
                authority=str(getattr(connection.authority, "value", connection.authority) or "google"),
                userId=str(user.id),
            )
        else:
            logger.info(
                "ingestion.connection.bootstrap.skipped — knowledge ingestion disabled by user",
                extra={
                    "event": "ingestion.connection.bootstrap.skipped",
                    "connectionId": connection.id,
                    "authority": "google",
                    "reason": "consent_disabled",
                },
            )
    except Exception as _cbErr:
        logger.warning("connection.established callback failed for %s: %s", connection.id, _cbErr)

    return HTMLResponse(
        content=f"""
        <html>


@@ -420,6 +420,29 @@ async def auth_connect_callback(
    )
    interface.saveConnectionToken(token)

    try:
        from modules.shared.callbackRegistry import callbackRegistry

        if connection.knowledgeIngestionEnabled:
            callbackRegistry.trigger(
                "connection.established",
                connectionId=connection.id,
                authority=str(getattr(connection.authority, "value", connection.authority) or "msft"),
                userId=str(user.id),
            )
        else:
            logger.info(
                "ingestion.connection.bootstrap.skipped — knowledge ingestion disabled by user",
                extra={
                    "event": "ingestion.connection.bootstrap.skipped",
                    "connectionId": connection.id,
                    "authority": "msft",
                    "reason": "consent_disabled",
                },
            )
    except Exception as _cbErr:
        logger.warning("connection.established callback failed for %s: %s", connection.id, _cbErr)

    return HTMLResponse(
        content=f"""
        <html>
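All three callbacks publish the same connection.established event; the registry decouples the OAuth routes from the ingestion consumer registered at app startup. A minimal sketch of that publish/subscribe shape (this registry class is invented; only the trigger signature mirrors the calls above):

# Invented minimal registry; real failure isolation, async handling, etc.
# live in modules.shared.callbackRegistry.
from collections import defaultdict
from typing import Callable

class MiniRegistry:
    def __init__(self):
        self._handlers = defaultdict(list)

    def subscribe(self, event: str, handler: Callable) -> None:
        self._handlers[event].append(handler)

    def trigger(self, event: str, **kwargs) -> None:
        for handler in self._handlers[event]:
            handler(**kwargs)

registry = MiniRegistry()
registry.subscribe("connection.established",
                   lambda **kw: print("bootstrap ingestion for", kw["connectionId"]))
registry.trigger("connection.established",
                 connectionId="c1", authority="google", userId="u1")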
@@ -11,8 +11,6 @@ from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry

from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
    _getOrCreateTempFolder,
    _looksLikeBinary,
    _resolveFileScope,
    _MAX_TOOL_RESULT_CHARS,
)


@@ -392,65 +390,7 @@ def _registerDocumentTools(registry: ToolRegistry, services):
            if chunkMime:
                mimeType = chunkMime

        # 2) File not yet indexed -> trigger extraction via ExtractionService, then retry
        if not imageData and knowledgeService and not knowledgeService.isFileIndexed(fileId):
            try:
                chatService = services.chat
                fileInfo = chatService.getFileInfo(fileId)
                fileContent = chatService.getFileContent(fileId)
                if fileContent and fileInfo:
                    rawData = fileContent.get("data", "")
                    if isinstance(rawData, str) and len(rawData) > 100:
                        rawBytes = _b64.b64decode(rawData)
                    elif isinstance(rawData, bytes):
                        rawBytes = rawData
                    else:
                        rawBytes = None

                    if rawBytes:
                        from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry
                        from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
                        from modules.datamodels.datamodelExtraction import ExtractionOptions

                        fileMime = fileInfo.get("mimeType", "application/octet-stream")
                        fileName = fileInfo.get("fileName", fileId)
                        extracted = runExtraction(
                            ExtractorRegistry(), None,
                            rawBytes, fileName, fileMime, ExtractionOptions(),
                        )

                        contentObjects = []
                        for part in extracted.parts:
                            tg = (part.typeGroup or "").lower()
                            ct = "image" if tg == "image" else "text"
                            if not part.data or not part.data.strip():
                                continue
                            contentObjects.append({
                                "contentObjectId": part.id,
                                "contentType": ct,
                                "data": part.data,
                                "contextRef": {"containerPath": fileName, "location": part.label, **(part.metadata or {})},
                            })

                        if contentObjects:
                            _diFiId, _diMId = _resolveFileScope(fileId, context)
                            await knowledgeService.indexFile(
                                fileId=fileId, fileName=fileName, mimeType=fileMime,
                                userId=context.get("userId", ""), contentObjects=contentObjects,
                                featureInstanceId=_diFiId,
                                mandateId=_diMId,
                            )

                            chunks = knowledgeService._knowledgeDb.getContentChunks(fileId)
                            imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
                            if pageIndex is not None:
                                imageChunks = [c for c in imageChunks if c.get("contextRef", {}).get("pageIndex") == pageIndex]
                            if imageChunks:
                                imageData = imageChunks[0].get("data", "")
            except Exception as extractErr:
                logger.warning(f"describeImage: on-demand extraction failed: {extractErr}")

        # 3) Direct image file (not a container) - use raw file data
        # 2) Direct image file (not a container) - use raw file data
        if not imageData:
            chatService = services.chat
            fileContent = chatService.getFileContent(fileId)

@@ -460,7 +400,7 @@ def _registerDocumentTools(registry: ToolRegistry, services):
            imageData = fileContent.get("data", "")
            mimeType = fileMimeType

        # 4) PDF page rendering: render the requested page as an image via PyMuPDF
        # 3) PDF page rendering: render the requested page as an image via PyMuPDF
        if not imageData:
            chatService = services.chat
            fileInfo = chatService.getFileInfo(fileId) if hasattr(chatService, "getFileInfo") else None
@@ -1,6 +1,6 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Shared helpers for core agent tools (file scope, binary detection, temp folder)."""
"""Shared helpers for core agent tools (file scope, binary detection, group helpers)."""

import logging
import uuid

@@ -46,39 +46,60 @@ def _looksLikeBinary(data: bytes, sampleSize: int = 1024) -> bool:
    return nonPrintable / len(sample) > 0.10


def _getOrCreateInstanceFolder(chatService, featureInstanceId: str, mandateId: str = "") -> Optional[str]:
    """Return the folder ID for a feature instance, creating it on first use.

    Delegates to interfaceDbManagement._ensureFeatureInstanceFolder.
    AI tools call this when saving a file without an explicit folderId
    so that instance-produced files land in a named folder automatically.
    """
    try:
        dbMgmt = chatService.interfaceDbComponent
        return dbMgmt._ensureFeatureInstanceFolder(featureInstanceId, mandateId)
    except Exception as e:
        logger.warning(f"Could not get/create instance folder for {featureInstanceId}: {e}")
        return None


def _getOrCreateTempFolder(chatService) -> Optional[str]:
    """Return the ID of the root-level 'Temp' folder, creating it if it doesn't exist."""
    """Deprecated stub: folder-based organisation has been replaced by grouping.

    Returns None unconditionally so callers skip the (now removed) folderId
    assignment. Remove callers incrementally and delete this stub afterwards.
    """
    logger.debug("_getOrCreateTempFolder called – folder support removed, returning None")
    return None


async def _getOrCreateInstanceGroup(
    appInterface,
    featureInstanceId: str,
    contextKey: str = "files/list",
) -> Optional[str]:
    """Return groupId of the default group for a feature instance; create if needed."""
    try:
        allFolders = chatService.interfaceDbComponent.listFolders()
        tempFolder = next(
            (f for f in allFolders
             if f.get("name") == "Temp" and not f.get("parentId")),
            None,
        )
        if tempFolder:
            return tempFolder.get("id")
        newFolder = chatService.interfaceDbComponent.createFolder("Temp", parentId=None)
        return newFolder.get("id") if newFolder else None
        existing = appInterface.getTableGrouping(contextKey)
        nodes = [
            n.model_dump() if hasattr(n, "model_dump") else (n if isinstance(n, dict) else vars(n))
            for n in (existing.rootGroups if existing else [])
        ]

        def _find(nds):
            for nd in nds:
                meta = nd.get("meta", {}) if isinstance(nd, dict) else getattr(nd, "meta", {})
                if (meta or {}).get("featureInstanceId") == featureInstanceId:
                    return nd.get("id") if isinstance(nd, dict) else getattr(nd, "id", None)
                found = _find(nd.get("subGroups", []) if isinstance(nd, dict) else getattr(nd, "subGroups", []))
                if found:
                    return found
            return None

        found = _find(nodes)
        if found:
            return found
        newId = str(uuid.uuid4())
        nodes.append({"id": newId, "name": featureInstanceId, "itemIds": [], "subGroups": [], "meta": {"featureInstanceId": featureInstanceId}})
        appInterface.upsertTableGrouping(contextKey, nodes)
        return newId
    except Exception as e:
        logger.warning(f"Could not get/create Temp folder: {e}")
        logger.error(f"_getOrCreateInstanceGroup: {e}")
        return None


async def _getOrCreateTempGroup(
    appInterface,
    sessionId: str,
    contextKey: str = "files/list",
) -> Optional[str]:
    """Return groupId of a temporary group for a session; create if needed."""
    return await _getOrCreateInstanceGroup(appInterface, f"_temp_{sessionId}", contextKey)


def _attachFileAsChatDocument(
    services: Any,
    fileItem: Any,
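A find-or-create round trip through the new group helper, against an invented stub interface (the assertion shows the second call reuses the node keyed by meta.featureInstanceId rather than duplicating it):

# StubAppInterface is invented; _getOrCreateInstanceGroup is the helper above.
import asyncio

class StubAppInterface:
    def __init__(self):
        self._trees = {}

    def getTableGrouping(self, contextKey):
        roots = self._trees.get(contextKey)
        return type("Grouping", (), {"rootGroups": roots})() if roots else None

    def upsertTableGrouping(self, contextKey, nodes):
        self._trees[contextKey] = nodes

async def demo():
    iface = StubAppInterface()
    first = await _getOrCreateInstanceGroup(iface, "featureX")
    second = await _getOrCreateInstanceGroup(iface, "featureX")
    assert first == second                       # found again, not re-created

asyncio.run(demo())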
@@ -25,142 +25,11 @@ def _registerMediaTools(registry: ToolRegistry, services):
    # ---- Document rendering tool ----

    def _markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
        """Convert markdown content to the standard document JSON format expected by renderers."""
        import re as _re

        sections = []
        order = 0
        lines = markdown.split("\n")
        i = 0

        def _nextId():
            nonlocal order
            order += 1
            return f"s_{order}"

        while i < len(lines):
            line = lines[i]

            # --- Headings ---
            headingMatch = _re.match(r'^(#{1,6})\s+(.+)', line)
            if headingMatch:
                level = len(headingMatch.group(1))
                text = headingMatch.group(2).strip()
                sections.append({
                    "id": _nextId(), "content_type": "heading", "order": order,
                    "elements": [{"content": {"text": text, "level": level}}],
                })
                i += 1
                continue

            # --- Fenced code blocks ---
            codeMatch = _re.match(r'^```(\w*)', line)
            if codeMatch:
                lang = codeMatch.group(1) or "text"
                codeLines = []
                i += 1
                while i < len(lines) and not lines[i].startswith("```"):
                    codeLines.append(lines[i])
                    i += 1
                i += 1
                sections.append({
                    "id": _nextId(), "content_type": "code_block", "order": order,
                    "elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}],
                })
                continue

            # --- Tables ---
            tableMatch = _re.match(r'^\|(.+)\|$', line)
            if tableMatch and (i + 1) < len(lines) and _re.match(r'^\|[\s\-:|]+\|$', lines[i + 1]):
                headerCells = [c.strip() for c in tableMatch.group(1).split("|")]
                i += 2
                rows = []
                while i < len(lines) and _re.match(r'^\|(.+)\|$', lines[i]):
                    rowCells = [c.strip() for c in lines[i][1:-1].split("|")]
                    rows.append(rowCells)
                    i += 1
                sections.append({
                    "id": _nextId(), "content_type": "table", "order": order,
                    "elements": [{"content": {"headers": headerCells, "rows": rows}}],
                })
                continue

            # --- Bullet / numbered lists ---
            listMatch = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', line)
            if listMatch:
                isNumbered = bool(_re.match(r'\d+[.)]', listMatch.group(2)))
                items = []
                while i < len(lines) and _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]):
                    m = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i])
                    items.append({"text": m.group(3).strip()})
                    i += 1
                sections.append({
                    "id": _nextId(), "content_type": "bullet_list", "order": order,
                    "elements": [{"content": {"items": items, "list_type": "numbered" if isNumbered else "bullet"}}],
                })
                continue

            # --- Empty lines (skip) ---
            if not line.strip():
                i += 1
                continue

            # --- Images:  or  ---
            imgMatch = _re.match(r'^!\[([^\]]*)\]\(([^)]+)\)', line)
            if imgMatch:
                altText = imgMatch.group(1).strip() or "Image"
                src = imgMatch.group(2).strip()
                fileId = ""
                if src.startswith("file:"):
                    fileId = src[5:]
                sections.append({
                    "id": _nextId(), "content_type": "image", "order": order,
                    "elements": [{
                        "content": {
                            "altText": altText,
                            "base64Data": "",
                            "_fileRef": fileId,
                            "_srcUrl": src if not fileId else "",
                        }
                    }],
                })
                i += 1
                continue

            # --- Paragraph (collect consecutive non-empty lines) ---
            paraLines = []
            while i < len(lines) and lines[i].strip() and not _re.match(r'^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )', lines[i]):
                paraLines.append(lines[i])
                i += 1
            if paraLines:
                sections.append({
                    "id": _nextId(), "content_type": "paragraph", "order": order,
                    "elements": [{"content": {"text": " ".join(paraLines)}}],
                })
                continue

            i += 1

        if not sections:
            sections.append({
                "id": _nextId(), "content_type": "paragraph", "order": order,
                "elements": [{"content": {"text": markdown.strip() or "(empty)"}}],
            })

        return {
            "metadata": {
                "split_strategy": "single_document",
                "source_documents": [],
                "extraction_method": "agent_rendering",
                "title": title,
                "language": language,
            },
            "documents": [{
                "id": "doc_1",
                "title": title,
                "sections": sections,
            }],
        }
        """Delegate to the consolidated parser in subDocumentUtility."""
        from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
        result = markdownToDocumentJson(markdown, title, language)
        result["metadata"]["extraction_method"] = "agent_rendering"
        return result
||||
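# Illustration only (not part of the diff): a minimal sketch of the JSON shape
# the parser above produces, assuming the consolidated parser in
# subDocumentUtility keeps the same section schema shown in the removed code.
#
#   markdownToDocumentJson("# Title\n\nHello world", "Demo", "en")
#   -> {
#        "metadata": {"split_strategy": "single_document", "title": "Demo", ...},
#        "documents": [{
#          "id": "doc_1",
#          "title": "Demo",
#          "sections": [
#            {"id": "s_1", "content_type": "heading", "order": 1,
#             "elements": [{"content": {"text": "Title", "level": 1}}]},
#            {"id": "s_2", "content_type": "paragraph", "order": 2,
#             "elements": [{"content": {"text": "Hello world"}}]},
#          ],
#        }],
#      }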
async def _renderDocument(args: Dict[str, Any], context: Dict[str, Any]):
    """Render agent-produced markdown content into any document format via the RendererRegistry."""

@ -245,35 +114,75 @@ def _registerMediaTools(registry: ToolRegistry, services):
        except Exception as e:
            logger.warning(f"renderDocument: knowledge service unavailable: {e}")
        resolvedImages = 0

        def _resolveImageRef(targetObj, fileRefKey="_fileRef", fileIdKey="fileId"):
            """Resolve a single image reference dict to base64Data in-place."""
            nonlocal resolvedImages
            fileRef = targetObj.get(fileRefKey, "") or targetObj.get(fileIdKey, "")
            if not fileRef or targetObj.get("base64Data"):
                return
            if knowledgeService:
                chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef)
                imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
                if imageChunks:
                    targetObj["base64Data"] = imageChunks[0].get("data", "")
                    chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png")
                    targetObj["mimeType"] = chunkMime
                    resolvedImages += 1
            if not targetObj.get("base64Data"):
                try:
                    rawBytes = services.chat.getFileData(fileRef)
                    if rawBytes:
                        import base64 as _b64
                        targetObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii")
                        targetObj["mimeType"] = "image/png"
                        resolvedImages += 1
                except Exception as e:
                    logger.warning(f"renderDocument: image resolve failed for fileRef={fileRef}: {e}")
            targetObj.pop("_fileRef", None)
            targetObj.pop("_srcUrl", None)

        def _resolveInlineRuns(runsList):
            """Scan a list of inline runs and resolve any image runs with fileId."""
            for run in runsList:
                if run.get("type") == "image" and run.get("fileId") and not run.get("base64Data"):
                    _resolveImageRef(run, fileRefKey="fileId", fileIdKey="fileId")

        for doc in structuredContent.get("documents", []):
            for section in doc.get("sections", []):
                if section.get("content_type") != "image":
                cType = section.get("content_type")
                # Block-level image sections
                if cType == "image":
                    for element in section.get("elements", []):
                        contentObj = element.get("content", {})
                        _resolveImageRef(contentObj)
                    continue
                for element in section.get("elements", []):
                    contentObj = element.get("content", {})
                    fileRef = contentObj.get("_fileRef", "")
                    if not fileRef or contentObj.get("base64Data"):
                        continue
                    if knowledgeService:
                        chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef)
                        imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
                        if imageChunks:
                            contentObj["base64Data"] = imageChunks[0].get("data", "")
                            chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png")
                            contentObj["mimeType"] = chunkMime
                            resolvedImages += 1
                    if not contentObj.get("base64Data"):
                        try:
                            rawBytes = services.chat.getFileData(fileRef)
                            if rawBytes:
                                import base64 as _b64
                                contentObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii")
                                contentObj["mimeType"] = "image/png"
                                resolvedImages += 1
                        except Exception as e:
                            logger.warning(f"renderDocument: image resolve failed for fileRef={fileRef}: {e}")
                    contentObj.pop("_fileRef", None)
                    contentObj.pop("_srcUrl", None)
                # Paragraphs with inlineRuns
                if cType == "paragraph":
                    for element in section.get("elements", []):
                        runs = element.get("content", {}).get("inlineRuns")
                        if runs:
                            _resolveInlineRuns(runs)
                    continue
                # Bullet lists - items are List[List[InlineRun]]
                if cType == "bullet_list":
                    for element in section.get("elements", []):
                        items = element.get("content", {}).get("items", [])
                        for item in items:
                            if isinstance(item, list):
                                _resolveInlineRuns(item)
                    continue
                # Tables - headers and row cells are List[InlineRun]
                if cType == "table":
                    for element in section.get("elements", []):
                        contentObj = element.get("content", {})
                        for cell in contentObj.get("headers", []):
                            if isinstance(cell, list):
                                _resolveInlineRuns(cell)
                        for row in contentObj.get("rows", []):
                            for cell in row:
                                if isinstance(cell, list):
                                    _resolveInlineRuns(cell)

        sectionCount = len(structuredContent.get("documents", [{}])[0].get("sections", []))
        logger.info(f"renderDocument: parsed {sectionCount} sections from markdown ({len(content)} chars), resolved {resolvedImages} image(s), format={outputFormat}")
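# Illustration only: what _resolveImageRef does to a block-level image section,
# assuming the file's image chunk is already present in the knowledge store
# (the fileId "file_123" is made up).
#
#   before: {"altText": "Chart", "base64Data": "", "_fileRef": "file_123", "_srcUrl": ""}
#   after:  {"altText": "Chart", "base64Data": "<base64 PNG>", "mimeType": "image/png"}
#
# The temporary "_fileRef"/"_srcUrl" markers are popped so renderers only ever
# see fully materialised base64 payloads.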

@ -285,6 +194,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
                language=language,
                title=title,
                userPrompt=content,
                style=args.get("style"),
            )

            if not documents:

@ -367,6 +277,20 @@ def _registerMediaTools(registry: ToolRegistry, services):
                "outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
                "title": {"type": "string", "description": "Document title", "default": "Document"},
                "language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"},
                "style": {
                    "type": "object",
                    "description": (
                        "Optional style overrides for the rendered document. Supports nested keys: "
                        "fonts (primary, monospace), colors (primary, secondary, accent, background), "
                        "headings (h1-h4 with sizePt, weight, color, spaceBeforePt, spaceAfterPt), "
                        "paragraph (sizePt, lineSpacing, color), table (headerBg, headerFg, headerSizePt, "
                        "bodySizePt, rowBandingEven, rowBandingOdd, borderColor, borderWidthPt), "
                        "list (bulletChar, indentPt, sizePt), image (defaultWidthPt, maxWidthPt, alignment), "
                        "codeBlock (fontSizePt, background, borderColor), "
                        "page (format, marginsPt, showPageNumbers, headerHeight, footerHeight, headerLogo, headerText, footerText). "
                        "Only provided keys override defaults; omitted keys keep their default values."
                    ),
                },
            },
        },
        readOnly=False,
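# Illustration only: a plausible `style` argument for the renderDocument tool,
# following the schema description above. The concrete fonts and colors are
# made-up values; omitted keys keep their defaults.
#
#   renderDocument(
#       content="# Report ...",
#       outputFormat="pdf",
#       style={
#           "fonts": {"primary": "Helvetica", "monospace": "Courier"},
#           "colors": {"primary": "#1A237E", "accent": "#FF6F00"},
#           "headings": {"h1": {"sizePt": 20, "weight": "bold", "color": "#1A237E"}},
#           "table": {"headerBg": "#1A237E", "headerFg": "#FFFFFF"},
#           "page": {"format": "A4", "showPageNumbers": True},
#       },
#   )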
@ -11,10 +11,9 @@ from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistr
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
    _attachFileAsChatDocument,
    _formatToolFileResult,
    _getOrCreateInstanceFolder,
    _getOrCreateTempFolder,
    _getOrCreateInstanceGroup,
    _getOrCreateTempGroup,
    _looksLikeBinary,
    _resolveFileScope,
    _MAX_TOOL_RESULT_CHARS,
)

@ -50,6 +49,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
            return ToolResult(toolCallId="", toolName="readFile", success=False, error="fileId is required")
        try:
            knowledgeService = services.getService("knowledge") if hasattr(services, "getService") else None
            fileStatus = None

            # 1) Knowledge Store: return already-extracted text chunks
            if knowledgeService:

@ -77,7 +77,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
                        data=f"[File {fileId} is currently being processed (status: {fileStatus}). Try again shortly.]",
                    )

            # 2) Not indexed yet: try on-demand extraction
            # 2) Not indexed yet: inspect file type to decide how to serve the agent
            # (binary -> instruct agent to wait / re-upload; text -> decode raw bytes inline)
            chatService = services.chat
            fileInfo = chatService.getFileInfo(fileId)
            if not fileInfo:
@ -100,83 +101,14 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
            isBinary = _looksLikeBinary(rawBytes)

            if isBinary:
                try:
                    from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry, ChunkerRegistry
                    from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
                    from modules.datamodels.datamodelExtraction import ExtractionOptions

                    extracted = runExtraction(
                        ExtractorRegistry(), ChunkerRegistry(),
                        rawBytes, fileName, mimeType, ExtractionOptions(),
                    )

                    contentObjects = []
                    for part in extracted.parts:
                        tg = (part.typeGroup or "").lower()
                        ct = "image" if tg == "image" else "text"
                        if not part.data or not part.data.strip():
                            continue
                        contentObjects.append({
                            "contentObjectId": part.id,
                            "contentType": ct,
                            "data": part.data,
                            "contextRef": {
                                "containerPath": fileName,
                                "location": part.label or "file",
                                **(part.metadata or {}),
                            },
                        })

                    if contentObjects:
                        if knowledgeService:
                            try:
                                userId = context.get("userId", "")
                                _fiId, _mId = _resolveFileScope(fileId, context)
                                await knowledgeService.indexFile(
                                    fileId=fileId, fileName=fileName, mimeType=mimeType,
                                    userId=userId, contentObjects=contentObjects,
                                    featureInstanceId=_fiId,
                                    mandateId=_mId,
                                )
                            except Exception as e:
                                logger.warning(f"readFile: knowledge indexing failed for {fileId}: {e}")

                        joined = ""
                        if knowledgeService:
                            _chunks = knowledgeService._knowledgeDb.getContentChunks(fileId)
                            _textChunks = [
                                c for c in (_chunks or [])
                                if c.get("contentType") != "image" and c.get("data")
                            ]
                            if _textChunks:
                                joined = "\n\n".join(c["data"] for c in _textChunks)
                        if not joined:
                            textParts = [o["data"] for o in contentObjects if o["contentType"] != "image"]
                            joined = "\n\n".join(textParts) if textParts else ""
                        if joined:
                            chunked = _applyOffsetLimit(joined, offset, limit)
                            if chunked is not None:
                                return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked)
                            if len(joined) > _MAX_TOOL_RESULT_CHARS:
                                joined = joined[:_MAX_TOOL_RESULT_CHARS] + f"\n\n[Truncated – showing first {_MAX_TOOL_RESULT_CHARS} chars of {len(joined)}. Use offset/limit to read specific sections.]"
                            return ToolResult(
                                toolCallId="", toolName="readFile", success=True,
                                data=joined,
                            )
                        imgCount = sum(1 for o in contentObjects if o["contentType"] == "image")
                        return ToolResult(
                            toolCallId="", toolName="readFile", success=True,
                            data=f"[Extracted {len(contentObjects)} content objects from '{fileName}' "
                                 f"({imgCount} images, no readable text). "
                                 f"Use describeImage(fileId='{fileId}') to analyze visual content.]",
                        )
                except Exception as extractErr:
                    logger.warning(f"readFile extraction failed for {fileId} ({fileName}): {extractErr}")

                return ToolResult(
                    toolCallId="", toolName="readFile", success=True,
                    data=f"[Binary file: '{fileName}', type={mimeType}, size={len(rawBytes)} bytes. "
                         f"Text extraction not available. Use describeImage for images.]",
                    data=(
                        f"[File '{fileName}' ({mimeType}) is not yet indexed "
                        f"(status: {fileStatus or 'unknown'}). Indexing runs automatically "
                        f"on upload. Please wait a few seconds and retry, or re-upload the file. "
                        f"For visual content use describeImage(fileId='{fileId}').]"
                    ),
                )

            # 3) Text file: decode raw bytes
@ -237,7 +169,6 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
        try:
            chatService = services.chat
            files = chatService.listFiles(
                folderId=args.get("folderId"),
                tags=args.get("tags"),
                search=args.get("search"),
            )

@ -290,18 +221,6 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
        except Exception as e:
            return ToolResult(toolCallId="", toolName="searchInFileContent", success=False, error=str(e))

    async def _listFolders(args: Dict[str, Any], context: Dict[str, Any]):
        try:
            chatService = services.chat
            folders = chatService.listFolders(parentId=args.get("parentId"))
            folderList = "\n".join(
                f"- {f.get('name', 'unnamed')} (id: {f.get('id', '?')})"
                for f in folders
            ) if folders else "No folders found."
            return ToolResult(toolCallId="", toolName="listFolders", success=True, data=folderList)
        except Exception as e:
            return ToolResult(toolCallId="", toolName="listFolders", success=False, error=str(e))

    async def _webSearch(args: Dict[str, Any], context: Dict[str, Any]):
        query = args.get("query", "")
        if not query:

@ -339,35 +258,6 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
        except Exception as e:
            return ToolResult(toolCallId="", toolName="tagFile", success=False, error=str(e))

    async def _moveFile(args: Dict[str, Any], context: Dict[str, Any]):
        fileId = args.get("fileId", "")
        targetFolderId = args.get("targetFolderId")
        if not fileId:
            return ToolResult(toolCallId="", toolName="moveFile", success=False, error="fileId is required")
        try:
            chatService = services.chat
            chatService.interfaceDbComponent.updateFile(fileId, {"folderId": targetFolderId})
            return ToolResult(
                toolCallId="", toolName="moveFile", success=True,
                data=f"File {fileId} moved to folder {targetFolderId or 'root'}"
            )
        except Exception as e:
            return ToolResult(toolCallId="", toolName="moveFile", success=False, error=str(e))

    async def _createFolder(args: Dict[str, Any], context: Dict[str, Any]):
        name = args.get("name", "")
        if not name:
            return ToolResult(toolCallId="", toolName="createFolder", success=False, error="name is required")
        try:
            chatService = services.chat
            folder = chatService.createFolder(name=name, parentId=args.get("parentId"))
            return ToolResult(
                toolCallId="", toolName="createFolder", success=True,
                data=f"Folder '{name}' created (id: {folder.get('id', '?')})"
            )
        except Exception as e:
            return ToolResult(toolCallId="", toolName="createFolder", success=False, error=str(e))

    async def _writeFile(args: Dict[str, Any], context: Dict[str, Any]):
        content = args.get("content", "")
        mode = args.get("mode", "create")
@ -422,12 +312,52 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
            fiId = context.get("featureInstanceId") or (services.featureInstanceId if services else "")
            if fiId:
                dbMgmt.updateFile(fileItem.id, {"featureInstanceId": fiId})
            if args.get("folderId"):
                dbMgmt.updateFile(fileItem.id, {"folderId": args["folderId"]})
            if args.get("groupId"):
                try:
                    appIface = chatService.interfaceDbApp
                    existing = appIface.getTableGrouping("files/list")
                    nodes = [n.model_dump() if hasattr(n, "model_dump") else (n if isinstance(n, dict) else vars(n)) for n in (existing.rootGroups if existing else [])]
                    def _addToGroup(nds, gid, fid):
                        for nd in nds:
                            nid = nd.get("id") if isinstance(nd, dict) else getattr(nd, "id", None)
                            if nid == gid:
                                ids = list(nd.get("itemIds", []) if isinstance(nd, dict) else getattr(nd, "itemIds", []))
                                if fid not in ids:
                                    ids.append(fid)
                                if isinstance(nd, dict):
                                    nd["itemIds"] = ids
                                return True
                            if _addToGroup(nd.get("subGroups", []) if isinstance(nd, dict) else getattr(nd, "subGroups", []), gid, fid):
                                return True
                        return False
                    _addToGroup(nodes, args["groupId"], fileItem.id)
                    appIface.upsertTableGrouping("files/list", nodes)
                except Exception as _ge:
                    logger.warning(f"writeFile: failed to add file to group {args['groupId']}: {_ge}")
            elif fiId:
                instanceFolderId = _getOrCreateInstanceFolder(chatService, fiId, context.get("mandateId", ""))
                if instanceFolderId:
                    dbMgmt.updateFile(fileItem.id, {"folderId": instanceFolderId})
                try:
                    appIface = chatService.interfaceDbApp
                    instanceGroupId = await _getOrCreateInstanceGroup(appIface, fiId)
                    if instanceGroupId:
                        existing = appIface.getTableGrouping("files/list")
                        nodes = [n.model_dump() if hasattr(n, "model_dump") else (n if isinstance(n, dict) else vars(n)) for n in (existing.rootGroups if existing else [])]
                        def _addToGroup2(nds, gid, fid):
                            for nd in nds:
                                nid = nd.get("id") if isinstance(nd, dict) else getattr(nd, "id", None)
                                if nid == gid:
                                    ids = list(nd.get("itemIds", []) if isinstance(nd, dict) else getattr(nd, "itemIds", []))
                                    if fid not in ids:
                                        ids.append(fid)
                                    if isinstance(nd, dict):
                                        nd["itemIds"] = ids
                                    return True
                                if _addToGroup2(nd.get("subGroups", []) if isinstance(nd, dict) else getattr(nd, "subGroups", []), gid, fid):
                                    return True
                            return False
                        _addToGroup2(nodes, instanceGroupId, fileItem.id)
                        appIface.upsertTableGrouping("files/list", nodes)
                except Exception as _ge:
                    logger.warning(f"writeFile: failed to add file to instance group for {fiId}: {_ge}")
            if args.get("tags"):
                dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]})
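# Illustration only: the grouping tree that _addToGroup walks. The shape is
# inferred from the code above (rootGroups -> subGroups recursion with
# per-node itemIds); the IDs and names are made up.
#
#   rootGroups = [
#       {"id": "grp_reports", "name": "Reports", "itemIds": ["file_a"],
#        "subGroups": [
#            {"id": "grp_2024", "name": "2024", "itemIds": ["file_b"], "subGroups": []},
#        ]},
#   ]
#
# _addToGroup(rootGroups, "grp_2024", "file_c") appends "file_c" to the nested
# node's itemIds and returns True; the updated tree is then persisted via
# upsertTableGrouping("files/list", nodes).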
@ -480,13 +410,13 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
    registry.register(
        "listFiles", _listFiles,
        description=(
            "List files in the local workspace. Filter by folder, tags, or search term. "
            "List files in the local workspace. Filter by tags or search term. "
            "To filter by group, use listItemsInGroup. "
            "For external data sources, use browseDataSource instead."
        ),
        parameters={
            "type": "object",
            "properties": {
                "folderId": {"type": "string", "description": "Filter by folder ID"},
                "tags": {"type": "array", "items": {"type": "string"}, "description": "Filter by tags (any match)"},
                "search": {"type": "string", "description": "Search in file names and descriptions"},
            }

@ -513,18 +443,6 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
        readOnly=True
    )

    registry.register(
        "listFolders", _listFolders,
        description="List folders in the local workspace. For external data sources, use browseDataSource instead.",
        parameters={
            "type": "object",
            "properties": {
                "parentId": {"type": "string", "description": "Parent folder ID (omit for root)"},
            }
        },
        readOnly=True
    )

    registry.register(
        "webSearch", _webSearch,
        description="Search the web for general information. Use readUrl to fetch content from a known URL instead.",

@ -550,34 +468,6 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
        readOnly=False
    )

    registry.register(
        "moveFile", _moveFile,
        description="Move a file to a different folder in the local workspace.",
        parameters={
            "type": "object",
            "properties": {
                "fileId": {"type": "string", "description": "The file ID to move"},
                "targetFolderId": {"type": "string", "description": "Target folder ID (null for root)"},
            },
            "required": ["fileId"]
        },
        readOnly=False
    )

    registry.register(
        "createFolder", _createFolder,
        description="Create a new folder in the local workspace.",
        parameters={
            "type": "object",
            "properties": {
                "name": {"type": "string", "description": "Folder name"},
                "parentId": {"type": "string", "description": "Parent folder ID (omit for root)"},
            },
            "required": ["name"]
        },
        readOnly=False
    )

    registry.register(
        "writeFile", _writeFile,
        description=(

@ -598,7 +488,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
                "content": {"type": "string", "description": "Content to write/append"},
                "mode": {"type": "string", "enum": ["create", "append", "overwrite"], "description": "Write mode (default: create)"},
                "fileId": {"type": "string", "description": "File ID (required for mode=append/overwrite)"},
                "folderId": {"type": "string", "description": "Target folder ID (mode=create only)"},
                "groupId": {"type": "string", "description": "Group ID to place the file in (mode=create only). Omit to use the instance default group."},
                "tags": {"type": "array", "items": {"type": "string"}, "description": "Tags (mode=create only)"},
            },
            "required": ["content"]
@ -758,55 +648,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
        readOnly=True
    )

    # ---- Phase 2: deleteFolder, renameFolder, moveFolder, copyFile, editFile ----

    async def _deleteFolder(args: Dict[str, Any], context: Dict[str, Any]):
        folderId = args.get("folderId", "")
        recursive = args.get("recursive", False)
        if not folderId:
            return ToolResult(toolCallId="", toolName="deleteFolder", success=False, error="folderId is required")
        try:
            chatService = services.chat
            result = chatService.interfaceDbComponent.deleteFolder(folderId, recursive=recursive)
            summary = f"Deleted {result.get('deletedFolders', 1)} folder(s) and {result.get('deletedFiles', 0)} file(s)"
            return ToolResult(
                toolCallId="", toolName="deleteFolder", success=True, data=summary,
                sideEvents=[{"type": "folderDeleted", "data": {"folderId": folderId, **result}}],
            )
        except Exception as e:
            return ToolResult(toolCallId="", toolName="deleteFolder", success=False, error=str(e))

    async def _renameFolder(args: Dict[str, Any], context: Dict[str, Any]):
        folderId = args.get("folderId", "")
        newName = args.get("newName", "")
        if not folderId or not newName:
            return ToolResult(toolCallId="", toolName="renameFolder", success=False, error="folderId and newName are required")
        try:
            chatService = services.chat
            chatService.interfaceDbComponent.renameFolder(folderId, newName)
            return ToolResult(
                toolCallId="", toolName="renameFolder", success=True,
                data=f"Folder {folderId} renamed to '{newName}'",
                sideEvents=[{"type": "folderUpdated", "data": {"folderId": folderId, "name": newName}}],
            )
        except Exception as e:
            return ToolResult(toolCallId="", toolName="renameFolder", success=False, error=str(e))

    async def _moveFolder(args: Dict[str, Any], context: Dict[str, Any]):
        folderId = args.get("folderId", "")
        targetParentId = args.get("targetParentId")
        if not folderId:
            return ToolResult(toolCallId="", toolName="moveFolder", success=False, error="folderId is required")
        try:
            chatService = services.chat
            chatService.interfaceDbComponent.moveFolder(folderId, targetParentId)
            return ToolResult(
                toolCallId="", toolName="moveFolder", success=True,
                data=f"Folder {folderId} moved to {targetParentId or 'root'}",
                sideEvents=[{"type": "folderUpdated", "data": {"folderId": folderId, "parentId": targetParentId}}],
            )
        except Exception as e:
            return ToolResult(toolCallId="", toolName="moveFolder", success=False, error=str(e))
    # ---- Phase 2: copyFile, editFile ----

    async def _copyFile(args: Dict[str, Any], context: Dict[str, Any]):
        fileId = args.get("fileId", "")
@ -816,7 +658,6 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
            chatService = services.chat
            copiedFile = chatService.interfaceDbComponent.copyFile(
                fileId,
                targetFolderId=args.get("targetFolderId"),
                newFileName=args.get("newFileName"),
            )
            return ToolResult(

@ -891,48 +732,6 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
        except Exception as e:
            return ToolResult(toolCallId="", toolName="replaceInFile", success=False, error=str(e))

    registry.register(
        "deleteFolder", _deleteFolder,
        description="Delete a folder from the local workspace. Set recursive=true to delete all contents.",
        parameters={
            "type": "object",
            "properties": {
                "folderId": {"type": "string", "description": "The folder ID to delete"},
                "recursive": {"type": "boolean", "description": "If true, delete folder and all contents (files and subfolders). Default: false"},
            },
            "required": ["folderId"]
        },
        readOnly=False
    )

    registry.register(
        "renameFolder", _renameFolder,
        description="Rename a folder in the local workspace.",
        parameters={
            "type": "object",
            "properties": {
                "folderId": {"type": "string", "description": "The folder ID to rename"},
                "newName": {"type": "string", "description": "New folder name"},
            },
            "required": ["folderId", "newName"]
        },
        readOnly=False
    )

    registry.register(
        "moveFolder", _moveFolder,
        description="Move a folder to a different parent in the local workspace.",
        parameters={
            "type": "object",
            "properties": {
                "folderId": {"type": "string", "description": "The folder ID to move"},
                "targetParentId": {"type": "string", "description": "Target parent folder ID (null/omit for root)"},
            },
            "required": ["folderId"]
        },
        readOnly=False
    )

    registry.register(
        "copyFile", _copyFile,
        description="Create an independent copy of a file in the local workspace.",

@ -940,7 +739,6 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
            "type": "object",
            "properties": {
                "fileId": {"type": "string", "description": "The file ID to copy"},
                "targetFolderId": {"type": "string", "description": "Target folder for the copy (default: same folder)"},
                "newFileName": {"type": "string", "description": "New file name (default: same name, auto-numbered if duplicate)"},
            },
            "required": ["fileId"]
@ -948,6 +746,137 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
        readOnly=False
    )

    # ---- Group tools (replace folder-based tools) ----

    async def _listGroups(args: Dict[str, Any], context: Dict[str, Any]):
        contextKey = args.get("contextKey", "files/list")
        try:
            chatService = services.chat
            appInterface = chatService.interfaceDbApp
            existing = appInterface.getTableGrouping(contextKey)
            if not existing:
                return ToolResult(toolCallId="", toolName="listGroups", success=True, data="No groups found.")

            def _flatten(nodes, depth=0):
                result = []
                for n in nodes:
                    nd = n.model_dump() if hasattr(n, "model_dump") else (n if isinstance(n, dict) else vars(n))
                    result.append({"id": nd.get("id"), "name": nd.get("name"), "depth": depth, "itemCount": len(nd.get("itemIds", []))})
                    result.extend(_flatten(nd.get("subGroups", []), depth + 1))
                return result

            groups = _flatten(existing.rootGroups)
            lines = "\n".join(
                f"{' ' * g['depth']}- {g['name']} (id: {g['id']}, items: {g['itemCount']})"
                for g in groups
            ) if groups else "No groups found."
            return ToolResult(toolCallId="", toolName="listGroups", success=True, data=lines)
        except Exception as e:
            return ToolResult(toolCallId="", toolName="listGroups", success=False, error=str(e))

    async def _listItemsInGroup(args: Dict[str, Any], context: Dict[str, Any]):
        groupId = args.get("groupId", "")
        contextKey = args.get("contextKey", "files/list")
        if not groupId:
            return ToolResult(toolCallId="", toolName="listItemsInGroup", success=False, error="groupId is required")
        try:
            from modules.routes.routeHelpers import _collectItemIds
            chatService = services.chat
            appInterface = chatService.interfaceDbApp
            existing = appInterface.getTableGrouping(contextKey)
            if not existing:
                return ToolResult(toolCallId="", toolName="listItemsInGroup", success=True, data="No groups found.")
            nodes = [n.model_dump() if hasattr(n, "model_dump") else (n if isinstance(n, dict) else vars(n)) for n in existing.rootGroups]
            ids = _collectItemIds(nodes, groupId)
            itemList = list(ids) if ids else []
            return ToolResult(
                toolCallId="", toolName="listItemsInGroup", success=True,
                data="\n".join(f"- {fid}" for fid in itemList) if itemList else "No items in group.",
            )
        except Exception as e:
            return ToolResult(toolCallId="", toolName="listItemsInGroup", success=False, error=str(e))

    async def _addItemsToGroup(args: Dict[str, Any], context: Dict[str, Any]):
        groupId = args.get("groupId", "")
        itemIds = args.get("itemIds", [])
        contextKey = args.get("contextKey", "files/list")
        if not groupId:
            return ToolResult(toolCallId="", toolName="addItemsToGroup", success=False, error="groupId is required")
        if not itemIds:
            return ToolResult(toolCallId="", toolName="addItemsToGroup", success=False, error="itemIds is required")
        try:
            chatService = services.chat
            appInterface = chatService.interfaceDbApp
            existing = appInterface.getTableGrouping(contextKey)
            nodes = [n.model_dump() if hasattr(n, "model_dump") else (n if isinstance(n, dict) else vars(n)) for n in (existing.rootGroups if existing else [])]

            def _add(nds):
                for nd in nds:
                    nid = nd.get("id") if isinstance(nd, dict) else getattr(nd, "id", None)
                    if nid == groupId:
                        existing_ids = list(nd.get("itemIds", []) if isinstance(nd, dict) else getattr(nd, "itemIds", []))
                        for fid in itemIds:
                            if fid not in existing_ids:
                                existing_ids.append(fid)
                        if isinstance(nd, dict):
                            nd["itemIds"] = existing_ids
                        return True
                    if _add(nd.get("subGroups", []) if isinstance(nd, dict) else getattr(nd, "subGroups", [])):
                        return True
                return False

            found = _add(nodes)
            if not found:
                return ToolResult(toolCallId="", toolName="addItemsToGroup", success=False, error=f"Group {groupId} not found")
            appInterface.upsertTableGrouping(contextKey, nodes)
            return ToolResult(
                toolCallId="", toolName="addItemsToGroup", success=True,
                data=f"Added {len(itemIds)} item(s) to group {groupId}",
            )
        except Exception as e:
            return ToolResult(toolCallId="", toolName="addItemsToGroup", success=False, error=str(e))

    registry.register(
        "listGroups", _listGroups,
        description="List all groups in the file grouping tree. Groups replace folders for organising files.",
        parameters={
            "type": "object",
            "properties": {
                "contextKey": {"type": "string", "description": "Grouping context key (default: 'files/list')"},
            }
        },
        readOnly=True
    )

    registry.register(
        "listItemsInGroup", _listItemsInGroup,
        description="List all file IDs assigned to a specific group (includes sub-groups recursively).",
        parameters={
            "type": "object",
            "properties": {
                "groupId": {"type": "string", "description": "The group ID to inspect"},
                "contextKey": {"type": "string", "description": "Grouping context key (default: 'files/list')"},
            },
            "required": ["groupId"]
        },
        readOnly=True
    )

    registry.register(
        "addItemsToGroup", _addItemsToGroup,
        description="Add one or more file IDs to an existing group.",
        parameters={
            "type": "object",
            "properties": {
                "groupId": {"type": "string", "description": "The group ID to add files to"},
                "itemIds": {"type": "array", "items": {"type": "string"}, "description": "List of file IDs to add"},
                "contextKey": {"type": "string", "description": "Grouping context key (default: 'files/list')"},
            },
            "required": ["groupId", "itemIds"]
        },
        readOnly=False
    )
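# Illustration only: the intended agent-side flow for the three group tools,
# sketched from the registrations above (tool names and parameters as
# registered; the concrete IDs and outputs are made up).
#
#   listGroups(contextKey="files/list")
#   -> "- Reports (id: grp_reports, items: 2)\n - 2024 (id: grp_2024, items: 1)"
#
#   listItemsInGroup(groupId="grp_2024")
#   -> "- file_b"
#
#   addItemsToGroup(groupId="grp_2024", itemIds=["file_c", "file_d"])
#   -> "Added 2 item(s) to group grp_2024"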
    registry.register(
        "replaceInFile", _replaceInFile,
        description=(

@ -268,24 +268,19 @@ class AgentService:
                info = chatService.getFileInfo(fid)

                if not info:
                    folderInfo = chatService.interfaceDbComponent.getFolder(fid)
                    if folderInfo:
                        folderName = folderInfo.get("name", fid)
                        folderFiles = chatService.listFiles(folderId=fid)
                        desc = f"### Folder: {folderName}\n - id: {fid}\n - type: folder\n - contains: {len(folderFiles)} file(s)"
                        if folderFiles:
                            desc += "\n - files:"
                            for ff in folderFiles[:30]:
                                ffName = ff.get("fileName", "?")
                                ffId = ff.get("id", "?")
                                ffMime = ff.get("mimeType", "?")
                                ffSize = ff.get("fileSize", ff.get("size", "?"))
                                desc += f"\n * {ffName} (id: {ffId}, type: {ffMime}, size: {ffSize} bytes)"
                            if len(folderFiles) > 30:
                                desc += f"\n ... and {len(folderFiles) - 30} more files"
                        desc += f'\nUse `listFiles(folderId="{fid}")` to get the full file list, then `readFile(fileId)` to read individual files.'
                        fileDescriptions.append(desc)
                        continue
                    # Check if fid is a group ID
                    try:
                        groupFileIds = chatService.listFilesInGroup(fid)
                        if groupFileIds:
                            allGroups = chatService.listGroups()
                            groupInfo = next((g for g in allGroups if g.get("id") == fid), None)
                            groupName = groupInfo.get("name", fid) if groupInfo else fid
                            desc = f"### Group: {groupName}\n - id: {fid}\n - type: group\n - contains: {len(groupFileIds)} file(s)"
                            desc += f'\nUse `listItemsInGroup(groupId="{fid}")` to get file IDs, then `readFile(fileId)` to read each.'
                            fileDescriptions.append(desc)
                            continue
                    except Exception:
                        pass
                    fileDescriptions.append(f"### File id: {fid}")
                    continue
@ -333,7 +328,7 @@ class AgentService:
            "These files/folders have been uploaded and processed through the extraction pipeline.\n"
            "Use `readFile(fileId)` to read text content, `readContentObjects(fileId)` for structured access, "
            "or `describeImage(fileId)` for image analysis.\n"
            "For folders, use `listFiles(folderId)` to get the files inside, then `readFile(fileId)` for each.\n"
            "For groups, use `listItemsInGroup(groupId)` to get the file IDs inside, then `readFile(fileId)` for each.\n"
            "For large PDFs/DOCX, avoid huge `renderDocument` tool JSON: build markdown with "
            "`writeFile` (create + append), then `renderDocument(sourceFileId=that file id, outputFormat=...)`.\n"
            "For small docs you may pass `content` inline. Embed images with `![alt](file:FILE_ID)` in markdown.\n\n"
@ -51,6 +51,10 @@ class _ServicesAdapter:
    def workflow(self):
        return self._context.workflow

    @workflow.setter
    def workflow(self, value):
        self._context.workflow = value

    @property
    def chat(self):
        return self._get_service("chat")

@ -86,7 +90,7 @@ class _ServicesAdapter:
        return getattr(w, "featureCode", None) if w else None

    def __getattr__(self, name: str):
        if name in ("allowedProviders", "preferredProviders", "currentUserLanguage"):
        if name in ("allowedProviders", "allowedModels", "preferredProviders", "currentUserLanguage"):
            return getattr(self.workflow, name, None) if self.workflow else None
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
@ -164,12 +168,29 @@ class AiService:
        # SPEECH_TEAMS: Dedicated pipeline, bypasses standard model selection
        if request.options and request.options.operationType == OperationTypeEnum.SPEECH_TEAMS:
            return await self._handleSpeechTeams(request)

        # FAIL-SAFE: Pre-flight billing validation (like 0 CHF credit card check)
        self._preflightBillingCheck()

        # Balance & provider permission checks
        await self._checkBillingBeforeAiCall()

        _opType = request.options.operationType if request.options else None
        _isNeutralizationCall = _opType in (
            OperationTypeEnum.NEUTRALIZATION_TEXT,
            OperationTypeEnum.NEUTRALIZATION_IMAGE,
        )

        if not _isNeutralizationCall:
            # FAIL-SAFE: Pre-flight billing validation (like 0 CHF credit card check)
            self._preflightBillingCheck()
            # Balance & provider permission checks
            await self._checkBillingBeforeAiCall()
        else:
            # Neutralization calls are system-level operations (connector anonymization).
            # They run without a mandate context (e.g. personal-scope connections) and
            # are billed the same way as embedding calls: best-effort, skipped when no
            # billing settings exist for an empty mandate.
            logger.debug(
                "callAi: skipping billing preflight for neutralization call "
                "(operationType=%s, user=%s)",
                _opType,
                getattr(getattr(self.services, 'user', None), 'id', 'unknown'),
            )

        # Calculate effective allowedProviders: RBAC ∩ Workflow
        effectiveProviders = self._calculateEffectiveProviders()

@ -177,6 +198,11 @@ class AiService:
            request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders})
            logger.debug(f"Effective allowedProviders for AI request: {effectiveProviders}")

        # Calculate effective allowedModels: Workflow ∩ Request (node-level)
        effectiveModels = self._calculateEffectiveModels(request)
        if effectiveModels and request.options:
            request.options = request.options.model_copy(update={'allowedModels': effectiveModels})

        # Neutralize prompt if enabled (before AI call)
        _wasNeutralized = False
        _excludedDocs: List[str] = []
@ -218,13 +244,25 @@ class AiService:
        Rehydration happens on the final AiCallResponse (not on individual str deltas).
        """
        await self.ensureAiObjectsInitialized()
        self._preflightBillingCheck()
        await self._checkBillingBeforeAiCall()

        _streamOpType = request.options.operationType if request.options else None
        _isNeutralizationStream = _streamOpType in (
            OperationTypeEnum.NEUTRALIZATION_TEXT,
            OperationTypeEnum.NEUTRALIZATION_IMAGE,
        )
        if not _isNeutralizationStream:
            self._preflightBillingCheck()
            await self._checkBillingBeforeAiCall()

        effectiveProviders = self._calculateEffectiveProviders()
        if effectiveProviders and request.options:
            request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders})

        # Calculate effective allowedModels: Workflow ∩ Request (node-level)
        effectiveModels = self._calculateEffectiveModels(request)
        if effectiveModels and request.options:
            request.options = request.options.model_copy(update={'allowedModels': effectiveModels})

        # Neutralize prompt if enabled (before streaming)
        _wasNeutralized = False
        _excludedDocs: List[str] = []
@ -1240,6 +1278,43 @@ detectedIntent-Werte:
            logger.warning(f"Error calculating effective providers: {e}")
            return None

    def _calculateEffectiveModels(self, request: AiCallRequest = None) -> Optional[List[str]]:
        """
        Calculate effective allowed models: Workflow.allowedModels ∩ request.options.allowedModels.

        AND-logic intersection:
        - If workflow specifies allowedModels, start with those.
        - If request (node-level) also specifies allowedModels, intersect.
        - Returns None if no model filtering is needed.
        """
        try:
            effectiveModels = None

            # Workflow-level allowedModels (from automation config)
            workflowModels = getattr(self.services, 'allowedModels', None)
            if workflowModels:
                effectiveModels = list(workflowModels)

            # Request-level (node-level) allowedModels
            requestModels = None
            if request and request.options and request.options.allowedModels:
                requestModels = request.options.allowedModels

            if requestModels:
                if effectiveModels:
                    effectiveModels = [m for m in effectiveModels if m in requestModels]
                else:
                    effectiveModels = list(requestModels)

            if effectiveModels:
                logger.debug(f"Model filter: Workflow={workflowModels}, Request={requestModels}, Effective={effectiveModels}")

            return effectiveModels if effectiveModels else None

        except Exception as e:
            logger.warning(f"Error calculating effective models: {e}")
            return None
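# Illustration only: the AND-logic intersection implemented above, with made-up
# model IDs.
#
#   workflow.allowedModels        = ["gpt-4o", "claude-sonnet", "gemini-pro"]
#   request.options.allowedModels = ["claude-sonnet", "gemini-pro", "mistral-large"]
#   -> effectiveModels            = ["claude-sonnet", "gemini-pro"]
#
# If only one side is set, that side is used unchanged; if neither is set the
# method returns None and no model filtering is applied downstream.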

    async def ensureAiObjectsInitialized(self):
        """Ensure aiObjects is initialized and submodules are ready."""
        if self.aiObjects is None:

@ -142,6 +142,8 @@ class AiCallLooper:
        MAX_MERGE_FAILS = 3
        mergeFailCount = 0  # Global counter for merge failures across entire loop
        lastValidCompletePart = None  # Store last successfully parsed completePart for fallback
        MAX_CONSECUTIVE_EMPTY_RESPONSES = 3
        consecutive_empty_responses = 0

        # Get parent operation ID for iteration operations (parentId should be operationId, not log entry ID)
        parentOperationId = operationId  # Use the parent's operationId directly

@ -284,8 +286,26 @@ class AiCallLooper:
                break

            if not result or not result.strip():
                logger.warning(f"Iteration {iteration}: Empty response, stopping")
                break
                consecutive_empty_responses += 1
                logger.warning(
                    "Iteration %s: Empty AI response (consecutive %s/%s) modelName=%s errorCount=%s",
                    iteration,
                    consecutive_empty_responses,
                    MAX_CONSECUTIVE_EMPTY_RESPONSES,
                    getattr(response, "modelName", None),
                    getattr(response, "errorCount", None),
                )
                if iterationOperationId:
                    self.services.chat.progressLogFinish(iterationOperationId, False)
                if consecutive_empty_responses >= MAX_CONSECUTIVE_EMPTY_RESPONSES:
                    logger.error(
                        "Stopping loop: %s consecutive empty responses from model",
                        consecutive_empty_responses,
                    )
                    break
                continue

            consecutive_empty_responses = 0

            # Check if this is a text response (not document generation)
            # Text responses don't need JSON parsing - return immediately after first successful response
@ -535,7 +555,12 @@ class AiCallLooper:
        # This code path should never be reached because all registered use cases
        # return early when JSON is complete. This would only execute for use cases that
        # require section extraction, but no such use cases are currently registered.
        logger.error(f"Unexpected code path: reached end of loop without return for use case '{useCaseId}'")
        logger.error(
            "End of callAiWithLooping without success for use case %r (iterations=%s, lastResultLen=%s)",
            useCaseId,
            iteration,
            len(result) if isinstance(result, str) else 0,
        )
        return result if result else ""

    def _isJsonStringIncomplete(self, jsonString: str) -> bool:

@ -90,8 +90,7 @@ class StructureGenerator:
            )

            try:
                # Build the chapter structure prompt with the content index
                structurePrompt = self._buildChapterStructurePrompt(
                structurePrompt, templateStructure = self._buildChapterStructurePrompt(
                    userPrompt=userPrompt,
                    contentParts=contentParts,
                    outputFormat=outputFormat

@ -108,12 +107,6 @@ class StructureGenerator:
                    resultFormat="json"
                )

                structurePrompt, templateStructure = self._buildChapterStructurePrompt(
                    userPrompt=userPrompt,
                    contentParts=contentParts,
                    outputFormat=outputFormat
                )

                # Create prompt builder for continuation support
                async def buildChapterStructurePromptWithContinuation(
                    continuationContext: Any,

@ -196,6 +189,13 @@ CRITICAL:
                contentParts=None  # Do not pass ContentParts - only metadata needed, not content extraction
            )

            if not isinstance(aiResponseJson, str) or not aiResponseJson.strip():
                raise ValueError(
                    "Structure generation returned no JSON text from the model (empty response after retries). "
                    "Check the AI provider, allowed models, billing, and debug artifact "
                    "'chapter_structure_generation_response'."
                )

            # Parse the complete JSON response (looping system already handles completion)
            extractedJson = self.services.utils.jsonExtractString(aiResponseJson)
            parsedJson, parseError, cleanedJson = self.services.utils.jsonTryParse(extractedJson)

@ -215,7 +215,12 @@ CRITICAL:
                    raise ValueError(f"Failed to parse JSON structure after repair: {str(parseError)}")
                else:
                    logger.error(f"Failed to repair JSON. Parse error: {str(parseError)}")
                    logger.error(f"Cleaned JSON preview (first 500 chars): {cleanedJson[:500]}")
                    raw_preview = (extractedJson or "")[:500]
                    logger.error(
                        "Raw extract preview (first 500 chars): %r",
                        raw_preview,
                    )
                    logger.error(f"Cleaned JSON preview (first 500 chars): {cleanedJson[:500]!r}")
                    raise ValueError(f"Failed to parse JSON structure: {str(parseError)}")
            else:
                structure = parsedJson
@ -23,7 +23,11 @@ class ChatService:
        from modules.interfaces.interfaceDbManagement import getInterface as getComponentInterface
        from modules.interfaces.interfaceDbChat import getInterface as getChatInterface
        self.interfaceDbApp = getAppInterface(context.user, mandateId=context.mandate_id)
        self.interfaceDbComponent = getComponentInterface(context.user, mandateId=context.mandate_id)
        self.interfaceDbComponent = getComponentInterface(
            context.user,
            mandateId=context.mandate_id,
            featureInstanceId=context.feature_instance_id,
        )
        self.interfaceDbChat = getChatInterface(
            context.user,
            mandateId=context.mandate_id,

@ -199,13 +203,8 @@ class ChatService:
                label = parts[1]
                messageFound = None
                for message in workflow.messages:
                    # Validate message belongs to this workflow
                    msgWorkflowId = getattr(message, 'workflowId', None)
                    if not msgWorkflowId or msgWorkflowId != workflowId:
                        if msgWorkflowId:
                            logger.warning(f"Message {message.id} has workflowId {msgWorkflowId} but belongs to workflow {workflowId}. Skipping.")
                        else:
                            logger.warning(f"Message {message.id} has no workflowId. Skipping.")
                        continue

                    msgLabel = getattr(message, 'documentsLabel', None)

@ -213,7 +212,6 @@ class ChatService:
                        messageFound = message
                        break

                # If found, add documents
                if messageFound and messageFound.documents:
                    allDocuments.extend(messageFound.documents)
                else:

@ -419,7 +417,7 @@ class ChatService:
        return None

    def getFileInfo(self, fileId: str) -> Dict[str, Any]:
        """Get file information including new fields (tags, folderId, description, status)."""
        """Get file information including new fields (tags, description, status)."""
        fileItem = self.interfaceDbComponent.getFile(fileId)
        if fileItem:
            return {

@ -430,7 +428,6 @@ class ChatService:
                "fileHash": fileItem.fileHash,
                "creationDate": fileItem.sysCreatedAt,
                "tags": getattr(fileItem, "tags", None),
                "folderId": getattr(fileItem, "folderId", None),
                "description": getattr(fileItem, "description", None),
                "status": getattr(fileItem, "status", None),
            }

@ -449,14 +446,12 @@ class ChatService:

    def listFiles(
        self,
        folderId: str = None,
        tags: List[str] = None,
        search: str = None,
    ) -> List[Dict[str, Any]]:
        """List files for the current user with optional filters.

        Args:
            folderId: Filter by folder (None = root / all).
            tags: Filter by tags (any match).
            search: Search in fileName and description.

@ -469,10 +464,6 @@ class ChatService:
        allFiles = self.interfaceDbComponent.getAllFiles()
        results = []
        for fileItem in allFiles:
            if folderId is not None:
                if fileItem.get("folderId") != folderId:
                    continue

            if tags:
                itemTags = fileItem.get("tags") or []
                if not any(t in itemTags for t in tags):
@ -492,27 +483,40 @@ class ChatService:
                "fileSize": fileItem.get("fileSize"),
                "creationDate": fileItem.get("sysCreatedAt"),
                "tags": fileItem.get("tags"),
                "folderId": fileItem.get("folderId"),
                "description": fileItem.get("description"),
                "status": fileItem.get("status"),
            })
        return results

    def listFolders(self, parentId: str = None) -> List[Dict[str, Any]]:
        """List file folders for the current user.
    def listGroups(self, contextKey: str = "files/list") -> list:
        """List all groups in the groupTree for the current context."""
        try:
            existing = self.interfaceDbApp.getTableGrouping(contextKey)
            if not existing:
                return []
            def _flatten(nodes, depth=0):
                result = []
                for n in nodes:
                    nd = n.model_dump() if hasattr(n, "model_dump") else (n if isinstance(n, dict) else vars(n))
                    result.append({"id": nd.get("id"), "name": nd.get("name"), "depth": depth, "itemCount": len(nd.get("itemIds", []))})
                    result.extend(_flatten(nd.get("subGroups", []), depth + 1))
                return result
            return _flatten(existing.rootGroups)
        except Exception as e:
            return []

        Args:
            parentId: Optional parent folder ID to filter by.
                      None = return ALL folders (for tree building).

        Returns:
            List of folder dicts.
        """
        return self.interfaceDbComponent.listFolders(parentId=parentId)

    def createFolder(self, name: str, parentId: str = None) -> Dict[str, Any]:
        """Create a new file folder with unique name validation."""
        return self.interfaceDbComponent.createFolder(name=name, parentId=parentId)
    def listFilesInGroup(self, groupId: str, contextKey: str = "files/list") -> list:
        """List file IDs in a specific group (recursive)."""
        try:
            from modules.routes.routeHelpers import _collectItemIds
            existing = self.interfaceDbApp.getTableGrouping(contextKey)
            if not existing:
                return []
            nodes = [n.model_dump() if hasattr(n, "model_dump") else (n if isinstance(n, dict) else vars(n)) for n in existing.rootGroups]
            ids = _collectItemIds(nodes, groupId)
            return list(ids) if ids else []
        except Exception:
            return []

    # ---- DataSource CRUD ----
@ -14,6 +14,7 @@ from .subDocumentUtility import (
    detectMimeTypeFromData,
    convertDocumentDataToString
)
from .styleDefaults import resolveStyle

logger = logging.getLogger(__name__)

@ -382,7 +383,7 @@ class GenerationService:
                'workflowId': 'unknown'
            }

    async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
    async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None) -> List[RenderedDocument]:
        """
        Render extracted JSON content to the specified output format.
        Processes EACH document separately and calls renderer for each.

@ -399,12 +400,14 @@ class GenerationService:
            userPrompt: User's original prompt for report generation
            aiService: AI service instance for generation prompt creation
            parentOperationId: Optional parent operation ID for hierarchical logging
            style: Optional style overrides (deep-merged with DEFAULT_STYLE)

        Returns:
            List of RenderedDocument objects.
            Each RenderedDocument represents one rendered file (main document or supporting file)
        """
        try:
            resolvedStyle = resolveStyle(style)
            # Validate JSON input
            if not isinstance(extractedContent, dict):
                raise ValueError("extractedContent must be a JSON dictionary")

@ -469,7 +472,7 @@ class GenerationService:
                docTitle = doc.get("title", title)

                # Render this document (can return multiple files, e.g., HTML + images)
                renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService)
                renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService, style=resolvedStyle)
                allRenderedDocuments.extend(renderedDocs)

            logger.info(f"Rendered {len(documents)} document(s) into {len(allRenderedDocuments)} file(s)")
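# Illustration only: a minimal renderReport call with the new style override,
# assuming a GenerationService instance named generationService and that
# resolveStyle() deep-merges the partial dict into DEFAULT_STYLE (per the
# docstring above).
#
#   rendered = await generationService.renderReport(
#       extractedContent=documentJson,        # {"metadata": ..., "documents": [...]}
#       outputFormat="pdf",
#       language="de",
#       title="Quartalsbericht",
#       style={"paragraph": {"sizePt": 11}},  # only this key deviates from defaults
#   )
#   # -> List[RenderedDocument], one entry per rendered file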
|
|
@ -84,7 +84,7 @@ class BaseRenderer(ABC):
        return list(supportedSectionTypes)

    @abstractmethod
    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
        """
        Render extracted JSON content to multiple documents.
        Each renderer must implement this method.

@ -95,6 +95,9 @@ class BaseRenderer(ABC):
            title: Report title
            userPrompt: Original user prompt for context
            aiService: AI service instance for additional processing
            style: Fully-resolved unified style dict from styleDefaults.resolveStyle().
                When provided, renderers use these values instead of their
                own defaults / AI-generated styles.

        Returns:
            List of RenderedDocument objects.

@ -102,6 +105,112 @@ class BaseRenderer(ABC):
        Even if only one document is returned, it must be wrapped in a list.
        """
        pass

    def _convertUnifiedStyleToInternal(self, style: Dict[str, Any]) -> Dict[str, Any]:
        """Convert the unified resolvedStyle dict (from styleDefaults) into
        the renderer-internal style-set format that all rendering methods already
        consume. Override in subclasses for format-specific tweaks."""
        h1 = style["headings"]["h1"]
        h2 = style["headings"]["h2"]
        h3 = style["headings"].get("h3", h2)
        h4 = style["headings"].get("h4", h3)
        tbl = style["table"]
        para = style["paragraph"]
        lst = style["list"]
        cb = style["codeBlock"]
        return {
            "title": {
                "font_size": h1["sizePt"], "color": h1["color"],
                "bold": h1.get("weight") == "bold", "align": "left",
            },
            "heading1": {
                "font_size": h1["sizePt"], "color": h1["color"],
                "bold": h1.get("weight") == "bold", "align": "left",
            },
            "heading2": {
                "font_size": h2["sizePt"], "color": h2["color"],
                "bold": h2.get("weight") == "bold", "align": "left",
            },
            "heading3": {
                "font_size": h3["sizePt"], "color": h3["color"],
                "bold": h3.get("weight") == "bold", "align": "left",
            },
            "heading4": {
                "font_size": h4["sizePt"], "color": h4["color"],
                "bold": h4.get("weight") == "bold", "align": "left",
            },
            "paragraph": {
                "font_size": para["sizePt"], "color": para["color"],
                "bold": False, "align": "left",
            },
            "table_header": {
                "background": tbl["headerBg"], "text_color": tbl["headerFg"],
                "bold": True, "align": "center",
            },
            "table_cell": {
                "background": tbl["rowBandingOdd"], "text_color": para["color"],
                "bold": False, "align": "left",
            },
            "table_border": {
                "style": "grid", "color": tbl["borderColor"],
            },
            "bullet_list": {
                "font_size": lst["sizePt"], "color": para["color"],
                "indent": lst["indentPt"],
            },
            "code_block": {
                "font": style["fonts"]["monospace"],
                "font_size": cb["fontSizePt"], "color": para["color"],
                "background": cb["background"],
            },
        }
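Illustrative input/output for _convertUnifiedStyleToInternal (values made up; renderer stands for any concrete subclass instance):

    resolved = {
        "fonts": {"monospace": "Consolas"},
        "headings": {
            "h1": {"sizePt": 24, "color": "#1F3864", "weight": "bold"},
            "h2": {"sizePt": 18, "color": "#2E5395", "weight": "bold"},
        },
        "paragraph": {"sizePt": 11, "color": "#333333"},
        "table": {"headerBg": "#1F3864", "headerFg": "#FFFFFF",
                  "rowBandingOdd": "#F2F2F2", "borderColor": "#DEE2E6"},
        "list": {"sizePt": 11, "indentPt": 18},
        "codeBlock": {"fontSizePt": 9, "background": "#F8F9FA"},
    }
    internal = renderer._convertUnifiedStyleToInternal(resolved)
    # internal["heading3"] falls back to the h2 values (no "h3" key above), and
    # internal["table_header"] == {"background": "#1F3864", "text_color": "#FFFFFF",
    #                              "bold": True, "align": "center"}
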

    @staticmethod
    def _inlineRunsFromContent(content: Dict[str, Any], *, itemsKey: str = None) -> Any:
        """Extract inline runs from new-format content, falling back to old format.

        For paragraphs (itemsKey=None):
            new: content["inlineRuns"] -> List[InlineRun]
            old: content["text"] -> wrapped in [{"type":"text","value":text}]

        For list items (itemsKey="items"):
            new: content["items"] is List[List[InlineRun]]
            old: content["items"] is List[str] or List[{"text":…}]
            Returns the items list (caller decides per-item conversion).

        For table headers/cells:
            new: each header/cell is List[InlineRun]
            old: each header/cell is a plain str
            Caller handles per-cell.
        """
        if itemsKey:
            return content.get(itemsKey, [])
        inlineRuns = content.get("inlineRuns")
        if inlineRuns:
            return inlineRuns
        text = content.get("text", "")
        if text:
            return [{"type": "text", "value": text}]
        return []

    @staticmethod
    def _inlineRunsForCell(cell) -> list:
        """Normalize a single table header or cell value to List[InlineRun].
        Accepts either a plain string or an already-correct list of run dicts."""
        if isinstance(cell, list):
            return cell
        return [{"type": "text", "value": str(cell) if cell is not None else ""}]

    @staticmethod
    def _inlineRunsForListItem(item) -> list:
        """Normalize a single list item to List[InlineRun].
        Accepts a plain string, a dict with 'text', or an already-correct list of run dicts."""
        if isinstance(item, list):
            return item
        if isinstance(item, dict):
            text = item.get("text", "")
            return [{"type": "text", "value": text}]
        return [{"type": "text", "value": str(item)}]

    def _determineFilename(self, title: str, mimeType: str) -> str:
        """Determine filename from title and mimeType."""

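The three normalization helpers are static, so they can be exercised directly on the class. A behavior check as a sketch; the import path is a guess, the expected values follow the code above:

    from modules.renderers.baseRenderer import BaseRenderer  # hypothetical path; adjust to the real package

    assert BaseRenderer._inlineRunsForCell("42") == [{"type": "text", "value": "42"}]
    assert BaseRenderer._inlineRunsForCell(None) == [{"type": "text", "value": ""}]
    runs = [{"type": "bold", "value": "hi"}]
    assert BaseRenderer._inlineRunsForCell(runs) is runs  # new format passes through untouched
    assert BaseRenderer._inlineRunsForListItem({"text": "a"}) == [{"type": "text", "value": "a"}]
    assert BaseRenderer._inlineRunsForListItem(7) == [{"type": "text", "value": "7"}]
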
@ -79,7 +79,15 @@ class RendererCodeCsv(BaseCodeRenderer):

        return renderedDocs

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(
        self,
        extractedContent: Dict[str, Any],
        title: str,
        userPrompt: str = None,
        aiService=None,
        *,
        style: Dict[str, Any] = None,
    ) -> List[RenderedDocument]:
        """
        Render method for document generation compatibility.
        Delegates to document renderer if needed, or handles code files directly.

@ -94,7 +102,7 @@ class RendererCodeCsv(BaseCodeRenderer):
            # Document generation path - delegate to document renderer
            from .rendererCsv import RendererCsv
            documentRenderer = RendererCsv(self.services)
            return await documentRenderer.render(extractedContent, title, userPrompt, aiService)
            return await documentRenderer.render(extractedContent, title, userPrompt, aiService, style=style)

    def _validateAndFixCsv(self, content: str) -> str:
        """Validate CSV structure and fix common issues."""

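The bare * in the reworked signatures makes style keyword-only, so existing positional call sites keep working unchanged; only the new argument must be named:

    # OK: style passed by keyword (resolvedStyle as produced upstream by resolveStyle)
    docs = await renderer.render(extractedContent, title, userPrompt, aiService, style=resolvedStyle)

    # TypeError: style cannot be passed positionally
    # docs = await renderer.render(extractedContent, title, userPrompt, aiService, resolvedStyle)
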
@ -91,7 +91,15 @@ class RendererCodeJson(BaseCodeRenderer):

        return renderedDocs

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(
        self,
        extractedContent: Dict[str, Any],
        title: str,
        userPrompt: str = None,
        aiService=None,
        *,
        style: Dict[str, Any] = None,
    ) -> List[RenderedDocument]:
        """
        Render method for document generation compatibility.
        Delegates to document renderer if needed, or handles code files directly.

@ -107,7 +115,7 @@ class RendererCodeJson(BaseCodeRenderer):
            # Import here to avoid circular dependency
            from .rendererJson import RendererJson
            documentRenderer = RendererJson(self.services)
            return await documentRenderer.render(extractedContent, title, userPrompt, aiService)
            return await documentRenderer.render(extractedContent, title, userPrompt, aiService, style=style)

    def _extractJsonStatistics(self, parsed: Any) -> Dict[str, Any]:
        """Extract JSON statistics for validation (object count, array count, key count)."""

@ -78,11 +78,20 @@ class RendererCodeXml(BaseCodeRenderer):

        return renderedDocs

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(
        self,
        extractedContent: Dict[str, Any],
        title: str,
        userPrompt: str = None,
        aiService=None,
        *,
        style: Dict[str, Any] = None,
    ) -> List[RenderedDocument]:
        """
        Render method for document generation compatibility.
        For XML, we only support code generation (no document renderer exists yet).
        """
        _ = style
        # Check if this is code generation (has files array)
        if "files" in extractedContent:
            # Code generation path - use renderCodeFiles

@ -39,8 +39,17 @@ class RendererCsv(BaseRenderer):
        """
        return ["table", "code_block"]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(
        self,
        extractedContent: Dict[str, Any],
        title: str,
        userPrompt: str = None,
        aiService=None,
        *,
        style: Dict[str, Any] = None,
    ) -> List[RenderedDocument]:
        """Render extracted JSON content to CSV format. Produces one CSV file per table section."""
        _ = style
        try:
            # Validate JSON structure
            if not self._validateJsonStructure(extractedContent):

@ -53,18 +53,17 @@ class RendererDocx(BaseRenderer):
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return list(supportedSectionTypes)

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
        """Render extracted JSON content to DOCX format using AI-analyzed styling."""
        self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
        try:
            if not DOCX_AVAILABLE:
                # Fallback to HTML if python-docx not available
                from .rendererHtml import RendererHtml
                htmlRenderer = RendererHtml()
                return await htmlRenderer.render(extractedContent, title, userPrompt, aiService)
                return await htmlRenderer.render(extractedContent, title, userPrompt, aiService, style=style)

            # Generate DOCX using AI-analyzed styling
            docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)
            docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService, unifiedStyle=style)

            # Extract metadata for document type and other info
            metadata = extractedContent.get("metadata", {}) if extractedContent else {}

@ -114,23 +113,27 @@ class RendererDocx(BaseRenderer):
            )
        ]

    async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
    async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, unifiedStyle: Dict[str, Any] = None) -> str:
        """Generate DOCX content from structured JSON document."""
        import time
        start_time = time.time()
        try:
            self.logger.debug("_generateDocxFromJson: Starting document generation")
            # Create new document
            doc = Document()
            self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s")

            # Get style set: use styles from metadata if available, otherwise enhance with AI
            template_from_metadata = None
            if json_content and isinstance(json_content.get("metadata"), dict):
                template_from_metadata = json_content["metadata"].get("templateName")
            # Phase 3: prefer unified style when provided
            style_start = time.time()
            self.logger.debug("_generateDocxFromJson: About to get style set")
            styleSet = await self._getStyleSet(json_content, userPrompt, aiService, templateName=template_from_metadata)
            if unifiedStyle:
                styleSet = self._convertUnifiedStyleToInternal(unifiedStyle)
                self._unifiedStyle = unifiedStyle
            else:
                template_from_metadata = None
                if json_content and isinstance(json_content.get("metadata"), dict):
                    template_from_metadata = json_content["metadata"].get("templateName")
                styleSet = await self._getStyleSet(json_content, userPrompt, aiService, templateName=template_from_metadata)
                self._unifiedStyle = None
            self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s")

            # Setup basic document styles and create all styles from style set

@ -298,11 +301,11 @@ class RendererDocx(BaseRenderer):
    def _setupBasicDocumentStyles(self, doc: Document) -> None:
        """Set up basic document styles."""
        try:
            # Set default font
            style = doc.styles['Normal']
            font = style.font
            font.name = 'Calibri'
            font.size = Pt(11)
            us = getattr(self, '_unifiedStyle', None)
            font.name = us["fonts"]["primary"] if us else 'Calibri'
            font.size = Pt(us["paragraph"]["sizePt"] if us else 11)
        except Exception as e:
            self.logger.warning(f"Could not set up basic document styles: {str(e)}")

@ -421,6 +424,8 @@ class RendererDocx(BaseRenderer):
    def _addMarkdownInlineRuns(self, paragraph, text: str) -> None:
        """Parse markdown inline formatting and add corresponding Runs to a python-docx paragraph."""
        pos = 0
        us = getattr(self, '_unifiedStyle', None)
        monoFont = us["fonts"]["monospace"] if us else "Courier New"
        for m in self._MD_INLINE_RE.finditer(text):
            if m.start() > pos:
                paragraph.add_run(text[pos:m.start()])

@ -434,12 +439,45 @@ class RendererDocx(BaseRenderer):
                paragraph.add_run(m.group(6)).italic = True
            elif m.group(7):
                run = paragraph.add_run(m.group(7))
                run.font.name = "Courier New"
                run.font.name = monoFont
                run.font.size = Pt(9)
            pos = m.end()
        if pos < len(text):
            paragraph.add_run(text[pos:])

    def _renderInlineRuns(self, runs: list, paragraph, styleSet: Dict[str, Any]) -> None:
        """Process a list of InlineRun dicts into python-docx Runs on a paragraph."""
        us = getattr(self, '_unifiedStyle', None)
        monoFont = us["fonts"]["monospace"] if us else "Courier New"
        for run in runs:
            runType = run.get("type", "text")
            value = run.get("value", "")
            if runType == "text":
                paragraph.add_run(value)
            elif runType == "bold":
                paragraph.add_run(value).bold = True
            elif runType == "italic":
                paragraph.add_run(value).italic = True
            elif runType == "code":
                r = paragraph.add_run(value)
                r.font.name = monoFont
                r.font.size = Pt(9)
            elif runType == "link":
                r = paragraph.add_run(value)
                r.font.underline = True
                r.font.color.rgb = RGBColor(0x29, 0x80, 0xB9)
            elif runType == "image":
                b64 = run.get("base64Data", "")
                if b64:
                    try:
                        imgBytes = base64.b64decode(b64)
                        imgStream = io.BytesIO(imgBytes)
                        paragraph.add_run().add_picture(imgStream, width=Inches(2))
                    except Exception:
                        paragraph.add_run(f"[Image: {run.get('altText', '')}]")
                else:
                    paragraph.add_run(value)
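A standalone sketch of the same run-to-docx mapping using only public python-docx calls (illustrative; the real path above additionally applies the unified monospace font and a link color):

    from docx import Document

    doc = Document()
    p = doc.add_paragraph()
    for run in [{"type": "text", "value": "Total: "},
                {"type": "bold", "value": "42"},
                {"type": "code", "value": " sum(x)"}]:
        r = p.add_run(run["value"])
        if run["type"] == "bold":
            r.bold = True
        elif run["type"] == "code":
            r.font.name = "Courier New"  # the monoFont default when _unifiedStyle is unset
    doc.save("runs-demo.docx")
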

    def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """
        Render a JSON table to DOCX using AI-generated styles.

@ -485,7 +523,7 @@ class RendererDocx(BaseRenderer):
        except Exception as e:
            self.logger.error(f"Error rendering table: {str(e)}", exc_info=True)

    def _renderTableFastXml(self, doc: Document, headers: List[str], rows: List[List[Any]], styles: Dict[str, Any]) -> None:
    def _renderTableFastXml(self, doc: Document, headers: list, rows: list, styles: Dict[str, Any]) -> None:
        """
        High-performance table rendering using direct XML manipulation.

@ -546,24 +584,34 @@ class RendererDocx(BaseRenderer):
        # Build all rows using fast XML
        rows_start = time.time()

        # Header row
        headerRow = self._createTableRowXml(headers, isHeader=True)
        # Resolve header style colors
        tableStyle = styles.get("table_header", {})
        headerBg = tableStyle.get("background", "")
        headerFg = tableStyle.get("text_color", "")

        # Flatten inline-run headers to plain strings for fast XML path
        flatHeaders = []
        for h in headers:
            runs = self._inlineRunsForCell(h)
            flatHeaders.append("".join(r.get("value", "") for r in runs))

        headerRow = self._createTableRowXml(flatHeaders, isHeader=True, headerBgHex=headerBg or None, headerFgHex=headerFg or None)
        tbl.append(headerRow)

        header_time = time.time() - rows_start
        self.logger.debug(f"_renderTableFastXml: Header row created in {header_time:.3f}s")

        # Data rows - batch process for performance
        data_start = time.time()
        rowCount = len(rows)

        for idx, rowData in enumerate(rows):
            # Convert all cells to strings
            cellTexts = [str(cell) if cell is not None else '' for cell in rowData]
            # Pad if needed
            while len(cellTexts) < len(headers):
            cellTexts = []
            for cell in rowData:
                runs = self._inlineRunsForCell(cell)
                cellTexts.append("".join(r.get("value", "") for r in runs))
            while len(cellTexts) < len(flatHeaders):
                cellTexts.append('')

            row = self._createTableRowXml(cellTexts, isHeader=False)
            tbl.append(row)

@ -641,74 +689,64 @@ class RendererDocx(BaseRenderer):

        return tblBorders

    def _createTableRowXml(self, cells: List[str], isHeader: bool = False) -> Any:
        """
        Create a table row XML element with cells.

        This is the core fast-path: builds the row XML directly without
        going through python-docx's slow cell.text assignment.
        """
    def _createTableRowXml(self, cells: list, isHeader: bool = False, headerBgHex: str = None, headerFgHex: str = None) -> Any:
        """Create a table row XML element with cells.
        Fast-path: builds row XML directly via lxml."""
        from docx.oxml.shared import OxmlElement, qn

        if headerBgHex is None:
            us = getattr(self, '_unifiedStyle', None)
            headerBgHex = us["table"]["headerBg"].lstrip('#') if us else '1F3864'
        else:
            headerBgHex = headerBgHex.lstrip('#')
        if headerFgHex is None:
            us = getattr(self, '_unifiedStyle', None)
            headerFgHex = us["table"]["headerFg"].lstrip('#') if us else 'FFFFFF'
        else:
            headerFgHex = headerFgHex.lstrip('#')

        tr = OxmlElement('w:tr')

        # Row properties for header
        if isHeader:
            trPr = OxmlElement('w:trPr')
            tblHeader = OxmlElement('w:tblHeader')
            trPr.append(tblHeader)
            trPr.append(OxmlElement('w:tblHeader'))
            tr.append(trPr)

        for cellText in cells:
            # Create cell
            tc = OxmlElement('w:tc')

            # Cell properties
            tcPr = OxmlElement('w:tcPr')
            tcW = OxmlElement('w:tcW')
            tcW.set(qn('w:type'), 'auto')
            tcW.set(qn('w:w'), '0')
            tcPr.append(tcW)

            # Header cell styling - light blue background
            if isHeader:
                shd = OxmlElement('w:shd')
                shd.set(qn('w:val'), 'clear')
                shd.set(qn('w:color'), 'auto')
                shd.set(qn('w:fill'), '4472C4') # Professional blue
                shd.set(qn('w:fill'), headerBgHex)
                tcPr.append(shd)

            tc.append(tcPr)

            # Paragraph with text
            p = OxmlElement('w:p')

            # Add run with text
            r = OxmlElement('w:r')

            # Header text styling - bold and white
            if isHeader:
                rPr = OxmlElement('w:rPr')
                b = OxmlElement('w:b')
                rPr.append(b)
                # White text color
                rPr.append(OxmlElement('w:b'))
                color = OxmlElement('w:color')
                color.set(qn('w:val'), 'FFFFFF')
                color.set(qn('w:val'), headerFgHex)
                rPr.append(color)
                r.append(rPr)

            # Text element
            t = OxmlElement('w:t')
            # Preserve spaces if text starts/ends with whitespace
            if cellText and (cellText[0] == ' ' or cellText[-1] == ' '):
                t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
            t.text = cellText
            r.append(t)

            p.append(r)
            tc.append(p)
            tr.append(tc)

        return tr

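Since OxmlElement produces lxml elements, one quick way to sanity-check a generated row is to serialize it (debug sketch only; r stands for any RendererDocx instance):

    from lxml import etree

    tr = r._createTableRowXml(["Name", "Qty"], isHeader=True)
    print(etree.tostring(tr, pretty_print=True).decode())
    # Expect w:trPr/w:tblHeader, then one w:tc per cell whose w:shd fill and
    # run w:color match the 1F3864/FFFFFF defaults (or the unified table colors).
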
    def _applyHorizontalBordersOnly(self, table) -> None:

@ -836,47 +874,37 @@ class RendererDocx(BaseRenderer):
    def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON bullet list to DOCX using AI-generated styles - OPTIMIZED for performance."""
        try:
            # Extract from nested content structure
            content = list_data.get("content", {})
            if not isinstance(content, dict):
                return
            items = content.get("items", [])
            bullet_style = styles.get("bullet_list", {})

            # Pre-calculate and cache style objects to avoid repeated parsing
            font_size_pt = None
            font_size_pt = Pt(bullet_style["font_size"]) if bullet_style.get("font_size") else None
            text_color_rgb = None
            if bullet_style:
                if "font_size" in bullet_style:
                    font_size_pt = Pt(bullet_style["font_size"])
                if "color" in bullet_style:
                    color_hex = bullet_style["color"].lstrip('#')
                    text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
            if bullet_style.get("color"):
                color_hex = bullet_style["color"].lstrip('#')
                text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))

            for item in items:
                itemText = item if isinstance(item, str) else (item.get("text", "") if isinstance(item, dict) else "")
                if not itemText:
                itemRuns = self._inlineRunsForListItem(item)
                if not itemRuns or not any(r.get("value") for r in itemRuns):
                    continue
                para = doc.add_paragraph(style='List Bullet')
                self._addMarkdownInlineRuns(para, itemText)

                # Apply bullet list styling from style set - use cached objects
                if bullet_style and para.runs:
                    # Use direct access instead of iterating
                    if len(para.runs) > 0:
                        run = para.runs[0]
                        if font_size_pt:
                            run.font.size = font_size_pt
                        if text_color_rgb:
                            run.font.color.rgb = text_color_rgb
                    else:
                        # Create run if none exists
                        run = para.add_run()
                        if font_size_pt:
                            run.font.size = font_size_pt
                        if text_color_rgb:
                            run.font.color.rgb = text_color_rgb

                isNewRunFormat = isinstance(item, list)
                if isNewRunFormat:
                    self._renderInlineRuns(itemRuns, para, styles)
                else:
                    itemText = "".join(r.get("value", "") for r in itemRuns)
                    self._addMarkdownInlineRuns(para, itemText)

                if bullet_style and para.runs and len(para.runs) > 0:
                    run = para.runs[0]
                    if font_size_pt:
                        run.font.size = font_size_pt
                    if text_color_rgb:
                        run.font.color.rgb = text_color_rgb

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")

@ -905,90 +933,79 @@ class RendererDocx(BaseRenderer):
    def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON paragraph to DOCX using AI-generated styles."""
        try:
            # Extract from nested content structure
            content = paragraph_data.get("content", {})
            if isinstance(content, dict):
                text = content.get("text", "")
                inlineRuns = self._inlineRunsFromContent(content)
            elif isinstance(content, str):
                text = content
                inlineRuns = [{"type": "text", "value": content}]
            else:
                text = ""

            # CRITICAL: Prevent rendering base64 image data as text
            # Base64 image data typically starts with /9j/ (JPEG) or iVBORw0KGgo (PNG)
            if text and (text.startswith("/9j/") or text.startswith("iVBORw0KGgo") or
                    (len(text) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in text[:100]))):
                # This looks like base64 data - don't render as text
                self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(text)})")
                inlineRuns = []

            if not inlineRuns:
                return

            plainText = "".join(r.get("value", "") for r in inlineRuns)
            if plainText and (plainText.startswith("/9j/") or plainText.startswith("iVBORw0KGgo") or
                    (len(plainText) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in plainText[:100]))):
                self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(plainText)})")
                para = doc.add_paragraph("[Error: Image data found in text content - image embedding may have failed]")
                if para.runs:
                    para.runs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color for error
                    para.runs[0].font.color.rgb = RGBColor(255, 0, 0)
                return

            if text:
                para = doc.add_paragraph()
                self._addMarkdownInlineRuns(para, text)
                paragraph_style = styles.get("paragraph", {})
                if paragraph_style:
                    # Pre-calculate and cache style objects
                    font_size_pt = None
                    text_color_rgb = None
                    if "font_size" in paragraph_style:
                        font_size_pt = Pt(paragraph_style["font_size"])
                    if "color" in paragraph_style:
                        color_hex = paragraph_style["color"].lstrip('#')
                        text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
                    bold = paragraph_style.get("bold", False)

                    # Use direct access instead of iterating
                    if len(para.runs) > 0:
                        run = para.runs[0]
                        if font_size_pt:
                            run.font.size = font_size_pt
                        run.font.bold = bold
                        if text_color_rgb:
                            run.font.color.rgb = text_color_rgb

            para = doc.add_paragraph()
            hasNewRuns = content.get("inlineRuns") if isinstance(content, dict) else None
            if hasNewRuns:
                self._renderInlineRuns(inlineRuns, para, styles)
            else:
                self._addMarkdownInlineRuns(para, plainText)

            paragraph_style = styles.get("paragraph", {})
            if paragraph_style:
                font_size_pt = Pt(paragraph_style["font_size"]) if "font_size" in paragraph_style else None
                text_color_rgb = None
                if "color" in paragraph_style:
                    color_hex = paragraph_style["color"].lstrip('#')
                    text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
                bold = paragraph_style.get("bold", False)
                if len(para.runs) > 0:
                    run = para.runs[0]
                    if font_size_pt:
                        run.font.size = font_size_pt
                    run.font.bold = bold
                    if text_color_rgb:
                        run.font.color.rgb = text_color_rgb
                if "align" in paragraph_style:
                    align = paragraph_style["align"]
                    if align == "center":
                        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
                    elif align == "right":
                        para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
                    else:
                    # Create run if none exists
                    run = para.add_run()
                    if font_size_pt:
                        run.font.size = font_size_pt
                    run.font.bold = bold
                    if text_color_rgb:
                        run.font.color.rgb = text_color_rgb

                    if "align" in paragraph_style:
                        align = paragraph_style["align"]
                        if align == "center":
                            para.alignment = WD_ALIGN_PARAGRAPH.CENTER
                        elif align == "right":
                            para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
                        else:
                            para.alignment = WD_ALIGN_PARAGRAPH.LEFT

                        para.alignment = WD_ALIGN_PARAGRAPH.LEFT

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")

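The base64 guard above, distilled into a reusable predicate with the same thresholds (sketch; not part of the diff):

    _B64_CHARS = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=")

    def looksLikeBase64Image(text: str) -> bool:
        """Heuristic: base64 JPEG starts with /9j/, PNG with iVBORw0KGgo; otherwise
        treat any long string whose first 100 chars are all base64 alphabet as suspect."""
        if not text:
            return False
        if text.startswith("/9j/") or text.startswith("iVBORw0KGgo"):
            return True
        return len(text) > 100 and all(c in _B64_CHARS for c in text[:100])
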
    def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON code block to DOCX using AI-generated styles."""
        try:
            # Extract from nested content structure
            content = code_data.get("content", {})
            if not isinstance(content, dict):
                return
            code = content.get("code", "")
            language = content.get("language", "")
            code_style = styles.get("code_block", {})

            us = getattr(self, '_unifiedStyle', None)

            if code:
                if language:
                    lang_para = doc.add_paragraph(f"Code ({language}):")
                    if len(lang_para.runs) > 0:
                        lang_para.runs[0].bold = True

                # Pre-calculate and cache style objects
                code_font_name = code_style.get("font", "Courier New")
                code_font_size_pt = Pt(code_style.get("font_size", 9))
                code_font_name = code_style.get("font", us["fonts"]["monospace"] if us else "Courier New")
                code_font_size_pt = Pt(code_style.get("font_size", us["codeBlock"]["fontSizePt"] if us else 9))
                code_text_color_rgb = None
                if "color" in code_style:
                    color_hex = code_style["color"].lstrip('#')

@ -40,7 +40,7 @@ class RendererHtml(BaseRenderer):
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return list(supportedSectionTypes)

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
        """
        Render HTML document with images as separate files.
        Returns list of documents: [HTML document, image1, image2, ...]

@ -54,7 +54,7 @@ class RendererHtml(BaseRenderer):
        self._renderedImages = images

        # Generate HTML using AI-analyzed styling
        htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
        htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService, style=style)

        # Replace base64 data URIs with relative file paths if images exist
        if images:

@ -107,11 +107,16 @@ class RendererHtml(BaseRenderer):

        return resultDocuments

    async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
    async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str:
        """Generate HTML content from structured JSON document using AI-generated styling."""
        try:
            # Get style set: use styles from metadata if available, otherwise enhance with AI
            styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
            # Use unified style when provided, otherwise fall back to existing flow
            if style:
                styles = self._convertUnifiedStyleToInternal(style)
                self._unifiedStyle = style
            else:
                styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
                self._unifiedStyle = None

            # Validate JSON structure
            if not self._validateJsonStructure(jsonContent):

@ -272,6 +277,10 @@ class RendererHtml(BaseRenderer):

    def _generateCssStyles(self, styles: Dict[str, Any]) -> str:
        """Generate CSS from style definitions."""
        # When unified style is available, generate CSS directly from it
        if getattr(self, "_unifiedStyle", None):
            return self._generateCssFromUnifiedStyle(self._unifiedStyle)

        css_parts = []

        # Body styles

@ -368,6 +377,164 @@ class RendererHtml(BaseRenderer):

        return '\n'.join(css_parts)

    def _generateCssFromUnifiedStyle(self, style: Dict[str, Any]) -> str:
        """Generate CSS directly from unified style dict."""
        fonts = style.get("fonts", {})
        colors = style.get("colors", {})
        headings = style.get("headings", {})
        para = style.get("paragraph", {})
        tbl = style.get("table", {})
        lst = style.get("list", {})
        cb = style.get("codeBlock", {})
        page = style.get("page", {})

        primaryFont = fonts.get("primary", "Arial, sans-serif")
        monoFont = fonts.get("monospace", "Courier New, monospace")
        bgColor = colors.get("background", "#FFFFFF")
        primaryColor = colors.get("primary", "#1F3864")
        paraColor = para.get("color", "#333333")
        paraSizePt = para.get("sizePt", 11)
        lineSpacing = para.get("lineSpacing", 1.15)

        css_parts = []

        # Body
        css_parts.append("body {")
        css_parts.append(f" font-family: {primaryFont};")
        css_parts.append(f" background: {bgColor};")
        css_parts.append(f" color: {paraColor};")
        css_parts.append(f" font-size: {paraSizePt}pt;")
        css_parts.append(f" line-height: {lineSpacing};")
        margins = page.get("marginsPt", {})
        if margins:
            css_parts.append(f" margin: {margins.get('top', 60)}pt {margins.get('right', 60)}pt {margins.get('bottom', 60)}pt {margins.get('left', 60)}pt;")
        else:
            css_parts.append(" margin: 0; padding: 20px;")
        css_parts.append("}")

        # Document title (uses h1 style)
        h1 = headings.get("h1", {})
        css_parts.append(".document-title {")
        css_parts.append(f" font-size: {h1.get('sizePt', 24)}pt;")
        css_parts.append(f" color: {h1.get('color', primaryColor)};")
        css_parts.append(f" font-weight: {h1.get('weight', 'bold')};")
        css_parts.append(" margin: 0 0 1em 0;")
        css_parts.append("}")

        # Headings h1-h4
        for level in range(1, 5):
            key = f"h{level}"
            h = headings.get(key, h1 if level == 1 else headings.get(f"h{level-1}", {}))
            css_parts.append(f"h{level} {{")
            css_parts.append(f" font-size: {h.get('sizePt', max(24 - (level-1)*4, 12))}pt;")
            css_parts.append(f" color: {h.get('color', primaryColor)};")
            css_parts.append(f" font-weight: {h.get('weight', 'bold')};")
            css_parts.append(f" margin: 1.2em 0 0.4em 0;")
            css_parts.append("}")

        # Paragraphs
        css_parts.append("p {")
        css_parts.append(f" font-size: {paraSizePt}pt;")
        css_parts.append(f" color: {paraColor};")
        css_parts.append(f" line-height: {lineSpacing};")
        css_parts.append(" margin: 0 0 1em 0;")
        css_parts.append("}")

        # Tables
        borderColor = tbl.get("borderColor", "#DEE2E6")
        css_parts.append("table {")
        css_parts.append(f" border-collapse: collapse;")
        css_parts.append(f" width: 100%;")
        css_parts.append(f" margin: 1em 0;")
        css_parts.append(f" border: 1px solid {borderColor};")
        css_parts.append("}")

        # Table headers
        css_parts.append("th {")
        css_parts.append(f" background: {tbl.get('headerBg', '#1F3864')};")
        css_parts.append(f" color: {tbl.get('headerFg', '#FFFFFF')};")
        css_parts.append(" font-weight: bold;")
        css_parts.append(" text-align: center;")
        css_parts.append(f" padding: 10px;")
        css_parts.append(f" border: 1px solid {borderColor};")
        css_parts.append("}")

        # Table cells
        css_parts.append("td {")
        css_parts.append(f" color: {paraColor};")
        css_parts.append(" padding: 8px;")
        css_parts.append(f" border: 1px solid {borderColor};")
        css_parts.append("}")

        # Lists
        css_parts.append("ul {")
        css_parts.append(f" font-size: {lst.get('sizePt', paraSizePt)}pt;")
        css_parts.append(f" color: {paraColor};")
        css_parts.append(f" padding-left: {lst.get('indentPt', 18)}pt;")
        css_parts.append(" margin: 0 0 1em 0;")
        css_parts.append("}")

        # Code blocks
        css_parts.append("pre {")
        css_parts.append(f" font-family: {monoFont};")
        css_parts.append(f" font-size: {cb.get('fontSizePt', 9)}pt;")
        css_parts.append(f" color: {paraColor};")
        css_parts.append(f" background: {cb.get('background', '#F8F9FA')};")
        css_parts.append(f" border: 1px solid {cb.get('borderColor', '#E2E8F0')};")
        css_parts.append(" border-radius: 4px;")
        css_parts.append(" padding: 1em;")
        css_parts.append(" margin: 1em 0;")
        css_parts.append(" overflow-x: auto;")
        css_parts.append("}")

        # Images
        css_parts.append("img {")
        css_parts.append(" max-width: 100%;")
        css_parts.append(" height: auto;")
        css_parts.append(" margin: 1em 0;")
        css_parts.append(" border-radius: 4px;")
        css_parts.append("}")

        # Generated info
        css_parts.append(".generated-info {")
        css_parts.append(" font-size: 0.9em;")
        css_parts.append(" color: #666;")
        css_parts.append(" text-align: center;")
        css_parts.append(" margin-top: 2em;")
        css_parts.append(" padding-top: 1em;")
        css_parts.append(" border-top: 1px solid #ddd;")
        css_parts.append("}")

        return '\n'.join(css_parts)
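A smoke-test sketch for the generator (renderer stands for any RendererHtml instance; every key is optional thanks to the .get defaults above):

    css = renderer._generateCssFromUnifiedStyle({
        "fonts": {"primary": "Georgia, serif", "monospace": "Consolas, monospace"},
        "paragraph": {"color": "#222222", "sizePt": 12, "lineSpacing": 1.4},
        "page": {"marginsPt": {"top": 40, "right": 50, "bottom": 40, "left": 50}},
    })
    assert "font-family: Georgia, serif;" in css
    assert "margin: 40pt 50pt 40pt 50pt;" in css  # page margins map onto the body margin
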

    def _renderInlineRuns(self, runs: list) -> str:
        """Convert inline runs to HTML markup."""
        import html as htmlLib
        parts = []
        for run in runs:
            runType = run.get("type", "text")
            value = htmlLib.escape(run.get("value", ""))
            if runType == "text":
                parts.append(value)
            elif runType == "bold":
                parts.append(f"<strong>{value}</strong>")
            elif runType == "italic":
                parts.append(f"<em>{value}</em>")
            elif runType == "code":
                parts.append(f"<code>{value}</code>")
            elif runType == "link":
                href = htmlLib.escape(run.get("href", ""))
                parts.append(f'<a href="{href}">{value}</a>')
            elif runType == "image":
                b64 = run.get("base64Data", "")
                mime = run.get("mimeType", "image/png")
                alt = value
                if b64:
                    parts.append(f'<img src="data:{mime};base64,{b64}" alt="{alt}" style="max-width:100%;height:auto;">')
                else:
                    parts.append(value)
        return "".join(parts)

    def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a single JSON section to HTML using AI-generated styles.
        Supports three content formats: reference, object (base64), extracted_text.

@ -419,6 +586,11 @@ class RendererHtml(BaseRenderer):
            # Regular paragraph element - extract from nested content structure (standard JSON format)
            content = element.get("content", {})
            if isinstance(content, dict):
                # New format: inlineRuns
                inlineRuns = content.get("inlineRuns")
                if inlineRuns and isinstance(inlineRuns, list):
                    htmlParts.append(f'<p>{self._renderInlineRuns(inlineRuns)}</p>')
                    continue
                text = content.get("text", "")
            elif isinstance(content, str):
                text = content

@ -495,7 +667,8 @@ class RendererHtml(BaseRenderer):
        # Table header
        htmlParts.append('<thead><tr>')
        for header in headers:
            htmlParts.append(f'<th>{header}</th>')
            runs = self._inlineRunsForCell(header)
            htmlParts.append(f'<th>{self._renderInlineRuns(runs)}</th>')
        htmlParts.append('</tr></thead>')

        # Table body

@ -503,7 +676,8 @@ class RendererHtml(BaseRenderer):
        for row in rows:
            htmlParts.append('<tr>')
            for cellData in row:
                htmlParts.append(f'<td>{cellData}</td>')
                runs = self._inlineRunsForCell(cellData)
                htmlParts.append(f'<td>{self._renderInlineRuns(runs)}</td>')
            htmlParts.append('</tr>')
        htmlParts.append('</tbody>')

@ -528,10 +702,8 @@ class RendererHtml(BaseRenderer):

        htmlParts = ['<ul>']
        for item in items:
            if isinstance(item, str):
                htmlParts.append(f'<li>{item}</li>')
            elif isinstance(item, dict) and "text" in item:
                htmlParts.append(f'<li>{item["text"]}</li>')
            runs = self._inlineRunsForListItem(item)
            htmlParts.append(f'<li>{self._renderInlineRuns(runs)}</li>')
        htmlParts.append('</ul>')

        return '\n'.join(htmlParts)

@ -571,6 +743,11 @@ class RendererHtml(BaseRenderer):
            if isinstance(el, dict):
                content = el.get("content", {})
                if isinstance(content, dict):
                    # New format: inlineRuns
                    inlineRuns = content.get("inlineRuns")
                    if inlineRuns and isinstance(inlineRuns, list):
                        texts.append(self._renderInlineRuns(inlineRuns))
                        continue
                    text = content.get("text", "")
                elif isinstance(content, str):
                    text = content

@ -581,16 +758,18 @@ class RendererHtml(BaseRenderer):
            elif isinstance(el, str):
                texts.append(el)
            if texts:
                # Join multiple paragraphs with <p> tags
                return '\n'.join(f'<p>{text}</p>' for text in texts)
            return ""
        elif isinstance(paragraphData, str):
            return f'<p>{paragraphData}</p>'
        elif isinstance(paragraphData, dict):
            # Handle nested content structure: element.content vs element.text
            # Extract from nested content structure
            content = paragraphData.get("content", {})
            if isinstance(content, dict):
                # New format: inlineRuns
                inlineRuns = content.get("inlineRuns")
                if inlineRuns and isinstance(inlineRuns, list):
                    return f'<p>{self._renderInlineRuns(inlineRuns)}</p>'
                text = content.get("text", "")
            elif isinstance(content, str):
                text = content

@ -43,8 +43,17 @@ class RendererImage(BaseRenderer):
        """
        return ["image"]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(
        self,
        extractedContent: Dict[str, Any],
        title: str,
        userPrompt: str = None,
        aiService=None,
        *,
        style: Dict[str, Any] = None,
    ) -> List[RenderedDocument]:
        """Render extracted JSON content to image format using AI image generation."""
        _ = style
        try:
            # Generate AI image from content
            imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)

@ -42,8 +42,17 @@ class RendererJson(BaseRenderer):
        # Return all types except image
        return [st for st in supportedSectionTypes if st != "image"]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(
        self,
        extractedContent: Dict[str, Any],
        title: str,
        userPrompt: str = None,
        aiService=None,
        *,
        style: Dict[str, Any] = None,
    ) -> List[RenderedDocument]:
        """Render extracted JSON content to JSON format."""
        _ = style
        try:
            # The extracted content should already be JSON from the AI
            # Just validate and format it

@ -40,8 +40,17 @@ class RendererMarkdown(BaseRenderer):
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return [st for st in supportedSectionTypes if st != "image"]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(
        self,
        extractedContent: Dict[str, Any],
        title: str,
        userPrompt: str = None,
        aiService=None,
        *,
        style: Dict[str, Any] = None,
    ) -> List[RenderedDocument]:
        """Render extracted JSON content to Markdown format."""
        _ = style
        try:
            # Generate markdown from JSON structure
            markdownContent = self._generateMarkdownFromJson(extractedContent, title)

@ -106,17 +106,17 @@ class RendererPdf(BaseRenderer):
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return list(supportedSectionTypes)

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
        """Render extracted JSON content to PDF format using AI-analyzed styling."""
        try:
            if not REPORTLAB_AVAILABLE:
                # Fallback to HTML if reportlab not available
                from .rendererHtml import RendererHtml
                html_renderer = RendererHtml()
                return await html_renderer.render(extractedContent, title, userPrompt, aiService)
                return await html_renderer.render(extractedContent, title, userPrompt, aiService, style=style)

            # Generate PDF using AI-analyzed styling
            pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)
            pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService, unifiedStyle=style)

            # Extract metadata for document type and other info
            metadata = extractedContent.get("metadata", {}) if extractedContent else {}

@ -163,11 +163,28 @@ class RendererPdf(BaseRenderer):
            )
        ]

    async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
    async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, unifiedStyle: Dict[str, Any] = None) -> str:
        """Generate PDF content from structured JSON document using AI-generated styling."""
        try:
            # Get style set: use styles from metadata if available, otherwise enhance with AI
            styles = await self._getStyleSet(json_content, userPrompt, aiService)
            # Get style set from unified style or legacy approach
            if unifiedStyle:
                styles = self._convertUnifiedStyleToInternal(unifiedStyle)
                self._unifiedStyle = unifiedStyle
                for level in range(1, 7):
                    hKey = f"heading{level}"
                    if hKey not in styles:
                        styles[hKey] = self._defaultHeadingStyleDef(level)
                    else:
                        styles[hKey].setdefault("space_after", 12)
                        styles[hKey].setdefault("space_before", 12)
                styles["paragraph"].setdefault("space_after", 6)
                styles["paragraph"].setdefault("line_height", unifiedStyle["paragraph"].get("lineSpacing", 1.2))
                styles["bullet_list"].setdefault("space_after", 3)
                styles["code_block"].setdefault("space_after", 6)
                styles["code_block"].setdefault("align", "left")
            else:
                styles = await self._getStyleSet(json_content, userPrompt, aiService)
                self._unifiedStyle = None

            # Validate JSON structure
            if not self._validateJsonStructure(json_content):

@ -179,15 +196,13 @@ class RendererPdf(BaseRenderer):
            # Create a buffer to hold the PDF
            buffer = io.BytesIO()

            # Create PDF document
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18
            )
            # Create PDF document with unified page margins or defaults
            pageCfg = unifiedStyle["page"] if unifiedStyle else None
            if pageCfg:
                m = pageCfg["marginsPt"]
                doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=m["right"], leftMargin=m["left"], topMargin=m["top"], bottomMargin=m["bottom"])
            else:
                doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18)

            # Build PDF content (no cover page — body starts on page 1; filename still uses `title`)
            story = []

@ -232,13 +247,28 @@ class RendererPdf(BaseRenderer):
            removed = False
            for idx, flowable in enumerate(story):
                fRepr = repr(flowable)
                if "Image" in fRepr and hasattr(flowable, 'drawWidth') and hasattr(flowable, 'drawHeight'):
                from reportlab.platypus import Image as ReportLabImage
                if isinstance(flowable, ReportLabImage):
                    frameH = 650.0
                    frameW = 450.0
                    if flowable.drawHeight > frameH or flowable.drawWidth > frameW:
                        scaleW = frameW / flowable.drawWidth if flowable.drawWidth > frameW else 1.0
                        scaleH = frameH / flowable.drawHeight if flowable.drawHeight > frameH else 1.0
                        s = min(scaleW, scaleH) * 0.9
                        flowable.drawWidth = flowable.drawWidth * s
                        flowable.drawHeight = flowable.drawHeight * s
                        flowable._width = flowable.drawWidth
                        flowable._height = flowable.drawHeight
                        removed = True
                        break
                if "Table" in fRepr and hasattr(flowable, '_cellvalues'):
                    try:
                        nRows = len(flowable._cellvalues)
                        nCols = len(flowable._cellvalues[0]) if flowable._cellvalues else 0
                        if nRows == 1 and nCols == 1:
                            errPara = Paragraph(
                                "[Code block omitted — content too large for PDF page]",
                                "[Code block omitted - content too large for PDF page]",
                                self._createNormalStyle({}),
                            )
                            story[idx] = errPara

@ -609,6 +639,31 @@ class RendererPdf(BaseRenderer):
            .replace(">", "&gt;")
        )

    def _renderInlineRunsToPdfXml(self, runs: list) -> str:
        """Convert inline runs to ReportLab Paragraph XML."""
        parts = []
        us = getattr(self, '_unifiedStyle', None)
        monoFont = us["fonts"]["monospace"] if us else "Courier"
        for run in runs:
            runType = run.get("type", "text")
            value = self._escapeReportlabXml(run.get("value", ""))
            if runType == "text":
                parts.append(value)
            elif runType == "bold":
                parts.append(f"<b>{value}</b>")
            elif runType == "italic":
                parts.append(f"<i>{value}</i>")
            elif runType == "code":
                parts.append(f'<font name="{monoFont}">{value}</font>')
            elif runType == "link":
                href = self._escapeReportlabXml(run.get("href", ""))
                parts.append(f'<a href="{href}">{value}</a>')
            elif runType == "image":
                parts.append(f"[Image: {value}]")
            else:
                parts.append(value)
        return "".join(parts)
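Example input/output for the ReportLab variant (with _unifiedStyle unset, so the Courier default applies):

    runs = [{"type": "bold", "value": "Total"},
            {"type": "text", "value": ": "},
            {"type": "code", "value": "sum(x)"}]
    # _renderInlineRunsToPdfXml(runs) ->
    # '<b>Total</b>: <font name="Courier">sum(x)</font>'
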
    def _applyInlineMarkdownToEscapedPlain(self, text: str) -> str:
        """Escape XML then apply bold/italic to a segment with no `code` spans (code is handled separately)."""
        if not text:

@ -744,10 +799,10 @@ class RendererPdf(BaseRenderer):
            return []
        headers = content.get("headers", [])
        rows = content.get("rows", [])

        if not headers or not rows:
            return []

        numCols = len(headers)
        colWidth = _PDF_CONTENT_WIDTH_PT / max(numCols, 1)
        colWidths = [colWidth] * numCols

@ -755,8 +810,12 @@ class RendererPdf(BaseRenderer):
        hdrPs = self._createTableCellParagraphStyle(styles, header=True, tableStyleKey="table_header")
        cellPs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell")

        def _cellPara(val, ps):
            return self._paragraphFromInlineMarkdown(str(val) if val is not None else "", ps)
        def _cellPara(cell, ps):
            runs = self._inlineRunsForCell(cell)
            if isinstance(cell, list):
                xml = self._renderInlineRunsToPdfXml(runs)
                return Paragraph(_wrapEmojiSpansInXml(xml), ps)
            return self._paragraphFromInlineMarkdown(str(cell) if cell is not None else "", ps)

        headerRow = [_cellPara(h, hdrPs) for h in headers]
        bodyRows = []

@ -786,7 +845,7 @@ class RendererPdf(BaseRenderer):
            ]
            table.setStyle(TableStyle(table_style))
            return [table, Spacer(1, 12)]

        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return []

@ -794,32 +853,29 @@ class RendererPdf(BaseRenderer):
    def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON bullet list to PDF elements using AI-generated styles."""
        try:
            # Extract from nested content structure
            content = list_data.get("content", {})
            if not isinstance(content, dict):
                return []
            items = content.get("items", [])
            bullet_style_def = styles.get("bullet_list", {})

            bulletStyleDef = styles.get("bullet_list", {})
            normalStyle = self._createNormalStyle(styles)

            elements = []
            for item in items:
                if isinstance(item, str):
                    elements.append(
                        Paragraph(f"• {self._markdownInlineToReportlabXml(item)}", self._createNormalStyle(styles))
                    )
                runs = self._inlineRunsForListItem(item)
                if isinstance(item, list):
                    xml = self._renderInlineRunsToPdfXml(runs)
                    elements.append(Paragraph(f"\u2022 {_wrapEmojiSpansInXml(xml)}", normalStyle))
                elif isinstance(item, str):
                    elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item)}", normalStyle))
                elif isinstance(item, dict) and "text" in item:
                    elements.append(
                        Paragraph(
                            f"• {self._markdownInlineToReportlabXml(item['text'])}",
                            self._createNormalStyle(styles),
                        )
                    )

                    elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item['text'])}", normalStyle))

            if elements:
                elements.append(Spacer(1, bullet_style_def.get("space_after", 3)))
                elements.append(Spacer(1, bulletStyleDef.get("space_after", 3)))

            return elements

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return []

@ -848,20 +904,27 @@ class RendererPdf(BaseRenderer):
    def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON paragraph to PDF elements using AI-generated styles."""
        try:
            # Extract from nested content structure
            content = paragraph_data.get("content", {})
            if isinstance(content, dict):
                text = content.get("text", "")
            elif isinstance(content, str):
                text = content
            else:
                text = ""

            if isinstance(content, str):
                content = {"text": content}
            if not isinstance(content, dict):
                return []

            normalStyle = self._createNormalStyle(styles)

            if "inlineRuns" in content:
                runs = self._inlineRunsFromContent(content)
                xml = self._renderInlineRunsToPdfXml(runs)
                if xml:
                    return [Paragraph(_wrapEmojiSpansInXml(xml), normalStyle)]
                return []

            text = content.get("text", "")
            if text:
                return [self._paragraphFromInlineMarkdown(text, self._createNormalStyle(styles))]
                return [self._paragraphFromInlineMarkdown(text, normalStyle)]

            return []

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return []

@ -1030,20 +1093,18 @@ class RendererPdf(BaseRenderer):
            pilImage = PILImage.open(imageStream)
            originalWidth, originalHeight = pilImage.size

            # Calculate available page dimensions (A4 with margins: 72pt left/right, 72pt top, 18pt bottom)
            pageWidth = A4[0]  # 595.27 points
            pageHeight = A4[1]  # 841.89 points
            leftMargin = 72
            rightMargin = 72
            topMargin = 72
            bottomMargin = 18

            # Use actual frame dimensions from SimpleDocTemplate
            # Frame is smaller than page minus margins due to internal spacing
            # From error message: frame is 439.27559055118115 x 739.8897637795277
            # Use conservative values with safety margin
            availableWidth = 430.0  # Slightly smaller than frame width for safety
            availableHeight = 730.0  # Slightly smaller than frame height for safety
            # Use page dimensions minus margins with generous safety buffer
            # A4 = 595.27 x 841.89 pt; frame = page - margins - internal padding
            _us = getattr(self, '_unifiedStyle', None) or {}
            _pageMgn = (_us.get('page') or {}).get('marginsPt') or {}
            marginTop = _pageMgn.get('top', 60)
            marginBottom = _pageMgn.get('bottom', 60)
            marginLeft = _pageMgn.get('left', 60)
            marginRight = _pageMgn.get('right', 60)
            availableWidth = pageWidth - marginLeft - marginRight - 20  # 20pt safety
            availableHeight = pageHeight - marginTop - marginBottom - 80  # 80pt safety for header/footer

            # Convert original image size from pixels to points
            # PIL provides size in pixels, need to convert to points

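Worked numbers for the sizing math above, using the default 60pt margins (the pixel-to-point conversion itself sits outside this hunk; the usual 96 dpi rule, pt = px * 0.75, is assumed here):

    pageWidth, pageHeight = 595.27, 841.89       # A4 in points
    availableWidth = pageWidth - 60 - 60 - 20    # = 455.27 pt
    availableHeight = pageHeight - 60 - 60 - 80  # = 641.89 pt
    # A 1600x1000 px image at 96 dpi is 1200x750 pt, so it gets scaled by
    # min(455.27 / 1200, 641.89 / 750) ~= 0.379 before being placed.
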
@ -59,7 +59,7 @@ class RendererPptx(BaseRenderer):
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return list(supportedSectionTypes)

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
        """
        Render content as PowerPoint presentation from JSON data.

@ -68,7 +68,7 @@ class RendererPptx(BaseRenderer):
            title: Title for the presentation
            userPrompt: User prompt for AI styling
            aiService: AI service for styling
            **kwargs: Additional rendering options
            style: Unified style dict from pipeline (preferred over AI-generated styles)

        Returns:
            Base64-encoded PowerPoint presentation as string

@ -81,8 +81,19 @@ class RendererPptx(BaseRenderer):
        from pptx.dml.color import RGBColor
        import re

        # Get style set: use styles from metadata if available, otherwise enhance with AI
        styles = await self._getStyleSet(extractedContent, userPrompt, aiService)
        # Get style set: prefer unified style, then metadata, then AI-enhanced
        if style:
            internalStyle = self._convertUnifiedStyleToInternal(style)
            defaultPptx = self._getDefaultStyleSet()
            for key in ("slide_size", "content_per_slide", "design_theme", "color_scheme", "background_style", "accent_colors", "professional_grade", "executive_ready"):
                internalStyle[key] = defaultPptx.get(key)
            internalStyle["heading"] = internalStyle["heading1"]
            internalStyle["subheading"] = internalStyle["heading2"]
            styles = internalStyle
            self._unifiedStyle = style
        else:
            styles = await self._getStyleSet(extractedContent, userPrompt, aiService)
            self._unifiedStyle = None

        # Create new presentation
        prs = Presentation()

@ -910,15 +921,17 @@ JSON ONLY. NO OTHER TEXT."""
            # Extract from nested content structure
            content = paragraph_data.get("content", {})
            if isinstance(content, dict):
                text = content.get("text", "")
                if content.get("inlineRuns"):
                    text = "".join(r.get("value", "") for r in content["inlineRuns"])
                else:
                    text = content.get("text", "")
            elif isinstance(content, str):
                text = content
            else:
                text = ""

            if text:
                # Limit paragraph length based on content density
                max_length = 200  # Default limit
                max_length = 200
                if len(text) > max_length:
                    text = text[:max_length] + "..."

@ -1303,6 +1316,32 @@ JSON ONLY. NO OTHER TEXT."""
            r.text = text[pos:]
            _applyBase(r)

    def _renderInlineRunsPptx(self, runs, paragraph, fontSize=None, fontColor=None):
        """Process InlineRun dicts into pptx text runs."""
        from pptx.util import Pt
        paragraph.text = ""
        us = getattr(self, '_unifiedStyle', None)
        monoFont = us["fonts"]["monospace"] if us else "Courier New"
        for run in runs:
            runType = run.get("type", "text")
            value = run.get("value", "")
            r = paragraph.add_run()
            r.text = value
            if fontSize:
                r.font.size = fontSize
            if fontColor:
                r.font.color.rgb = fontColor
            if runType == "bold":
                r.font.bold = True
            elif runType == "italic":
                r.font.italic = True
            elif runType == "code":
                r.font.name = monoFont
                if fontSize and hasattr(fontSize, 'pt'):
                    r.font.size = Pt(max(8, int(fontSize.pt * 0.85)))
            elif runType == "link":
                r.font.underline = True
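
A rough usage sketch for the method above (assumes a RendererPptx instance and an existing python-pptx slide; the variable names are illustrative, not from this diff):

    from pptx import Presentation
    from pptx.util import Pt
    from pptx.dml.color import RGBColor

    prs = Presentation()
    slide = prs.slides.add_slide(prs.slide_layouts[5])  # title-only layout
    text_frame = slide.shapes.add_textbox(Pt(50), Pt(50), Pt(400), Pt(100)).text_frame
    p = text_frame.add_paragraph()

    runs = [
        {"type": "text", "value": "Total: "},
        {"type": "bold", "value": "CHF 1200 "},
        {"type": "code", "value": "sumTotals()"},  # rendered in the monospace font
    ]
    # renderer is a RendererPptx with _unifiedStyle already set (or None)
    renderer._renderInlineRunsPptx(runs, p, fontSize=Pt(14), fontColor=RGBColor(0x2F, 0x2F, 0x2F))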

    def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float = None, max_width: float = None) -> None:
        """Add a PowerPoint table to slide."""
        try:

@ -1374,7 +1413,8 @@ JSON ONLY. NO OTHER TEXT."""
                cell = table.cell(0, col_idx)
                # Clear existing text and set new text
                cell.text_frame.clear()
                header_text = str(header) if header else ""
                cellRuns = self._inlineRunsForCell(header)
                header_text = "".join(r.get("value", "") for r in cellRuns)
                cell.text = header_text

                # Ensure paragraph exists

@ -1420,7 +1460,8 @@ JSON ONLY. NO OTHER TEXT."""
                    cell = table.cell(row_idx, col_idx)
                    # Clear existing text and set new text
                    cell.text_frame.clear()
                    cell_text = str(cell_data) if cell_data is not None else ""
                    cellRuns = self._inlineRunsForCell(cell_data)
                    cell_text = "".join(r.get("value", "") for r in cellRuns)
                    cell.text = cell_text

                    # Ensure paragraph exists

@ -1462,9 +1503,8 @@ JSON ONLY. NO OTHER TEXT."""
            fontColor = RGBColor(*self._getSafeColor(listStyle.get("color", (47, 47, 47))))

            for item in items:
                itemText = item.get("text", "") if isinstance(item, dict) else str(item)
                if not itemText or not itemText.strip():
                    continue
                runs = self._inlineRunsForListItem(item)
                isNewFormat = isinstance(item, list)

                p = text_frame.add_paragraph()
                p.level = 0

@ -1472,21 +1512,33 @@ JSON ONLY. NO OTHER TEXT."""
                p.space_before = Pt(2)
                p.space_after = Pt(2)

                # Consistent bullet prefix
                self._addMarkdownInlineRuns(p, f" • {itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
                if isNewFormat:
                    bulletRuns = [{"type": "text", "value": " \u2022 "}] + runs
                    self._renderInlineRunsPptx(bulletRuns, p, fontSize=fontSize, fontColor=fontColor)
                else:
                    itemText = item.get("text", "") if isinstance(item, dict) else str(item)
                    if not itemText or not itemText.strip():
                        continue
                    self._addMarkdownInlineRuns(p, f" \u2022 {itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)

                # Subitems
                # Subitems (only for dict-style items)
                if isinstance(item, dict):
                    for sub in item.get("subitems", []):
                        subText = sub.get("text", "") if isinstance(sub, dict) else str(sub)
                        if not subText:
                            continue
                        subRuns = self._inlineRunsForListItem(sub)
                        isSubNew = isinstance(sub, list)
                        sp = text_frame.add_paragraph()
                        sp.level = 0
                        sp.alignment = PP_ALIGN.LEFT
                        sp.space_before = Pt(1)
                        sp.space_after = Pt(1)
                        self._addMarkdownInlineRuns(sp, f" – {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
                        if isSubNew:
                            subBulletRuns = [{"type": "text", "value": " \u2013 "}] + subRuns
                            self._renderInlineRunsPptx(subBulletRuns, sp, fontSize=fontSize, fontColor=fontColor)
                        else:
                            subText = sub.get("text", "") if isinstance(sub, dict) else str(sub)
                            if not subText:
                                continue
                            self._addMarkdownInlineRuns(sp, f" \u2013 {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)

        except Exception as e:
            logger.warning(f"Error adding bullet list to slide: {str(e)}")

@ -1540,42 +1592,53 @@ JSON ONLY. NO OTHER TEXT."""
            # Extract from nested content structure
            content = element.get("content", {})
            if isinstance(content, dict):
                inlineRuns = self._inlineRunsFromContent(content)
                hasInlineRuns = content.get("inlineRuns") is not None
                text = content.get("text", "")
            elif isinstance(content, str):
                text = content
                inlineRuns = [{"type": "text", "value": text}] if text else []
                hasInlineRuns = False
            else:
                text = ""
                inlineRuns = []
                hasInlineRuns = False

            if text:
                p = text_frame.add_paragraph()
                p.level = 0

                try:
                    if hasattr(p, 'paragraph_format'):
                        p.paragraph_format.bullet.type = None
                except (AttributeError, TypeError):
                    pass

                paragraph_style = styles.get("paragraph", {})
                base_font_size = paragraph_style.get("font_size", 14)
                calculated_size = max(10, int(base_font_size * font_size_multiplier))
                fSize = Pt(calculated_size)
                fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
                fBold = paragraph_style.get("bold", False)
            if not inlineRuns and not text:
                return

            p = text_frame.add_paragraph()
            p.level = 0

            try:
                if hasattr(p, 'paragraph_format'):
                    p.paragraph_format.bullet.type = None
            except (AttributeError, TypeError):
                pass

            paragraph_style = styles.get("paragraph", {})
            base_font_size = paragraph_style.get("font_size", 14)
            calculated_size = max(10, int(base_font_size * font_size_multiplier))
            fSize = Pt(calculated_size)
            fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
            fBold = paragraph_style.get("bold", False)

            if hasInlineRuns:
                self._renderInlineRunsPptx(inlineRuns, p, fontSize=fSize, fontColor=fColor)
            else:
                self._addMarkdownInlineRuns(p, text, fontSize=fSize, fontColor=fColor, fontBold=fBold)

                # Add proper spacing
                p.space_before = Pt(6)  # Space before paragraph
                p.space_after = Pt(6)  # Space after paragraph
                p.line_spacing = 1.2  # Line spacing for readability

                align = paragraph_style.get("align", "left")
                if align == "center":
                    p.alignment = PP_ALIGN.CENTER
                elif align == "right":
                    p.alignment = PP_ALIGN.RIGHT
                else:
                    p.alignment = PP_ALIGN.LEFT

            p.space_before = Pt(6)
            p.space_after = Pt(6)
            p.line_spacing = 1.2

            align = paragraph_style.get("align", "left")
            if align == "center":
                p.alignment = PP_ALIGN.CENTER
            elif align == "right":
                p.alignment = PP_ALIGN.RIGHT
            else:
                p.alignment = PP_ALIGN.LEFT

        except Exception as e:
            logger.warning(f"Error adding paragraph to slide: {str(e)}")

@ -8,7 +8,7 @@ import re

from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
from typing import Dict, Any, List, Optional, Union

class RendererText(BaseRenderer):
    """Renders content to plain text format with format-specific extraction."""

@ -76,8 +76,17 @@ class RendererText(BaseRenderer):
        # Text renderer accepts all types except images
        return [st for st in supportedSectionTypes if st != "image"]

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(
        self,
        extractedContent: Dict[str, Any],
        title: str,
        userPrompt: str = None,
        aiService=None,
        *,
        style: Dict[str, Any] = None,
    ) -> List[RenderedDocument]:
        """Render extracted JSON content to plain text format."""
        _ = style  # unified style from renderReport; plain text ignores formatting hints
        try:
            # Generate text from JSON structure
            textContent = self._generateTextFromJson(extractedContent, title)

@ -263,16 +272,16 @@ class RendererText(BaseRenderer):
            textParts = []

            # Create table header
            headerLine = " | ".join(str(header) for header in headers)
            headerLine = " | ".join(self._tableCellToPlainText(h) for h in headers)
            textParts.append(headerLine)

            # Add separator line
            separatorLine = " | ".join("-" * len(str(header)) for header in headers)
            separatorLine = " | ".join("-" * len(self._tableCellToPlainText(h)) for h in headers)
            textParts.append(separatorLine)

            # Add data rows
            for row in rows:
                rowLine = " | ".join(str(cellData) for cellData in row)
                rowLine = " | ".join(self._tableCellToPlainText(cellData) for cellData in row)
                textParts.append(rowLine)

            return '\n'.join(textParts)

@ -299,6 +308,9 @@ class RendererText(BaseRenderer):
                    textParts.append(f"- {self._stripMarkdownForPlainText(item)}")
                elif isinstance(item, dict) and "text" in item:
                    textParts.append(f"- {self._stripMarkdownForPlainText(item['text'])}")
                elif isinstance(item, list):
                    # markdownToDocumentJson: each item is List[InlineRun]
                    textParts.append(f"- {self._inlineRunsToPlainText(item)}")

            return '\n'.join(textParts)

@ -345,12 +357,54 @@ class RendererText(BaseRenderer):
        text = re.sub(r'`([^`]+)`', r'\1', text)
        return text.strip()

    def _inlineRunsToPlainText(self, runs: Union[List[Any], Any]) -> str:
        """Flatten InlineRun dicts (from markdownToDocumentJson) to a single string."""
        if runs is None:
            return ""
        if isinstance(runs, dict):
            runs = [runs]
        if not isinstance(runs, list):
            return self._stripMarkdownForPlainText(str(runs))
        parts: List[str] = []
        for run in runs:
            if not isinstance(run, dict):
                parts.append(str(run))
                continue
            t = run.get("type") or "text"
            val = run.get("value", "")
            if t == "text":
                parts.append(str(val))
            elif t in ("bold", "italic", "code"):
                parts.append(str(val))
            elif t == "link":
                parts.append(str(val))
            elif t == "image":
                parts.append(f"[{val}]")
            else:
                parts.append(str(val))
        return "".join(parts)

    def _tableCellToPlainText(self, cell: Any) -> str:
        """Table header/cell: plain str, legacy dict, or List[InlineRun]."""
        if cell is None:
            return ""
        if isinstance(cell, str):
            return self._stripMarkdownForPlainText(cell)
        if isinstance(cell, list):
            return self._inlineRunsToPlainText(cell)
        if isinstance(cell, dict) and "text" in cell:
            return self._stripMarkdownForPlainText(str(cell["text"]))
        return self._stripMarkdownForPlainText(str(cell))
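
For reference, the two helpers above flatten every run type to its raw value, rendering images as bracketed alt text. A quick sketch of the expected behavior (inputs mirror the shapes produced by markdownToDocumentJson):

    runs = [
        {"type": "text", "value": "See "},
        {"type": "link", "value": "the report", "href": "https://example.com"},
        {"type": "image", "value": "Chart"},
    ]
    # _inlineRunsToPlainText(runs)      -> "See the report[Chart]"
    # _tableCellToPlainText(runs)       -> same string, via the list branch
    # _tableCellToPlainText("**bold**") -> "bold" (markdown stripped)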

    def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
        """Render a JSON paragraph to text. Strips markdown for plain text output."""
        try:
            # Extract from nested content structure
            content = paragraphData.get("content", {})
            if isinstance(content, dict):
                runs = self._inlineRunsFromContent(content)
                if runs:
                    return self._stripMarkdownForPlainText(self._inlineRunsToPlainText(runs))
                text = content.get("text", "")
            elif isinstance(content, str):
                text = content

@ -68,17 +68,17 @@ class RendererXlsx(BaseRenderer):
        from modules.datamodels.datamodelJson import supportedSectionTypes
        return list(supportedSectionTypes)

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
        """Render extracted JSON content to Excel format using AI-analyzed styling."""
        try:
            if not OPENPYXL_AVAILABLE:
                # Fallback to CSV if openpyxl not available
                from .rendererCsv import RendererCsv
                csvRenderer = RendererCsv()
                return await csvRenderer.render(extractedContent, title, userPrompt, aiService)
                return await csvRenderer.render(extractedContent, title, userPrompt, aiService, style=style)

            # Generate Excel using AI-analyzed styling
            excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)
            excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService, style=style)

            # Extract metadata for document type and other info
            metadata = extractedContent.get("metadata", {}) if extractedContent else {}

@ -298,15 +298,22 @@ class RendererXlsx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Could not populate analysis sheet: {str(e)}")

    async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
    async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str:
        """Generate Excel content from structured JSON document using AI-generated styling."""
        try:
            # Debug output
            self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER")
            self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER")

            # Get style set: use styles from metadata if available, otherwise enhance with AI
            styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
            # Store unified style for use by inline-run helpers
            self._unifiedStyle = style

            # Get style set: prefer unified style, fall back to legacy approach
            if style:
                styles = self._convertUnifiedStyleToInternal(style)
                styles = self._convertColorsFormat(styles)
            else:
                styles = await self._getStyleSet(jsonContent, userPrompt, aiService)

            # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
            if not self._validateJsonStructure(jsonContent):

@ -511,6 +518,10 @@ class RendererXlsx(BaseRenderer):
            "code_block": {"font": "Courier New", "font_size": 10, "color": "FF2F2F2F", "background": "FFF5F5F5"}
        }

    def _renderInlineRuns(self, runs: list) -> str:
        """Flatten inline runs to plain text for Excel cells."""
        return "".join(r.get("value", "") for r in runs)

    async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
        """Get AI styles with proper Excel color conversion."""
        if not aiService:

@ -1206,7 +1217,9 @@ class RendererXlsx(BaseRenderer):

        # Add headers with formatting - OPTIMIZED: use cached style objects
        for col, header in enumerate(headers, 1):
            sanitized_header = self._sanitizeCellValue(header)
            runs = self._inlineRunsForCell(header)
            headerText = self._renderInlineRuns(runs)
            sanitized_header = self._sanitizeCellValue(headerText)
            cell = sheet.cell(row=headerRow, column=col, value=sanitized_header)

            # Apply styling with fallbacks - use pre-calculated objects

@ -1272,7 +1285,9 @@ class RendererXlsx(BaseRenderer):
                cell_values = cell_values[:header_count]

            for col, cell_value in enumerate(cell_values, 1):
                sanitized_value = self._sanitizeCellValue(cell_value)
                runs = self._inlineRunsForCell(cell_value)
                cellText = self._renderInlineRuns(runs)
                sanitized_value = self._sanitizeCellValue(cellText)
                cell = sheet.cell(row=startRow, column=col, value=sanitized_value)

                # Apply styling with fallbacks - use pre-calculated objects

@ -1311,20 +1326,20 @@ class RendererXlsx(BaseRenderer):
    def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
        """Add a list element to Excel sheet. Expects nested content structure."""
        try:
            # Extract from nested content structure
            content = element.get("content", {})
            if not isinstance(content, dict):
                return startRow
            list_items = content.get("items") or []
            # Ensure list_items is a list
            if not isinstance(list_items, list):
                list_items = []
            listItems = content.get("items") or []
            if not isinstance(listItems, list):
                listItems = []

            list_style = styles.get("bullet_list", {})
            for item in list_items:
                sheet.cell(row=startRow, column=1, value=f"• {item}")
                if list_style.get("color"):
                    sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"]))
            listStyle = styles.get("bullet_list", {})
            for item in listItems:
                runs = self._inlineRunsForListItem(item)
                text = self._renderInlineRuns(runs)
                sheet.cell(row=startRow, column=1, value=f"\u2022 {text}")
                if listStyle.get("color"):
                    sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(listStyle["color"]))
                startRow += 1

            return startRow

@ -1336,10 +1351,10 @@ class RendererXlsx(BaseRenderer):
    def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
        """Add a paragraph element to Excel sheet. Expects nested content structure."""
        try:
            # Extract from nested content structure
            content = element.get("content", {})
            if isinstance(content, dict):
                text = content.get("text", "")
                runs = self._inlineRunsFromContent(content)
                text = self._renderInlineRuns(runs)
            elif isinstance(content, str):
                text = content
            else:

@ -0,0 +1,75 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Default style definitions and style resolution for document rendering."""

from typing import Any, Dict


DEFAULT_STYLE: Dict[str, Any] = {
    "fonts": {
        "primary": "Calibri",
        "monospace": "Consolas",
    },
    "colors": {
        "primary": "#1F3864",
        "secondary": "#2C3E50",
        "accent": "#2980B9",
        "background": "#FFFFFF",
    },
    "headings": {
        "h1": {"sizePt": 24, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 12, "spaceAfterPt": 6},
        "h2": {"sizePt": 18, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 10, "spaceAfterPt": 4},
        "h3": {"sizePt": 14, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 8, "spaceAfterPt": 3},
        "h4": {"sizePt": 12, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 6, "spaceAfterPt": 2},
    },
    "paragraph": {"sizePt": 11, "lineSpacing": 1.15, "color": "#333333"},
    "table": {
        "headerBg": "#1F3864",
        "headerFg": "#FFFFFF",
        "headerSizePt": 10,
        "bodySizePt": 10,
        "rowBandingEven": "#F2F6FC",
        "rowBandingOdd": "#FFFFFF",
        "borderColor": "#CBD5E1",
        "borderWidthPt": 0.5,
    },
    "list": {"bulletChar": "\u2022", "indentPt": 18, "sizePt": 11},
    "image": {"defaultWidthPt": 480, "maxWidthPt": 800, "alignment": "center"},
    "codeBlock": {"fontSizePt": 9, "background": "#F8F9FA", "borderColor": "#E2E8F0"},
    "page": {
        "format": "A4",
        "marginsPt": {"top": 60, "bottom": 60, "left": 60, "right": 60},
        "showPageNumbers": True,
        "headerHeight": 30,
        "footerHeight": 30,
        "headerLogo": None,
        "headerText": "",
        "footerText": "",
    },
}


def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
    """Recursively merge override into base. Both dicts left unchanged; returns new dict."""
    result = {}
    for key in base:
        if key in override:
            baseVal = base[key]
            overVal = override[key]
            if isinstance(baseVal, dict) and isinstance(overVal, dict):
                result[key] = _deepMerge(baseVal, overVal)
            else:
                result[key] = overVal
        else:
            result[key] = base[key]
    for key in override:
        if key not in base:
            result[key] = override[key]
    return result


def resolveStyle(agentStyle: dict | None) -> Dict[str, Any]:
    """Deep-merge DEFAULT_STYLE <- agentStyle. Returns fully resolved style dict."""
    if not agentStyle:
        return dict(DEFAULT_STYLE)
    return _deepMerge(DEFAULT_STYLE, agentStyle)
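
A small sketch of how the deep merge behaves: an override replaces only the leaves it names, while sibling defaults survive.

    # Sketch: agent overrides only the primary color and the H1 size;
    # everything else falls through to DEFAULT_STYLE.
    resolved = resolveStyle({
        "colors": {"primary": "#0B5345"},
        "headings": {"h1": {"sizePt": 28}},
    })
    assert resolved["colors"]["primary"] == "#0B5345"
    assert resolved["colors"]["background"] == "#FFFFFF"   # default kept
    assert resolved["headings"]["h1"]["weight"] == "bold"  # default kept
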
@ -9,11 +9,70 @@ from typing import Any, Dict
logger = logging.getLogger(__name__)


def _parseInlineRuns(text: str) -> list:
    """
    Parse inline markdown formatting into a list of InlineRun dicts.
    Handles: images, links, bold, italic, inline code, plain text.
    Uses a regex-based tokenizer that processes tokens left-to-right.
    """
    if not text:
        return [{"type": "text", "value": ""}]

    # Pattern order matters: images before links, bold before italic
    _TOKEN_RE = re.compile(
        r'!\[(?P<imgAlt>[^\]]*)\]\((?P<imgSrc>[^)"]+)(?:\s+"(?P<imgWidth>\d+)pt")?\)'  # image
        r'|\[(?P<linkText>[^\]]+)\]\((?P<linkHref>[^)]+)\)'  # link
        r'|`(?P<code>[^`]+)`'  # inline code
        r'|\*\*(?P<bold>.+?)\*\*'  # bold
        r'|(?<!\w)\*(?P<italic1>.+?)\*(?!\w)'  # italic *x*
        r'|(?<!\w)_(?P<italic2>.+?)_(?!\w)'  # italic _x_
    )

    runs = []
    lastEnd = 0

    for m in _TOKEN_RE.finditer(text):
        # Plain text before this match
        if m.start() > lastEnd:
            runs.append({"type": "text", "value": text[lastEnd:m.start()]})

        if m.group("imgAlt") is not None or m.group("imgSrc") is not None:
            alt = (m.group("imgAlt") or "").strip() or "Image"
            src = (m.group("imgSrc") or "").strip()
            widthStr = m.group("imgWidth")
            run = {"type": "image", "value": alt}
            if src.startswith("file:"):
                run["fileId"] = src[5:]
            else:
                run["href"] = src
            if widthStr:
                run["widthPt"] = int(widthStr)
            runs.append(run)
        elif m.group("linkText") is not None:
            runs.append({"type": "link", "value": m.group("linkText"), "href": m.group("linkHref")})
        elif m.group("code") is not None:
            runs.append({"type": "code", "value": m.group("code")})
        elif m.group("bold") is not None:
            runs.append({"type": "bold", "value": m.group("bold")})
        elif m.group("italic1") is not None:
            runs.append({"type": "italic", "value": m.group("italic1")})
        elif m.group("italic2") is not None:
            runs.append({"type": "italic", "value": m.group("italic2")})

        lastEnd = m.end()

    # Trailing plain text
    if lastEnd < len(text):
        runs.append({"type": "text", "value": text[lastEnd:]})

    return runs if runs else [{"type": "text", "value": text}]
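
For example, a mixed line tokenizes into ordered runs; a sketch of the expected output:

    _parseInlineRuns('See **bold** and `code()` plus [docs](https://example.com)')
    # -> [
    #   {"type": "text", "value": "See "},
    #   {"type": "bold", "value": "bold"},
    #   {"type": "text", "value": " and "},
    #   {"type": "code", "value": "code()"},
    #   {"type": "text", "value": " plus "},
    #   {"type": "link", "value": "docs", "href": "https://example.com"},
    # ]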

def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
    """
    Convert markdown content to the standard document JSON format expected by renderReport.
    Supports headings, code blocks, tables, lists, images (file: refs), paragraphs.
    For plain text: wraps entire content in a single paragraph section.
    Convert markdown content to the standard document JSON format with Inline-Run model.
    Sections use inlineRuns (list of run dicts) instead of plain text strings.
    Supports headings, code blocks, tables, lists, images, paragraphs.
    """
    if not isinstance(markdown, str):
        markdown = str(markdown) if markdown else ""

@ -31,7 +90,7 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
    while i < len(lines):
        line = lines[i]

        # Headings
        # Headings (plain text, no inline formatting)
        headingMatch = re.match(r"^(#{1,6})\s+(.+)", line)
        if headingMatch:
            level = len(headingMatch.group(1))

@ -43,7 +102,7 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
            i += 1
            continue

        # Fenced code blocks
        # Fenced code blocks (no inline formatting)
        codeMatch = re.match(r"^```(\w*)", line)
        if codeMatch:
            lang = codeMatch.group(1) or "text"

@ -59,14 +118,14 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
            })
            continue

        # Tables
        # Tables - cells are List[InlineRun]
        tableMatch = re.match(r"^\|(.+)\|$", line)
        if tableMatch and (i + 1) < len(lines) and re.match(r"^\|[\s\-:|]+\|$", lines[i + 1]):
            headerCells = [c.strip() for c in tableMatch.group(1).split("|")]
            headerCells = [_parseInlineRuns(c.strip()) for c in tableMatch.group(1).split("|")]
            i += 2
            rows = []
            while i < len(lines) and re.match(r"^\|(.+)\|$", lines[i]):
                rowCells = [c.strip() for c in lines[i][1:-1].split("|")]
                rowCells = [_parseInlineRuns(c.strip()) for c in lines[i][1:-1].split("|")]
                rows.append(rowCells)
                i += 1
            sections.append({

@ -75,14 +134,14 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
            })
            continue

        # Bullet / numbered lists
        # Bullet / numbered lists - items are List[List[InlineRun]]
        listMatch = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", line)
        if listMatch:
            isNumbered = bool(re.match(r"\d+[.)]", listMatch.group(2)))
            items = []
            while i < len(lines) and re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i]):
                m = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i])
                items.append({"text": m.group(3).strip()})
                items.append(_parseInlineRuns(m.group(3).strip()))
                i += 1
            sections.append({
                "id": _nextId(), "content_type": "bullet_list", "order": order,

@ -95,46 +154,50 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
            i += 1
            continue

        # Images (simplified: store as paragraph with ref for now - full resolution needs Knowledge Store)
        imgMatch = re.match(r"^!\[([^\]]*)\]\(([^)]+)\)", line)
        # Standalone image on its own line -> block-level image section
        imgMatch = re.match(r"^!\[([^\]]*)\]\(([^)\"]+)(?:\s+\"(\d+)pt\")?\)\s*$", line)
        if imgMatch:
            altText = imgMatch.group(1).strip() or "Image"
            src = imgMatch.group(2).strip()
            widthStr = imgMatch.group(3)
            fileId = src[5:] if src.startswith("file:") else ""
            content = {
                "altText": altText,
                "base64Data": "",
                "_fileRef": fileId,
                "_srcUrl": src if not fileId else "",
            }
            if widthStr:
                content["widthPt"] = int(widthStr)
            sections.append({
                "id": _nextId(), "content_type": "image", "order": order,
                "elements": [{
                    "content": {
                        "altText": altText,
                        "base64Data": "",
                        "_fileRef": fileId,
                        "_srcUrl": src if not fileId else "",
                    }
                }],
                "elements": [{"content": content}],
            })
            i += 1
            continue

        # Paragraph
        # Paragraph - produces inlineRuns
        paraLines = []
        while i < len(lines) and lines[i].strip() and not re.match(
            r"^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )", lines[i]
            r"^(#{1,6}\s|```|\|.+\||!\[[^\]]*\]\([^)]+\)\s*$|(\s*)([-*+]|\d+[.)]) )", lines[i]
        ):
            paraLines.append(lines[i])
            i += 1
        if paraLines:
            combinedText = " ".join(paraLines)
            sections.append({
                "id": _nextId(), "content_type": "paragraph", "order": order,
                "elements": [{"content": {"text": " ".join(paraLines)}}],
                "elements": [{"content": {"inlineRuns": _parseInlineRuns(combinedText)}}],
            })
            continue

        i += 1

    if not sections:
        fallbackText = markdown.strip() or "(empty)"
        sections.append({
            "id": _nextId(), "content_type": "paragraph", "order": order,
            "elements": [{"content": {"text": markdown.strip() or "(empty)"}}],
            "elements": [{"content": {"inlineRuns": _parseInlineRuns(fallbackText)}}],
        })
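
End to end, a short markdown input therefore becomes the following shape; a sketch trimmed to the relevant keys (the heading-section payload is not shown in this hunk, so it is elided):

    markdownToDocumentJson("# Title\n\nHello **world**", "Report")
    # sections -> [
    #   {"content_type": "heading", ...},
    #   {"content_type": "paragraph",
    #    "elements": [{"content": {"inlineRuns": [
    #        {"type": "text", "value": "Hello "},
    #        {"type": "bold", "value": "world"},
    #    ]}}]},
    # ]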

    return {

@ -2,9 +2,13 @@
# All rights reserved.
"""Knowledge service: 3-tier RAG with indexing, semantic search, and context building."""

import hashlib
import json
import logging
import re
from typing import Any, Callable, Dict, List, Optional
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Union

from modules.datamodels.datamodelKnowledge import (
    FileContentIndex, ContentChunk, WorkflowMemory,

@ -20,6 +24,68 @@ DEFAULT_CHUNK_TOKENS = 400
DEFAULT_CONTEXT_BUDGET = 12000


# =============================================================================
# Ingestion façade (P0 of unified-knowledge-indexing concept)
# =============================================================================

@dataclass
class IngestionJob:
    """One request to add or refresh content in the unified knowledge store.

    Callers from any lane (routes, feature hooks, agent tools, connector sync)
    describe the work they want done via this object; idempotency, scope
    resolution, and embedding are handled by KnowledgeService.requestIngestion.
    """
    sourceKind: str
    sourceId: str
    fileName: str
    mimeType: str
    userId: str
    contentObjects: List[Dict[str, Any]] = field(default_factory=list)
    featureInstanceId: str = ""
    mandateId: str = ""
    structure: Optional[Dict[str, Any]] = None
    containerPath: Optional[str] = None
    contentVersion: Optional[str] = None
    provenance: Optional[Dict[str, Any]] = None
    # Connector-driven neutralization: True when the user opted in via §2.6 preferences.
    # For sourceKind == "file", _indexFileInternal resolves this from FileItem.neutralize instead.
    neutralize: bool = False


@dataclass
class IngestionHandle:
    """Result of requestIngestion. Stable across in-process and future queue impls."""
    jobId: str
    status: str
    contentHash: str
    fileId: str
    index: Optional[FileContentIndex] = None
    error: Optional[str] = None


def _computeIngestionHash(contentObjects: List[Dict[str, Any]]) -> str:
    """Deterministic SHA256 over (contentType, data) tuples in extractor order.

    `contentObjectId` is intentionally excluded because extractors generate
    fresh UUIDs per run (`uuid.uuid4()`), which would make the hash unstable
    across re-extractions of the same source — defeating idempotency.
    Order is preserved (no sort) because two different documents can share the
    same multiset of parts but differ in arrangement (e.g. swapped pages).
    Text whitespace is preserved intentionally because chunk boundaries
    depend on it.
    """
    normalized = [
        (
            str(o.get("contentType", "text") or "text"),
            o.get("data", "") or "",
        )
        for o in (contentObjects or [])
    ]
    payload = json.dumps(normalized, ensure_ascii=False, separators=(",", ":"))
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()
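
A worked example of the idempotency property (a sketch): two extractions of the same source differ only in their generated contentObjectId values, so they hash identically, while a reordered document does not.

    a = [{"contentObjectId": "uuid-1", "contentType": "text", "data": "p1"},
         {"contentObjectId": "uuid-2", "contentType": "text", "data": "p2"}]
    b = [{"contentObjectId": "uuid-9", "contentType": "text", "data": "p1"},
         {"contentObjectId": "uuid-8", "contentType": "text", "data": "p2"}]
    swapped = list(reversed(a))

    assert _computeIngestionHash(a) == _computeIngestionHash(b)
    assert _computeIngestionHash(a) != _computeIngestionHash(swapped)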


class KnowledgeService:
    """Service for Knowledge Store operations: indexing, retrieval, and context building."""

@ -46,6 +112,224 @@ class KnowledgeService:
        results = await self._embed([text])
        return results[0] if results else []

    # =========================================================================
    # Ingestion façade (single entry point for all lanes)
    # =========================================================================

    async def requestIngestion(self, job: IngestionJob) -> IngestionHandle:
        """Unified entry point for filling the knowledge corpus.

        Applies idempotency based on a content hash (or caller-supplied
        `contentVersion`) persisted in `FileContentIndex.structure._ingestion`.
        Re-runs indexing only when the hash differs or the previous run did
        not reach `indexed` state. Runs embedding synchronously for now
        (callers already schedule background tasks where needed).
        """
        jobId = f"{job.sourceKind}:{job.sourceId}"
        startMs = time.time()
        contentHash = job.contentVersion or _computeIngestionHash(job.contentObjects)

        # 1. Check for duplicate via existing FileContentIndex row.
        existing = None
        try:
            existing = self._knowledgeDb.getFileContentIndex(job.sourceId)
        except Exception:
            existing = None

        if existing:
            existingStructure = (
                existing.get("structure") if isinstance(existing, dict)
                else getattr(existing, "structure", {})
            ) or {}
            existingMeta = existingStructure.get("_ingestion", {}) or {}
            existingStatus = (
                existing.get("status") if isinstance(existing, dict)
                else getattr(existing, "status", "")
            ) or ""
            if existingMeta.get("hash") == contentHash and existingStatus == "indexed":
                logger.info(
                    "ingestion.skipped.duplicate sourceKind=%s sourceId=%s hash=%s",
                    job.sourceKind, job.sourceId, contentHash[:12],
                    extra={
                        "event": "ingestion.skipped.duplicate",
                        "jobId": jobId,
                        "sourceKind": job.sourceKind,
                        "sourceId": job.sourceId,
                        "hash": contentHash,
                        "durationMs": int((time.time() - startMs) * 1000),
                    },
                )
                return IngestionHandle(
                    jobId=jobId,
                    status="duplicate",
                    contentHash=contentHash,
                    fileId=job.sourceId,
                    index=None,
                )

        # 2. Prepare ingestion metadata; stays in structure._ingestion so
        #    later connector revoke/purge can filter chunks by sourceKind /
        #    provenance.connectionId without a schema migration.
        ingestionMeta = {
            "hash": contentHash,
            "sourceKind": job.sourceKind,
            "sourceId": job.sourceId,
            "contentVersion": job.contentVersion,
            "indexedAt": getUtcTimestamp(),
            "provenance": dict(job.provenance or {}),
        }
        structure = dict(job.structure or {})
        structure["_ingestion"] = ingestionMeta

        logger.info(
            "ingestion.queued sourceKind=%s sourceId=%s objects=%d hash=%s",
            job.sourceKind, job.sourceId, len(job.contentObjects or []), contentHash[:12],
            extra={
                "event": "ingestion.queued",
                "jobId": jobId,
                "sourceKind": job.sourceKind,
                "sourceId": job.sourceId,
                "hash": contentHash,
                "objectCount": len(job.contentObjects or []),
            },
        )

        # 3. Run real indexing.
        try:
            index = await self._indexFileInternal(
                fileId=job.sourceId,
                fileName=job.fileName,
                mimeType=job.mimeType,
                userId=job.userId,
                featureInstanceId=job.featureInstanceId,
                mandateId=job.mandateId,
                contentObjects=job.contentObjects or [],
                structure=structure,
                containerPath=job.containerPath,
                sourceKind=job.sourceKind,
                connectionId=(job.provenance or {}).get("connectionId"),
                neutralize=job.neutralize,
            )
        except Exception as exc:
            logger.error(
                "ingestion.failed sourceKind=%s sourceId=%s error=%s",
                job.sourceKind, job.sourceId, exc,
                exc_info=True,
                extra={
                    "event": "ingestion.failed",
                    "jobId": jobId,
                    "sourceKind": job.sourceKind,
                    "sourceId": job.sourceId,
                    "hash": contentHash,
                    "error": str(exc),
                    "durationMs": int((time.time() - startMs) * 1000),
                },
            )
            try:
                self._knowledgeDb.updateFileStatus(job.sourceId, "failed")
            except Exception:
                pass
            return IngestionHandle(
                jobId=jobId,
                status="failed",
                contentHash=contentHash,
                fileId=job.sourceId,
                index=None,
                error=str(exc),
            )

        logger.info(
            "ingestion.indexed sourceKind=%s sourceId=%s objects=%d durationMs=%d",
            job.sourceKind, job.sourceId, len(job.contentObjects or []),
            int((time.time() - startMs) * 1000),
            extra={
                "event": "ingestion.indexed",
                "jobId": jobId,
                "sourceKind": job.sourceKind,
                "sourceId": job.sourceId,
                "hash": contentHash,
                "objectCount": len(job.contentObjects or []),
                "durationMs": int((time.time() - startMs) * 1000),
            },
        )
        return IngestionHandle(
            jobId=jobId,
            status="indexed",
            contentHash=contentHash,
            fileId=job.sourceId,
            index=index,
        )
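
A typical caller (e.g. a connector sync walker) would therefore look roughly like this; a sketch inside an async task, with the identifiers illustrative rather than taken from the diff. The duplicate short-circuit makes re-running it for unchanged content cheap.

    job = IngestionJob(
        sourceKind="sharepoint_item",
        sourceId="sp-item-123",
        fileName="Q3-report.docx",
        mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        userId="user-42",
        contentObjects=[{"contentType": "text", "data": "extracted text"}],
        provenance={"connectionId": "conn-7"},  # enables later revoke/purge
    )
    handle = await knowledgeService.requestIngestion(job)
    if handle.status == "failed":
        logger.warning("ingestion failed: %s", handle.error)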

    def purgeConnection(self, connectionId: str) -> Dict[str, int]:
        """Delete every FileContentIndex + ContentChunk linked to a UserConnection.

        Called on `connection.revoked` events so the knowledge corpus never
        holds chunks the user has withdrawn access to. Returns deletion counts
        for observability.
        """
        if not connectionId:
            return {"indexRows": 0, "chunks": 0}
        startMs = time.time()
        result = self._knowledgeDb.deleteFileContentIndexByConnectionId(connectionId)
        logger.info(
            "ingestion.connection.purged connectionId=%s rows=%d chunks=%d durationMs=%d",
            connectionId, result["indexRows"], result["chunks"],
            int((time.time() - startMs) * 1000),
            extra={
                "event": "ingestion.connection.purged",
                "connectionId": connectionId,
                "indexRows": result["indexRows"],
                "chunks": result["chunks"],
                "durationMs": int((time.time() - startMs) * 1000),
            },
        )
        return result

    def getIngestionStatus(
        self, handleOrJobId: Union[IngestionHandle, str]
    ) -> Dict[str, Any]:
        """Map a handle or `sourceKind:sourceId` jobId to a status snapshot."""
        if isinstance(handleOrJobId, IngestionHandle):
            sourceId = handleOrJobId.fileId
            jobId = handleOrJobId.jobId
        elif isinstance(handleOrJobId, str) and ":" in handleOrJobId:
            jobId = handleOrJobId
            sourceId = handleOrJobId.split(":", 1)[1]
        else:
            jobId = str(handleOrJobId)
            sourceId = str(handleOrJobId)

        row = None
        try:
            row = self._knowledgeDb.getFileContentIndex(sourceId)
        except Exception:
            row = None
        if not row:
            return {
                "jobId": jobId,
                "sourceId": sourceId,
                "status": "unknown",
                "contentHash": None,
            }

        structure = (
            row.get("structure") if isinstance(row, dict)
            else getattr(row, "structure", {})
        ) or {}
        meta = structure.get("_ingestion", {}) or {}
        status = (
            row.get("status") if isinstance(row, dict)
            else getattr(row, "status", "")
        ) or "unknown"
        return {
            "jobId": jobId,
            "sourceId": sourceId,
            "status": status,
            "contentHash": meta.get("hash"),
            "sourceKind": meta.get("sourceKind"),
            "indexedAt": meta.get("indexedAt"),
        }
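
Status polling keys off the same `sourceKind:sourceId` jobId. A sketch (identifiers illustrative):

    snap = knowledgeService.getIngestionStatus("sharepoint_item:sp-item-123")
    # -> {"jobId": "sharepoint_item:sp-item-123", "sourceId": "sp-item-123",
    #     "status": "indexed", "contentHash": "<sha256>",
    #     "sourceKind": "sharepoint_item", "indexedAt": "<timestamp>"}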

    # =========================================================================
    # File Indexing (called after extraction, before embedding)
    # =========================================================================

@ -61,6 +345,57 @@ class KnowledgeService:
        contentObjects: List[Dict[str, Any]] = None,
        structure: Dict[str, Any] = None,
        containerPath: str = None,
    ) -> Optional[FileContentIndex]:
        """Backward-compatible wrapper delegating to requestIngestion.

        Existing callers that still invoke `indexFile` directly automatically
        participate in the idempotency/metrics layer. New callers should
        prefer `requestIngestion` so they can pass `sourceKind` and
        `provenance` for connector revoke/purge later.
        """
        job = IngestionJob(
            sourceKind="file",
            sourceId=fileId,
            fileName=fileName,
            mimeType=mimeType,
            userId=userId,
            featureInstanceId=featureInstanceId,
            mandateId=mandateId,
            contentObjects=list(contentObjects or []),
            structure=structure,
            containerPath=containerPath,
        )
        handle = await self.requestIngestion(job)
        if handle.index is not None:
            return handle.index
        if handle.status == "duplicate":
            row = None
            try:
                row = self._knowledgeDb.getFileContentIndex(fileId)
            except Exception:
                row = None
            if isinstance(row, dict):
                try:
                    return FileContentIndex(**row)
                except Exception:
                    return None
            return row
        return None

    async def _indexFileInternal(
        self,
        fileId: str,
        fileName: str,
        mimeType: str,
        userId: str,
        featureInstanceId: str = "",
        mandateId: str = "",
        contentObjects: List[Dict[str, Any]] = None,
        structure: Dict[str, Any] = None,
        containerPath: str = None,
        sourceKind: str = "file",
        connectionId: Optional[str] = None,
        neutralize: bool = False,
    ) -> FileContentIndex:
        """Index a file's content objects and create embeddings for text chunks.

@ -83,39 +418,41 @@ class KnowledgeService:
        """
        contentObjects = contentObjects or []

        # 1. Resolve scope fields from FileItem (Single Source of Truth)
        #    FileItem lives in poweron_management; its scope/mandateId/featureInstanceId
        #    are authoritative and must be mirrored onto the FileContentIndex.
        # 1. Resolve scope fields from FileItem (Single Source of Truth) for
        #    uploaded files. Connector-sourced ingestion (sharepoint_item,
        #    outlook_message, ...) has no FileItem row — trust the caller's
        #    scope + ids directly.
        resolvedScope = "personal"
        resolvedMandateId = mandateId
        resolvedFeatureInstanceId = featureInstanceId
        resolvedUserId = userId
        _shouldNeutralize = False
        try:
            from modules.datamodels.datamodelFiles import FileItem as _FileItem
            _dbComponent = getattr(self._context, "interfaceDbComponent", None)
            _fileRecords = _dbComponent.getRecordset(_FileItem, recordFilter={"id": fileId}) if _dbComponent else []
            if not _fileRecords:
                from modules.interfaces.interfaceDbManagement import ComponentObjects
                _row = ComponentObjects().db._loadRecord(_FileItem, fileId)
                if _row:
                    _fileRecords = [_row]
            if _fileRecords:
                _fileRecord = _fileRecords[0]
                _get = (lambda k, d=None: _fileRecord.get(k, d)) if isinstance(_fileRecord, dict) else (lambda k, d=None: getattr(_fileRecord, k, d))
                _shouldNeutralize = bool(_get("neutralize", False))
                _fileScope = _get("scope")
                if _fileScope:
                    resolvedScope = _fileScope
                if not resolvedMandateId:
                    resolvedMandateId = str(_get("mandateId", "") or "")
                if not resolvedFeatureInstanceId:
                    resolvedFeatureInstanceId = str(_get("featureInstanceId", "") or "")
                _fileCreatedBy = _get("sysCreatedBy")
                if _fileCreatedBy:
                    resolvedUserId = str(_fileCreatedBy)
        except Exception:
            pass
        _shouldNeutralize = neutralize  # caller-supplied flag (connector prefs / IngestionJob)
        if sourceKind == "file":
            try:
                from modules.datamodels.datamodelFiles import FileItem as _FileItem
                _dbComponent = getattr(self._context, "interfaceDbComponent", None)
                _fileRecords = _dbComponent.getRecordset(_FileItem, recordFilter={"id": fileId}) if _dbComponent else []
                if not _fileRecords:
                    from modules.interfaces.interfaceDbManagement import ComponentObjects
                    _row = ComponentObjects().db._loadRecord(_FileItem, fileId)
                    if _row:
                        _fileRecords = [_row]
                if _fileRecords:
                    _fileRecord = _fileRecords[0]
                    _get = (lambda k, d=None: _fileRecord.get(k, d)) if isinstance(_fileRecord, dict) else (lambda k, d=None: getattr(_fileRecord, k, d))
                    _shouldNeutralize = bool(_get("neutralize", False))  # FileItem is authoritative for uploads
                    _fileScope = _get("scope")
                    if _fileScope:
                        resolvedScope = _fileScope
                    if not resolvedMandateId:
                        resolvedMandateId = str(_get("mandateId", "") or "")
                    if not resolvedFeatureInstanceId:
                        resolvedFeatureInstanceId = str(_get("featureInstanceId", "") or "")
                    _fileCreatedBy = _get("sysCreatedBy")
                    if _fileCreatedBy:
                        resolvedUserId = str(_fileCreatedBy)
            except Exception:
                pass

        # 2. Create FileContentIndex with correct scope from the start
        index = FileContentIndex(

@ -124,6 +461,8 @@ class KnowledgeService:
            featureInstanceId=resolvedFeatureInstanceId,
            mandateId=resolvedMandateId,
            scope=resolvedScope,
            sourceKind=sourceKind,
            connectionId=connectionId,
            fileName=fileName,
            mimeType=mimeType,
            containerPath=containerPath,

@ -300,7 +639,12 @@ class KnowledgeService:
            Formatted context string for injection into the agent's system prompt.
        """
        queryVector = await self._embedSingle(currentPrompt)
        logger.debug(
            "buildAgentContext.start userId=%s featureInstanceId=%s mandateId=%s isSysAdmin=%s prompt=%r",
            userId, featureInstanceId, mandateId, isSysAdmin, (currentPrompt or "")[:120],
        )
        if not queryVector:
            logger.debug("buildAgentContext.abort reason=no_query_vector")
            return ""

        builder = _ContextBuilder(budget=contextBudget)

@ -327,9 +671,14 @@ class KnowledgeService:
            featureInstanceId=featureInstanceId,
            mandateId=mandateId,
            limit=15,
            minScore=0.65,
            minScore=0.35,
            isSysAdmin=isSysAdmin,
        )
        logger.debug(
            "buildAgentContext.layer1 instanceChunks=%d top_scores=%s",
            len(instanceChunks),
            [round(float(c.get("_score", 0) or 0), 3) for c in (instanceChunks or [])[:3]],
        )
        if instanceChunks:
            builder.add(priority=1, label="Relevant Documents", items=instanceChunks, maxChars=4000)

@ -338,7 +687,7 @@ class KnowledgeService:
            queryVector=queryVector,
            workflowId=workflowId,
            limit=10,
            minScore=0.55,
            minScore=0.35,
        )
        if roundMemories:
            memItems = []

@ -376,7 +725,7 @@ class KnowledgeService:
            scope="mandate",
            mandateId=mandateId,
            limit=10,
            minScore=0.7,
            minScore=0.35,
            isSysAdmin=isSysAdmin,
        )
        if mandateChunks:

@ -392,7 +741,12 @@ class KnowledgeService:
            maxChars=500,
        )

        return builder.build()
        _result = builder.build()
        logger.debug(
            "buildAgentContext.done totalChars=%d userId=%s",
            len(_result), userId,
        )
        return _result

    # =========================================================================
    # Workflow Memory

@ -0,0 +1,334 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Connection-lifecycle consumer bridging OAuth events to ingestion jobs.

Subscribes to `connection.established` and `connection.revoked` callbacks
emitted by the OAuth callbacks / connection management routes and dispatches:

- `connection.established` -> enqueue a `connection.bootstrap` BackgroundJob
  that walks the connector and ingests all reachable items via
  KnowledgeService.requestIngestion (file-like or virtual documents).
- `connection.revoked` -> run `KnowledgeService.purgeConnection` synchronously
  so the knowledge corpus releases the data before the UI confirms the revoke.

The consumer is registered once at process boot (see `app.py` lifespan).
It intentionally does NOT hold a per-user service context; each callback
creates whatever context it needs from the UserConnection row itself.
"""

from __future__ import annotations

import asyncio
import logging
from typing import Any, Dict, Optional

from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
from modules.shared.callbackRegistry import callbackRegistry
from modules.serviceCenter.services.serviceBackgroundJobs import (
    registerJobHandler,
    startJob,
)

logger = logging.getLogger(__name__)

BOOTSTRAP_JOB_TYPE = "connection.bootstrap"

_registered = False
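
The boot-time registration itself sits further down in this file (it is what app.py's lifespan imports as registerKnowledgeIngestionConsumer). Assuming callbackRegistry exposes a subscribe(eventName, handler)-style API, it plausibly amounts to the sketch below; the registry method name is an assumption, not confirmed by this hunk.

    def registerKnowledgeIngestionConsumer() -> None:
        global _registered
        if _registered:  # idempotent across repeated lifespan calls
            return
        # Assumed registry API; the actual signature lives in modules.shared.callbackRegistry.
        callbackRegistry.subscribe("connection.established", _onConnectionEstablished)
        callbackRegistry.subscribe("connection.revoked", _onConnectionRevoked)
        registerJobHandler(BOOTSTRAP_JOB_TYPE, _bootstrapJobHandler)
        _registered = True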
|
||||
|
||||
|
||||
def _onConnectionEstablished(
|
||||
*,
|
||||
connectionId: str,
|
||||
authority: str,
|
||||
userId: Optional[str] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Fire-and-forget bootstrap enqueue for a freshly connected UserConnection."""
|
||||
if not connectionId:
|
||||
logger.warning("connection.established without connectionId; ignoring")
|
||||
return
|
||||
payload: Dict[str, Any] = {
|
||||
"connectionId": connectionId,
|
||||
"authority": (authority or "").lower(),
|
||||
"userId": userId,
|
||||
}
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.queued connectionId=%s authority=%s",
|
||||
connectionId, authority,
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.queued",
|
||||
"connectionId": connectionId,
|
||||
"authority": authority,
|
||||
},
|
||||
)
|
||||
|
||||
async def _enqueue() -> None:
|
||||
try:
|
||||
await startJob(
|
||||
BOOTSTRAP_JOB_TYPE,
|
||||
payload,
|
||||
triggeredBy=userId,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error(
|
||||
"ingestion.connection.bootstrap.enqueue_failed connectionId=%s error=%s",
|
||||
connectionId, exc, exc_info=True,
|
||||
)
|
||||
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
if loop.is_running():
|
||||
loop.create_task(_enqueue())
|
||||
else:
|
||||
loop.run_until_complete(_enqueue())
|
||||
except RuntimeError:
|
||||
asyncio.run(_enqueue())
|
||||
|
||||
|
||||
def _onConnectionRevoked(
|
||||
*,
|
||||
connectionId: str,
|
||||
authority: Optional[str] = None,
|
||||
userId: Optional[str] = None,
|
||||
reason: Optional[str] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Run the knowledge purge synchronously so UI feedback is authoritative."""
|
||||
if not connectionId:
|
||||
logger.warning("connection.revoked without connectionId; ignoring")
|
||||
return
|
||||
try:
|
||||
# Purge lives on the DB interface to avoid ServiceCenter/user-context
|
||||
# plumbing here; the service method is a thin wrapper on top of this.
|
||||
result = getKnowledgeInterface(None).deleteFileContentIndexByConnectionId(connectionId)
|
||||
except Exception as exc:
|
||||
logger.error(
|
||||
"ingestion.connection.purged.failed connectionId=%s error=%s",
|
||||
connectionId, exc, exc_info=True,
|
||||
)
|
||||
return
|
||||
logger.info(
|
||||
"ingestion.connection.purged connectionId=%s authority=%s reason=%s rows=%d chunks=%d",
|
||||
connectionId, authority, reason,
|
||||
result.get("indexRows", 0), result.get("chunks", 0),
|
||||
extra={
|
||||
"event": "ingestion.connection.purged",
|
||||
"connectionId": connectionId,
|
||||
"authority": authority,
|
||||
"reason": reason,
|
||||
"indexRows": result.get("indexRows", 0),
|
||||
"chunks": result.get("chunks", 0),
|
||||
},
|
||||
)


async def _bootstrapJobHandler(
    job: Dict[str, Any],
    progressCb,
) -> Dict[str, Any]:
    """Dispatch bootstrap by authority. Each authority runs its own sub-bootstraps."""
    payload = job.get("payload") or {}
    connectionId = payload.get("connectionId")
    authority = (payload.get("authority") or "").lower()
    if not connectionId:
        raise ValueError("connection.bootstrap requires payload.connectionId")

    progressCb(5, f"resolving {authority} connection")

    # Defensive consent check: if the connection has since disabled knowledge ingestion
    # (e.g. user toggled setting after the job was enqueued), skip all walkers.
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        _root = getRootInterface()
        _conn = _root.getUserConnectionById(connectionId)
        if _conn and not getattr(_conn, "knowledgeIngestionEnabled", True):
            logger.info(
                "ingestion.connection.bootstrap.skipped — consent disabled connectionId=%s",
                connectionId,
                extra={
                    "event": "ingestion.connection.bootstrap.skipped",
                    "connectionId": connectionId,
                    "authority": authority,
                    "reason": "consent_disabled",
                },
            )
            return {"connectionId": connectionId, "authority": authority, "skipped": True, "reason": "consent_disabled"}
    except Exception as _guardErr:
        logger.debug("Could not load connection for consent guard: %s", _guardErr)

    def _normalize(res: Any, label: str) -> Dict[str, Any]:
        if isinstance(res, Exception):
            logger.error(
                "ingestion.connection.bootstrap.failed part=%s connectionId=%s error=%s",
                label, connectionId, res, exc_info=res,
            )
            return {"error": str(res)}
        return res or {}

    if authority == "msft":
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
            bootstrapSharepoint,
        )
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncOutlook import (
            bootstrapOutlook,
        )

        progressCb(10, "sharepoint + outlook")
        spResult, olResult = await asyncio.gather(
            bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb),
            bootstrapOutlook(connectionId=connectionId, progressCb=progressCb),
            return_exceptions=True,
        )
        return {
            "connectionId": connectionId,
            "authority": authority,
            "sharepoint": _normalize(spResult, "sharepoint"),
            "outlook": _normalize(olResult, "outlook"),
        }

    if authority == "google":
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGdrive import (
            bootstrapGdrive,
        )
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
            bootstrapGmail,
        )

        progressCb(10, "drive + gmail")
        gdResult, gmResult = await asyncio.gather(
            bootstrapGdrive(connectionId=connectionId, progressCb=progressCb),
            bootstrapGmail(connectionId=connectionId, progressCb=progressCb),
            return_exceptions=True,
        )
        return {
            "connectionId": connectionId,
            "authority": authority,
            "drive": _normalize(gdResult, "gdrive"),
            "gmail": _normalize(gmResult, "gmail"),
        }

    if authority == "clickup":
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup import (
            bootstrapClickup,
        )

        progressCb(10, "clickup tasks")
        cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb)
        return {
            "connectionId": connectionId,
            "authority": authority,
            "clickup": _normalize(cuResult, "clickup"),
        }

    logger.info(
        "ingestion.connection.bootstrap.skipped reason=unsupported_authority authority=%s connectionId=%s",
        authority, connectionId,
        extra={
            "event": "ingestion.connection.bootstrap.skipped",
            "authority": authority,
            "connectionId": connectionId,
            "reason": "unsupported_authority",
        },
    )
    return {
        "connectionId": connectionId,
        "authority": authority,
        "skipped": True,
        "reason": "unsupported_authority",
    }
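

# Illustrative sketch (not part of the handler): the shape of the job dict that
# _bootstrapJobHandler receives. The payload keys mirror exactly what
# _scheduledDailyResync enqueues below; the envelope fields ("id", "type") are
# assumptions about the BackgroundJob format, not confirmed by this file.
#
#     exampleJob = {
#         "id": "job-123",                    # hypothetical envelope field
#         "type": BOOTSTRAP_JOB_TYPE,
#         "payload": {
#             "connectionId": "7f3a...",      # required, else ValueError
#             "authority": "google",          # "msft" | "google" | "clickup"
#             "userId": "42",
#         },
#     }
#     result = await _bootstrapJobHandler(exampleJob, lambda pct, msg=None: None)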


async def _scheduledDailyResync() -> None:
    """Enqueue a connection.bootstrap job for every active knowledge connection.

    Runs once per day (default 2 AM Europe/Zurich). Each job re-walks the
    connector and hands new / changed items to KnowledgeService.requestIngestion.
    Unchanged items are deduplicated by content-hash and skipped automatically.
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        rootInterface = getRootInterface()
        connections = rootInterface.getActiveKnowledgeConnections()
    except Exception as exc:
        logger.error("knowledge.daily_resync: could not load connections: %s", exc, exc_info=True)
        return

    if not connections:
        logger.info("knowledge.daily_resync: no active knowledge connections — nothing to do")
        return

    logger.info(
        "knowledge.daily_resync: enqueuing bootstrap for %d connection(s)",
        len(connections),
        extra={"event": "knowledge.daily_resync.started", "count": len(connections)},
    )

    enqueued = 0
    skipped = 0
    for conn in connections:
        connectionId = str(conn.id)
        authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority)
        userId = str(conn.userId)
        payload: Dict[str, Any] = {
            "connectionId": connectionId,
            "authority": authority.lower(),
            "userId": userId,
        }
        try:
            await startJob(
                BOOTSTRAP_JOB_TYPE,
                payload,
                triggeredBy="scheduler.daily_resync",
            )
            enqueued += 1
            logger.debug(
                "knowledge.daily_resync: queued connectionId=%s authority=%s",
                connectionId, authority,
            )
        except Exception as exc:
            skipped += 1
            logger.error(
                "knowledge.daily_resync: failed to enqueue connectionId=%s: %s",
                connectionId, exc,
            )

    logger.info(
        "knowledge.daily_resync: done — enqueued=%d skipped=%d",
        enqueued, skipped,
        extra={"event": "knowledge.daily_resync.done", "enqueued": enqueued, "skipped": skipped},
    )


def registerDailyResyncScheduler(*, hour: int = 2, minute: int = 0) -> None:
    """Register the daily knowledge re-sync cron job. Idempotent.

    Args:
        hour: Hour of day to run (0–23, default 2 → 2 AM Europe/Zurich).
        minute: Minute within the hour (default 0).
    """
    try:
        from modules.shared.eventManagement import eventManager
        eventManager.registerCron(
            jobId="knowledge.daily_resync",
            func=_scheduledDailyResync,
            cronKwargs={"hour": str(hour), "minute": str(minute)},
        )
        logger.info(
            "knowledge.daily_resync scheduler registered (daily %02d:%02d Europe/Zurich)",
            hour, minute,
        )
    except Exception as exc:
        logger.warning("knowledge.daily_resync scheduler registration failed (non-critical): %s", exc)
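

# Usage sketch: shift the resync window away from the 02:00 default. This
# assumes registerCron with the same jobId replaces rather than duplicates the
# cron entry — the docstring above calls the registration idempotent, but the
# eventManager API itself is not shown in this file.
#
#     registerDailyResyncScheduler(hour=3, minute=30)   # 03:30 Europe/Zurich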


def registerKnowledgeIngestionConsumer() -> None:
    """Register callback subscribers + background job handler. Idempotent."""
    global _registered
    if _registered:
        return
    callbackRegistry.register("connection.established", _onConnectionEstablished)
    callbackRegistry.register("connection.revoked", _onConnectionRevoked)
    registerJobHandler(BOOTSTRAP_JOB_TYPE, _bootstrapJobHandler)
    registerDailyResyncScheduler()
    _registered = True
    logger.info("KnowledgeIngestionConsumer registered (established/revoked + %s handler + daily resync)", BOOTSTRAP_JOB_TYPE)
@ -0,0 +1,101 @@
"""Per-connection knowledge ingestion preference helpers.

Walkers call `loadConnectionPrefs(connectionId)` once at bootstrap start and
receive a `ConnectionIngestionPrefs` dataclass they can pass down into their
inner loops. All fields have safe defaults so walkers stay backward-compatible
with connections that predate the §2.6 preference schema (knowledgePreferences
is None).
"""
from __future__ import annotations

import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

_DEFAULT_MAX_AGE_DAYS = 90
_DEFAULT_MAIL_DEPTH = "full"
_DEFAULT_CLICKUP_SCOPE = "title_description"


@dataclass
class ConnectionIngestionPrefs:
    """Parsed per-connection preferences for knowledge ingestion walkers."""

    # PII
    neutralizeBeforeEmbed: bool = False

    # Mail (Outlook + Gmail)
    mailContentDepth: str = _DEFAULT_MAIL_DEPTH  # "metadata" | "snippet" | "full"
    mailIndexAttachments: bool = False

    # Files (Drive / SharePoint / OneDrive)
    filesIndexBinaries: bool = True
    mimeAllowlist: List[str] = field(default_factory=list)  # empty = all allowed

    # ClickUp
    clickupScope: str = _DEFAULT_CLICKUP_SCOPE  # "titles" | "title_description" | "with_comments"
    clickupIndexAttachments: bool = False

    # Per-authority surface toggles (default everything on)
    gmailEnabled: bool = True
    driveEnabled: bool = True
    sharepointEnabled: bool = True
    outlookEnabled: bool = True

    # Time window
    maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS  # 0 = no limit
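

# Illustrative sketch: a `knowledgePreferences` dict as loadConnectionPrefs
# below expects to find it on the connection row. Every key is optional; the
# values shown are examples, not defaults.
#
#     examplePrefs = {
#         "neutralizeBeforeEmbed": True,
#         "mailContentDepth": "snippet",
#         "mailIndexAttachments": False,
#         "filesIndexBinaries": True,
#         "mimeAllowlist": ["application/pdf", "text/plain"],
#         "clickupScope": "with_comments",
#         "surfaceToggles": {
#             "google": {"gmail": True, "drive": False},
#             "msft": {"sharepoint": True, "outlook": True},
#         },
#         "maxAgeDays": 30,
#     }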


def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs:
    """Load and parse per-connection preferences from the database.

    Returns safe defaults for any missing or unparseable values so walkers
    never fail due to missing preference data.
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        root = getRootInterface()
        conn = root.getUserConnectionById(connectionId)
        if not conn:
            logger.debug("loadConnectionPrefs: connection %s not found, using defaults", connectionId)
            return ConnectionIngestionPrefs()

        raw: Optional[Dict[str, Any]] = getattr(conn, "knowledgePreferences", None)
        if not raw or not isinstance(raw, dict):
            return ConnectionIngestionPrefs()

        def _bool(key: str, default: bool) -> bool:
            v = raw.get(key)
            return bool(v) if isinstance(v, bool) else default

        def _str(key: str, allowed: List[str], default: str) -> str:
            v = raw.get(key)
            return v if v in allowed else default

        def _int(key: str, default: int) -> int:
            v = raw.get(key)
            return int(v) if isinstance(v, int) else default

        surface = raw.get("surfaceToggles") or {}
        google_surf = surface.get("google") or {}
        msft_surf = surface.get("msft") or {}

        return ConnectionIngestionPrefs(
            neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False),
            mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH),
            mailIndexAttachments=_bool("mailIndexAttachments", False),
            filesIndexBinaries=_bool("filesIndexBinaries", True),
            mimeAllowlist=list(raw.get("mimeAllowlist") or []),
            clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE),
            clickupIndexAttachments=_bool("clickupIndexAttachments", False),
            gmailEnabled=bool(google_surf.get("gmail", True)),
            driveEnabled=bool(google_surf.get("drive", True)),
            sharepointEnabled=bool(msft_surf.get("sharepoint", True)),
            outlookEnabled=bool(msft_surf.get("outlook", True)),
            maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS),
        )
    except Exception as exc:
        logger.warning("loadConnectionPrefs failed for %s, using defaults: %s", connectionId, exc)
        return ConnectionIngestionPrefs()
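

# Usage sketch inside a walker, as the module docstring describes — load once,
# gate the surface, then derive limits. The walkers below build limits this
# way; the skip-result shape here is hypothetical.
#
#     prefs = loadConnectionPrefs(connectionId)
#     if not prefs.gmailEnabled:
#         return {"skipped": True, "reason": "surface_disabled"}
#     limits = GmailBootstrapLimits(
#         maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
#         mailContentDepth=prefs.mailContentDepth,
#     )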
@ -0,0 +1,512 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""ClickUp bootstrap for the unified knowledge ingestion lane.

ClickUp tasks are ingested as *virtual documents* — we never download file
bytes. Each task becomes a `sourceKind="clickup_task"` IngestionJob whose
`contentObjects` carry a summary header (name + status + metadata) and the
task description / text content so retrieval finds them without a live API
call.

Hierarchy traversal: workspace (team) → spaces → folders / folderless lists →
tasks. We cap the fan-out with `maxWorkspaces` / `maxListsPerWorkspace` /
`maxTasks` and skip tasks older than `maxAgeDays` (default 180 d).

Idempotency: `date_updated` from the ClickUp task payload is a millisecond
timestamp and strictly monotonic per revision — used as `contentVersion`.
"""

from __future__ import annotations

import hashlib
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional

logger = logging.getLogger(__name__)

MAX_TASKS_DEFAULT = 500
MAX_WORKSPACES_DEFAULT = 3
MAX_LISTS_PER_WORKSPACE_DEFAULT = 20
MAX_DESCRIPTION_CHARS_DEFAULT = 8000
MAX_AGE_DAYS_DEFAULT = 180


@dataclass
class ClickupBootstrapLimits:
    maxTasks: int = MAX_TASKS_DEFAULT
    maxWorkspaces: int = MAX_WORKSPACES_DEFAULT
    maxListsPerWorkspace: int = MAX_LISTS_PER_WORKSPACE_DEFAULT
    maxDescriptionChars: int = MAX_DESCRIPTION_CHARS_DEFAULT
    # Only ingest tasks updated within the last N days. None disables filter.
    maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
    # Include closed/archived tasks if they still meet the recency filter.
    # ClickUp `closed` tasks often carry the most useful RAG context
    # ("why was this shipped the way it was?").
    includeClosed: bool = True
    # Pass-through to IngestionJob.neutralize
    neutralize: bool = False
    # Content scope: "titles" | "title_description" | "with_comments"
    clickupScope: str = "title_description"


@dataclass
class ClickupBootstrapResult:
    connectionId: str
    indexed: int = 0
    skippedDuplicate: int = 0
    skippedPolicy: int = 0
    failed: int = 0
    workspaces: int = 0
    lists: int = 0
    errors: List[str] = field(default_factory=list)


def _syntheticTaskId(connectionId: str, taskId: str) -> str:
    token = hashlib.sha256(f"{connectionId}:{taskId}".encode("utf-8")).hexdigest()[:16]
    return f"cu:{connectionId[:8]}:{token}"


def _truncate(value: Any, limit: int) -> str:
    text = str(value or "").strip()
    if not text:
        return ""
    if len(text) <= limit:
        return text
    return text[:limit].rstrip() + "\n[truncated]"


def _isRecent(dateUpdatedMs: Any, maxAgeDays: Optional[int]) -> bool:
    if not maxAgeDays:
        return True
    if not dateUpdatedMs:
        return True
    try:
        ts = datetime.fromtimestamp(int(dateUpdatedMs) / 1000.0, tz=timezone.utc)
    except Exception:
        return True
    cutoff = datetime.now(timezone.utc) - timedelta(days=maxAgeDays)
    return ts >= cutoff


def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -> List[Dict[str, Any]]:
    """Header (name/status/metadata) + optional description + text_content.

    `limits.clickupScope` controls how much is embedded:
      - "titles": task name + status metadata only
      - "title_description": header + description / text_content (default)
      - "with_comments": header + description + text_content
        (comments themselves are not yet fetched in v1)
    """
    name = task.get("name") or f"Task {task.get('id', '')}"
    status = ((task.get("status") or {}).get("status")) or ""
    assignees = ", ".join(
        filter(None, [
            (a.get("username") or a.get("email") or "")
            for a in (task.get("assignees") or [])
        ])
    )
    tags = ", ".join(filter(None, [t.get("name", "") for t in (task.get("tags") or [])]))
    listInfo = task.get("list") or {}
    folderInfo = task.get("folder") or {}
    spaceInfo = task.get("space") or {}
    dueMs = task.get("due_date")
    dueIso = ""
    if dueMs:
        try:
            dueIso = datetime.fromtimestamp(int(dueMs) / 1000.0, tz=timezone.utc).strftime("%Y-%m-%d")
        except Exception:
            dueIso = ""

    headerLines = [
        f"Task: {name}",
        f"Status: {status}" if status else "",
        f"List: {listInfo.get('name', '')}" if listInfo else "",
        f"Folder: {folderInfo.get('name', '')}" if folderInfo else "",
        f"Space: {spaceInfo.get('name', '')}" if spaceInfo else "",
        f"Assignees: {assignees}" if assignees else "",
        f"Tags: {tags}" if tags else "",
        f"Due: {dueIso}" if dueIso else "",
        f"Url: {task.get('url', '')}" if task.get("url") else "",
    ]
    header = "\n".join(line for line in headerLines if line)

    parts: List[Dict[str, Any]] = [{
        "contentObjectId": "header",
        "contentType": "text",
        "data": header,
        "contextRef": {"part": "header"},
    }]

    scope = getattr(limits, "clickupScope", "title_description")
    if scope in ("title_description", "with_comments"):
        description = _truncate(task.get("description"), limits.maxDescriptionChars)
        if description:
            parts.append({
                "contentObjectId": "description",
                "contentType": "text",
                "data": description,
                "contextRef": {"part": "description"},
            })
        # text_content is ClickUp's rendered-markdown version; include if it adds
        # something beyond the plain description (common for bullet lists, checklists).
        textContent = _truncate(task.get("text_content"), limits.maxDescriptionChars)
        if textContent and textContent != description:
            parts.append({
                "contentObjectId": "text_content",
                "contentType": "text",
                "data": textContent,
                "contextRef": {"part": "text_content"},
            })
    return parts
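

# Illustrative: a minimal task dict and the header it produces (field values
# invented for the example):
#
#     task = {
#         "id": "86cx1234",
#         "name": "Ship Q3 export",
#         "status": {"status": "in progress"},
#         "list": {"name": "Backend"},
#         "url": "https://app.clickup.com/t/86cx1234",
#     }
#     _buildContentObjects(task, ClickupBootstrapLimits())[0]["data"]
#     # -> "Task: Ship Q3 export\nStatus: in progress\nList: Backend\n
#     #     Url: https://app.clickup.com/t/86cx1234"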


async def bootstrapClickup(
    connectionId: str,
    *,
    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
    adapter: Any = None,
    connection: Any = None,
    knowledgeService: Any = None,
    limits: Optional[ClickupBootstrapLimits] = None,
) -> Dict[str, Any]:
    """Walk workspaces → lists → tasks and ingest each task as a virtual doc."""
    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
    prefs = loadConnectionPrefs(connectionId)

    if not limits:
        limits = ClickupBootstrapLimits(
            maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
            neutralize=prefs.neutralizeBeforeEmbed,
            clickupScope=prefs.clickupScope,
        )

    startMs = time.time()
    result = ClickupBootstrapResult(connectionId=connectionId)

    logger.info(
        "ingestion.connection.bootstrap.started part=clickup connectionId=%s",
        connectionId,
        extra={
            "event": "ingestion.connection.bootstrap.started",
            "part": "clickup",
            "connectionId": connectionId,
        },
    )

    if adapter is None or knowledgeService is None or connection is None:
        adapter, connection, knowledgeService = await _resolveDependencies(connectionId)

    mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
    userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""

    svc = getattr(adapter, "_svc", None)
    if svc is None:
        result.errors.append("adapter missing _svc instance")
        return _finalizeResult(connectionId, result, startMs)

    try:
        teamsResp = await svc.getAuthorizedTeams()
    except Exception as exc:
        logger.error("clickup team discovery failed for %s: %s", connectionId, exc, exc_info=True)
        result.errors.append(f"teams: {exc}")
        return _finalizeResult(connectionId, result, startMs)

    teams = (teamsResp or {}).get("teams") or []
    for team in teams[: limits.maxWorkspaces]:
        if result.indexed + result.skippedDuplicate >= limits.maxTasks:
            break
        teamId = str(team.get("id", "") or "")
        if not teamId:
            continue
        result.workspaces += 1
        try:
            await _walkTeam(
                svc=svc,
                knowledgeService=knowledgeService,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                team=team,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )
        except Exception as exc:
            logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
            result.errors.append(f"team({teamId}): {exc}")

    return _finalizeResult(connectionId, result, startMs)


async def _resolveDependencies(connectionId: str):
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.auth import TokenManager
    from modules.connectors.providerClickup.connectorClickup import ClickupConnector
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.security.rootAccess import getRootUser

    rootInterface = getRootInterface()
    connection = rootInterface.getUserConnectionById(connectionId)
    if connection is None:
        raise ValueError(f"UserConnection not found: {connectionId}")

    token = TokenManager().getFreshToken(connectionId)
    if not token or not token.tokenAccess:
        raise ValueError(f"No valid token for connection {connectionId}")

    provider = ClickupConnector(connection, token.tokenAccess)
    adapter = provider.getServiceAdapter("clickup")

    rootUser = getRootUser()
    ctx = ServiceCenterContext(
        user=rootUser,
        mandate_id=str(getattr(connection, "mandateId", "") or ""),
    )
    knowledgeService = getService("knowledge", ctx)
    return adapter, connection, knowledgeService


async def _walkTeam(
    *,
    svc,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    team: Dict[str, Any],
    limits: ClickupBootstrapLimits,
    result: ClickupBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    teamId = str(team.get("id", "") or "")
    spacesResp = await svc.getSpaces(teamId)
    spaces = (spacesResp or {}).get("spaces") or []

    listsCollected: List[Dict[str, Any]] = []
    for space in spaces:
        if len(listsCollected) >= limits.maxListsPerWorkspace:
            break
        spaceId = str(space.get("id", "") or "")
        if not spaceId:
            continue

        # Folderless lists directly under the space
        folderless = await svc.getFolderlessLists(spaceId)
        for lst in (folderless or {}).get("lists") or []:
            if len(listsCollected) >= limits.maxListsPerWorkspace:
                break
            listsCollected.append({**lst, "_space": space})

        # Lists inside folders
        foldersResp = await svc.getFolders(spaceId)
        for folder in (foldersResp or {}).get("folders") or []:
            if len(listsCollected) >= limits.maxListsPerWorkspace:
                break
            folderId = str(folder.get("id", "") or "")
            if not folderId:
                continue
            folderLists = await svc.getListsInFolder(folderId)
            for lst in (folderLists or {}).get("lists") or []:
                if len(listsCollected) >= limits.maxListsPerWorkspace:
                    break
                listsCollected.append({**lst, "_space": space, "_folder": folder})

    for lst in listsCollected:
        if result.indexed + result.skippedDuplicate >= limits.maxTasks:
            return
        result.lists += 1
        await _walkList(
            svc=svc,
            knowledgeService=knowledgeService,
            connectionId=connectionId,
            mandateId=mandateId,
            userId=userId,
            teamId=teamId,
            lst=lst,
            limits=limits,
            result=result,
            progressCb=progressCb,
        )


async def _walkList(
    *,
    svc,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    teamId: str,
    lst: Dict[str, Any],
    limits: ClickupBootstrapLimits,
    result: ClickupBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    listId = str(lst.get("id", "") or "")
    if not listId:
        return
    page = 0
    while result.indexed + result.skippedDuplicate < limits.maxTasks:
        resp = await svc.getTasksInList(
            listId,
            page=page,
            include_closed=limits.includeClosed,
            subtasks=True,
        )
        if isinstance(resp, dict) and resp.get("error"):
            logger.warning("clickup tasks list=%s page=%d error: %s", listId, page, resp.get("error"))
            result.errors.append(f"list({listId}): {resp.get('error')}")
            return
        tasks = (resp or {}).get("tasks") or []
        if not tasks:
            return

        for task in tasks:
            if result.indexed + result.skippedDuplicate >= limits.maxTasks:
                return
            if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
                result.skippedPolicy += 1
                continue
            # Inject the list/folder/space metadata we already loaded.
            task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")}
            task["folder"] = task.get("folder") or lst.get("_folder") or {}
            task["space"] = task.get("space") or lst.get("_space") or {}
            await _ingestTask(
                knowledgeService=knowledgeService,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                teamId=teamId,
                task=task,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )

        if len(tasks) < 100:  # ClickUp page-size hint: fewer than 100 => last page
            return
        page += 1


async def _ingestTask(
    *,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    teamId: str,
    task: Dict[str, Any],
    limits: ClickupBootstrapLimits,
    result: ClickupBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

    taskId = str(task.get("id", "") or "")
    if not taskId:
        result.skippedPolicy += 1
        return
    revision = str(task.get("date_updated") or task.get("date_created") or "")
    name = task.get("name") or f"Task {taskId}"
    syntheticId = _syntheticTaskId(connectionId, taskId)
    fileName = f"{name[:80].strip() or taskId}.task.json"

    contentObjects = _buildContentObjects(task, limits)

    try:
        handle = await knowledgeService.requestIngestion(
            IngestionJob(
                sourceKind="clickup_task",
                sourceId=syntheticId,
                fileName=fileName,
                mimeType="application/vnd.clickup.task+json",
                userId=userId,
                mandateId=mandateId,
                contentObjects=contentObjects,
                contentVersion=revision or None,
                neutralize=limits.neutralize,
                provenance={
                    "connectionId": connectionId,
                    "authority": "clickup",
                    "service": "clickup",
                    "externalItemId": taskId,
                    "teamId": teamId,
                    "listId": ((task.get("list") or {}).get("id")),
                    "spaceId": ((task.get("space") or {}).get("id")),
                    "url": task.get("url"),
                    "status": ((task.get("status") or {}).get("status")),
                    "tier": limits.clickupScope,
                },
            )
        )
    except Exception as exc:
        logger.error("clickup ingestion %s failed: %s", taskId, exc, exc_info=True)
        result.failed += 1
        result.errors.append(f"ingest({taskId}): {exc}")
        return

    if handle.status == "duplicate":
        result.skippedDuplicate += 1
    elif handle.status == "indexed":
        result.indexed += 1
    else:
        result.failed += 1

    if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
        processed = result.indexed + result.skippedDuplicate
        try:
            progressCb(
                min(90, 10 + int(80 * processed / max(1, limits.maxTasks))),
                f"clickup processed={processed}",
            )
        except Exception:
            pass
        logger.info(
            "ingestion.connection.bootstrap.progress part=clickup processed=%d skippedDup=%d failed=%d",
            processed, result.skippedDuplicate, result.failed,
            extra={
                "event": "ingestion.connection.bootstrap.progress",
                "part": "clickup",
                "connectionId": connectionId,
                "processed": processed,
                "skippedDup": result.skippedDuplicate,
                "failed": result.failed,
            },
        )


def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs: float) -> Dict[str, Any]:
    durationMs = int((time.time() - startMs) * 1000)
    logger.info(
        "ingestion.connection.bootstrap.done part=clickup connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d workspaces=%d lists=%d durationMs=%d",
        connectionId,
        result.indexed, result.skippedDuplicate, result.skippedPolicy,
        result.failed, result.workspaces, result.lists, durationMs,
        extra={
            "event": "ingestion.connection.bootstrap.done",
            "part": "clickup",
            "connectionId": connectionId,
            "indexed": result.indexed,
            "skippedDup": result.skippedDuplicate,
            "skippedPolicy": result.skippedPolicy,
            "failed": result.failed,
            "workspaces": result.workspaces,
            "lists": result.lists,
            "durationMs": durationMs,
        },
    )
    return {
        "connectionId": result.connectionId,
        "indexed": result.indexed,
        "skippedDuplicate": result.skippedDuplicate,
        "skippedPolicy": result.skippedPolicy,
        "failed": result.failed,
        "workspaces": result.workspaces,
        "lists": result.lists,
        "durationMs": durationMs,
        "errors": result.errors[:20],
    }
@ -0,0 +1,443 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Google Drive bootstrap for the unified knowledge ingestion lane.

Mirrors the SharePoint pilot (see subConnectorSyncSharepoint.py). Walks the
user's *My Drive* tree from the virtual `root` folder, downloads each
file-like item via `DriveAdapter.download` (which handles native Google docs
via export), runs the standard extraction pipeline and routes results through
`KnowledgeService.requestIngestion` with `sourceKind="gdrive_item"` and
`contentVersion = modifiedTime` (monotonic per-revision).
"""

from __future__ import annotations

import hashlib
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional

from modules.datamodels.datamodelExtraction import ExtractionOptions

logger = logging.getLogger(__name__)

MAX_ITEMS_DEFAULT = 500
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
MAX_DEPTH_DEFAULT = 4
MAX_AGE_DAYS_DEFAULT = 365

# Google Drive uses virtual mime-types for folders and non-downloadable assets.
FOLDER_MIME = "application/vnd.google-apps.folder"


@dataclass
class GdriveBootstrapLimits:
    maxItems: int = MAX_ITEMS_DEFAULT
    maxBytes: int = MAX_BYTES_DEFAULT
    maxFileSize: int = MAX_FILE_SIZE_DEFAULT
    skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
    maxDepth: int = MAX_DEPTH_DEFAULT
    # Only ingest files modified within the last N days. None disables filter.
    maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
    # Pass-through to IngestionJob.neutralize
    neutralize: bool = False
    # Whether to skip binary/non-text files
    filesIndexBinaries: bool = True


@dataclass
class GdriveBootstrapResult:
    connectionId: str
    indexed: int = 0
    skippedDuplicate: int = 0
    skippedPolicy: int = 0
    failed: int = 0
    bytesProcessed: int = 0
    errors: List[str] = field(default_factory=list)


def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
    token = hashlib.sha256(f"{connectionId}:{externalItemId}".encode("utf-8")).hexdigest()[:16]
    return f"gd:{connectionId[:8]}:{token}"


def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
    parts = getattr(extracted, "parts", None) or []
    out: List[Dict[str, Any]] = []
    for part in parts:
        data = getattr(part, "data", None) or ""
        if not data or not str(data).strip():
            continue
        typeGroup = getattr(part, "typeGroup", "text") or "text"
        contentType = "text"
        if typeGroup == "image":
            contentType = "image"
        elif typeGroup in ("binary", "container"):
            contentType = "other"
        out.append({
            "contentObjectId": getattr(part, "id", ""),
            "contentType": contentType,
            "data": data,
            "contextRef": {
                "containerPath": fileName,
                "location": getattr(part, "label", None) or "file",
                **(getattr(part, "metadata", None) or {}),
            },
        })
    return out


def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
    if not maxAgeDays:
        return True
    if not modifiedIso:
        # No timestamp -> be permissive (Drive native docs sometimes omit it on export).
        return True
    try:
        # Google returns RFC 3339 with `Z` or offset; python 3.11+ parses both.
        ts = datetime.fromisoformat(modifiedIso.replace("Z", "+00:00"))
    except Exception:
        return True
    cutoff = datetime.now(timezone.utc) - timedelta(days=maxAgeDays)
    if ts.tzinfo is None:
        ts = ts.replace(tzinfo=timezone.utc)
    return ts >= cutoff


async def bootstrapGdrive(
    connectionId: str,
    *,
    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
    adapter: Any = None,
    connection: Any = None,
    knowledgeService: Any = None,
    limits: Optional[GdriveBootstrapLimits] = None,
    runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
    """Walk My Drive starting from the virtual root folder."""
    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
    prefs = loadConnectionPrefs(connectionId)

    if not limits:
        limits = GdriveBootstrapLimits(
            maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
            neutralize=prefs.neutralizeBeforeEmbed,
            filesIndexBinaries=prefs.filesIndexBinaries,
        )

    startMs = time.time()
    result = GdriveBootstrapResult(connectionId=connectionId)

    logger.info(
        "ingestion.connection.bootstrap.started part=gdrive connectionId=%s",
        connectionId,
        extra={
            "event": "ingestion.connection.bootstrap.started",
            "part": "gdrive",
            "connectionId": connectionId,
        },
    )

    if adapter is None or knowledgeService is None or connection is None:
        adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
    if runExtractionFn is None:
        from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
        from modules.serviceCenter.services.serviceExtraction.subRegistry import (
            ExtractorRegistry, ChunkerRegistry,
        )
        extractorRegistry = ExtractorRegistry()
        chunkerRegistry = ChunkerRegistry()

        def runExtractionFn(bytesData, name, mime, options):  # type: ignore[no-redef]
            return runExtraction(extractorRegistry, chunkerRegistry, bytesData, name, mime, options)

    mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
    userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""

    try:
        await _walkFolder(
            adapter=adapter,
            knowledgeService=knowledgeService,
            runExtractionFn=runExtractionFn,
            connectionId=connectionId,
            mandateId=mandateId,
            userId=userId,
            folderPath="/",  # DriveAdapter.browse maps "" / "/" -> "root"
            depth=0,
            limits=limits,
            result=result,
            progressCb=progressCb,
        )
    except Exception as exc:
        logger.error("gdrive walk failed for %s: %s", connectionId, exc, exc_info=True)
        result.errors.append(f"walk: {exc}")

    return _finalizeResult(connectionId, result, startMs)
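

# Test-style usage sketch: every collaborator is injectable, so the walker can
# run against fakes without OAuth (the preference lookup falls back to
# defaults if the DB is unavailable). FakeAdapter / FakeConnection /
# FakeKnowledge are hypothetical stand-ins, not classes from this repo.
#
#     from types import SimpleNamespace
#     result = await bootstrapGdrive(
#         "test-conn-id",
#         adapter=FakeAdapter(),              # must expose browse() / download()
#         connection=FakeConnection(),        # needs .mandateId / .userId
#         knowledgeService=FakeKnowledge(),   # needs .requestIngestion()
#         limits=GdriveBootstrapLimits(maxItems=10, maxDepth=1),
#         runExtractionFn=lambda b, n, m, o: SimpleNamespace(parts=[]),
#     )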


async def _resolveDependencies(connectionId: str):
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.auth import TokenManager
    from modules.connectors.providerGoogle.connectorGoogle import GoogleConnector
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.security.rootAccess import getRootUser

    rootInterface = getRootInterface()
    connection = rootInterface.getUserConnectionById(connectionId)
    if connection is None:
        raise ValueError(f"UserConnection not found: {connectionId}")

    token = TokenManager().getFreshToken(connectionId)
    if not token or not token.tokenAccess:
        raise ValueError(f"No valid token for connection {connectionId}")

    provider = GoogleConnector(connection, token.tokenAccess)
    adapter = provider.getServiceAdapter("drive")

    rootUser = getRootUser()
    ctx = ServiceCenterContext(
        user=rootUser,
        mandate_id=str(getattr(connection, "mandateId", "") or ""),
    )
    knowledgeService = getService("knowledge", ctx)
    return adapter, connection, knowledgeService


async def _walkFolder(
    *,
    adapter,
    knowledgeService,
    runExtractionFn,
    connectionId: str,
    mandateId: str,
    userId: str,
    folderPath: str,
    depth: int,
    limits: GdriveBootstrapLimits,
    result: GdriveBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    if depth > limits.maxDepth:
        return
    try:
        entries = await adapter.browse(folderPath)
    except Exception as exc:
        logger.warning("gdrive browse %s failed: %s", folderPath, exc)
        result.errors.append(f"browse({folderPath}): {exc}")
        return

    for entry in entries:
        if result.indexed + result.skippedDuplicate >= limits.maxItems:
            return
        if result.bytesProcessed >= limits.maxBytes:
            return

        entryPath = getattr(entry, "path", "") or ""
        metadata = getattr(entry, "metadata", {}) or {}
        mimeType = getattr(entry, "mimeType", None) or metadata.get("mimeType")

        if getattr(entry, "isFolder", False) or mimeType == FOLDER_MIME:
            await _walkFolder(
                adapter=adapter,
                knowledgeService=knowledgeService,
                runExtractionFn=runExtractionFn,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                folderPath=entryPath,
                depth=depth + 1,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )
            continue

        effectiveMime = mimeType or "application/octet-stream"
        if any(effectiveMime.startswith(prefix) for prefix in limits.skipMimePrefixes):
            result.skippedPolicy += 1
            continue
        size = int(getattr(entry, "size", 0) or 0)
        if size and size > limits.maxFileSize:
            result.skippedPolicy += 1
            continue
        modifiedTime = metadata.get("modifiedTime")
        if not _isRecent(modifiedTime, limits.maxAgeDays):
            result.skippedPolicy += 1
            continue

        externalItemId = metadata.get("id") or entryPath
        revision = modifiedTime

        await _ingestOne(
            adapter=adapter,
            knowledgeService=knowledgeService,
            runExtractionFn=runExtractionFn,
            connectionId=connectionId,
            mandateId=mandateId,
            userId=userId,
            entry=entry,
            entryPath=entryPath,
            mimeType=effectiveMime,
            externalItemId=externalItemId,
            revision=revision,
            limits=limits,
            result=result,
            progressCb=progressCb,
        )


async def _ingestOne(
    *,
    adapter,
    knowledgeService,
    runExtractionFn,
    connectionId: str,
    mandateId: str,
    userId: str,
    entry,
    entryPath: str,
    mimeType: str,
    externalItemId: str,
    revision: Optional[str],
    limits: GdriveBootstrapLimits,
    result: GdriveBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

    syntheticFileId = _syntheticFileId(connectionId, externalItemId)
    fileName = getattr(entry, "name", "") or externalItemId

    try:
        downloaded = await adapter.download(entryPath)
    except Exception as exc:
        logger.warning("gdrive download %s failed: %s", entryPath, exc)
        result.failed += 1
        result.errors.append(f"download({entryPath}): {exc}")
        return

    # Adapter.download returns raw bytes today; guard DownloadResult shape too.
    fileBytes: bytes
    if isinstance(downloaded, (bytes, bytearray)):
        fileBytes = bytes(downloaded)
    else:
        fileBytes = bytes(getattr(downloaded, "data", b"") or b"")
        if getattr(downloaded, "mimeType", None):
            mimeType = downloaded.mimeType  # export may have changed the type
    if not fileBytes:
        result.failed += 1
        return
    if len(fileBytes) > limits.maxFileSize:
        result.skippedPolicy += 1
        return

    result.bytesProcessed += len(fileBytes)

    try:
        extracted = runExtractionFn(
            fileBytes, fileName, mimeType,
            ExtractionOptions(mergeStrategy=None),
        )
    except Exception as exc:
        logger.warning("gdrive extraction %s failed: %s", entryPath, exc)
        result.failed += 1
        result.errors.append(f"extract({entryPath}): {exc}")
        return

    contentObjects = _toContentObjects(extracted, fileName)
    if not contentObjects:
        result.skippedPolicy += 1
        return

    try:
        handle = await knowledgeService.requestIngestion(
            IngestionJob(
                sourceKind="gdrive_item",
                sourceId=syntheticFileId,
                fileName=fileName,
                mimeType=mimeType,
                userId=userId,
                mandateId=mandateId,
                contentObjects=contentObjects,
                contentVersion=revision,
                neutralize=limits.neutralize,
                provenance={
                    "connectionId": connectionId,
                    "authority": "google",
                    "service": "drive",
                    "externalItemId": externalItemId,
                    "entryPath": entryPath,
                    "tier": "body",
                },
            )
        )
    except Exception as exc:
        logger.error("gdrive ingestion %s failed: %s", entryPath, exc, exc_info=True)
        result.failed += 1
        result.errors.append(f"ingest({entryPath}): {exc}")
        return

    if handle.status == "duplicate":
        result.skippedDuplicate += 1
    elif handle.status == "indexed":
        result.indexed += 1
    else:
        result.failed += 1

    if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
        processed = result.indexed + result.skippedDuplicate
        try:
            progressCb(
                min(90, 10 + int(80 * processed / max(1, limits.maxItems))),
                f"gdrive processed={processed}",
            )
        except Exception:
            pass
        logger.info(
            "ingestion.connection.bootstrap.progress part=gdrive processed=%d skippedDup=%d failed=%d",
            processed, result.skippedDuplicate, result.failed,
            extra={
                "event": "ingestion.connection.bootstrap.progress",
                "part": "gdrive",
                "connectionId": connectionId,
                "processed": processed,
                "skippedDup": result.skippedDuplicate,
                "failed": result.failed,
            },
        )


def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
    durationMs = int((time.time() - startMs) * 1000)
    logger.info(
        "ingestion.connection.bootstrap.done part=gdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d bytes=%d durationMs=%d",
        connectionId,
        result.indexed, result.skippedDuplicate, result.skippedPolicy,
        result.failed, result.bytesProcessed, durationMs,
        extra={
            "event": "ingestion.connection.bootstrap.done",
            "part": "gdrive",
            "connectionId": connectionId,
            "indexed": result.indexed,
            "skippedDup": result.skippedDuplicate,
            "skippedPolicy": result.skippedPolicy,
            "failed": result.failed,
            "bytes": result.bytesProcessed,
            "durationMs": durationMs,
        },
    )
    return {
        "connectionId": result.connectionId,
        "indexed": result.indexed,
        "skippedDuplicate": result.skippedDuplicate,
        "skippedPolicy": result.skippedPolicy,
        "failed": result.failed,
        "bytesProcessed": result.bytesProcessed,
        "durationMs": durationMs,
        "errors": result.errors[:20],
    }
@ -0,0 +1,606 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Gmail bootstrap for the unified knowledge ingestion lane.

Mirrors the Outlook pilot (see subConnectorSyncOutlook.py) but talks to Google
Mail's REST API. Messages become `sourceKind="gmail_message"` virtual documents
with header / snippet / cleaned body content-objects; attachments are optional
child jobs with `sourceKind="gmail_attachment"`.

Idempotency: Gmail's stable `historyId` (or `internalDate` as fallback) is
passed as `contentVersion`, so rerunning the bootstrap yields
`ingestion.skipped.duplicate` for unchanged messages.
"""

from __future__ import annotations

import asyncio
import base64
import hashlib
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional

from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody

logger = logging.getLogger(__name__)

MAX_MESSAGES_DEFAULT = 500
MAX_BODY_CHARS_DEFAULT = 8000
MAX_ATTACHMENT_BYTES_DEFAULT = 10 * 1024 * 1024
DEFAULT_LABELS = ("INBOX", "SENT")


@dataclass
class GmailBootstrapLimits:
    maxMessages: int = MAX_MESSAGES_DEFAULT
    labels: tuple = DEFAULT_LABELS
    maxBodyChars: int = MAX_BODY_CHARS_DEFAULT
    includeAttachments: bool = False
    maxAttachmentBytes: int = MAX_ATTACHMENT_BYTES_DEFAULT
    # Only fetch messages newer than N days. None disables filter.
    maxAgeDays: Optional[int] = 90
    # Content depth: "metadata" | "snippet" | "full"
    mailContentDepth: str = "full"
    # Pass-through to IngestionJob.neutralize
    neutralize: bool = False


@dataclass
class GmailBootstrapResult:
    connectionId: str
    indexed: int = 0
    skippedDuplicate: int = 0
    skippedPolicy: int = 0
    failed: int = 0
    attachmentsIndexed: int = 0
    errors: List[str] = field(default_factory=list)


def _syntheticMessageId(connectionId: str, messageId: str) -> str:
    token = hashlib.sha256(f"{connectionId}:{messageId}".encode("utf-8")).hexdigest()[:16]
    return f"gm:{connectionId[:8]}:{token}"


def _syntheticAttachmentId(connectionId: str, messageId: str, attachmentId: str) -> str:
    token = hashlib.sha256(
        f"{connectionId}:{messageId}:{attachmentId}".encode("utf-8")
    ).hexdigest()[:16]
    return f"ga:{connectionId[:8]}:{token}"


def _decodeBase64Url(data: str) -> bytes:
    if not data:
        return b""
    # Gmail uses URL-safe base64 without padding.
    padding = 4 - (len(data) % 4)
    if padding != 4:
        data = data + ("=" * padding)
    try:
        return base64.urlsafe_b64decode(data)
    except Exception:
        return b""
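

# Quick sanity check (doctest-style): Gmail strips the "=" padding that the
# stdlib decoder requires, so it has to be restored first.
#
#     _decodeBase64Url("aGVsbG8")   # unpadded "hello" -> b"hello"
#     _decodeBase64Url("")          # -> b""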


def _walkPayloadForBody(payload: Dict[str, Any]) -> Dict[str, str]:
    """Return {"text": ..., "html": ...} by walking MIME parts.

    Gmail `payload` is a tree of parts. We prefer `text/plain` for the cleaned
    body, but capture `text/html` as a fallback so `cleanEmailBody` can strip
    markup if plain is missing.
    """
    found: Dict[str, str] = {"text": "", "html": ""}

    def _walk(part: Dict[str, Any]) -> None:
        mime = (part.get("mimeType") or "").lower()
        body = part.get("body") or {}
        raw = body.get("data") or ""
        if raw and mime.startswith("text/"):
            decoded = _decodeBase64Url(raw).decode("utf-8", errors="replace")
            key = "text" if mime == "text/plain" else ("html" if mime == "text/html" else "")
            if key and not found[key]:
                found[key] = decoded
        for sub in part.get("parts") or []:
            _walk(sub)

    _walk(payload or {})
    return found
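

# Illustrative input/output for the walker above, using a minimal
# multipart/alternative tree (the usual Gmail shape):
#
#     payload = {
#         "mimeType": "multipart/alternative",
#         "parts": [
#             {"mimeType": "text/plain", "body": {"data": "aGVsbG8"}},     # "hello"
#             {"mimeType": "text/html", "body": {"data": "PGI-aGk8L2I-"}},  # "<b>hi</b>"
#         ],
#     }
#     _walkPayloadForBody(payload)   # -> {"text": "hello", "html": "<b>hi</b>"}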


def _headerMap(payload: Dict[str, Any]) -> Dict[str, str]:
    return {
        (h.get("name") or "").lower(): (h.get("value") or "")
        for h in (payload.get("headers") or [])
    }


def _buildContentObjects(
    message: Dict[str, Any],
    maxBodyChars: int,
    mailContentDepth: str = "full",
) -> List[Dict[str, Any]]:
    """Build content objects for a Gmail message.

    `mailContentDepth` controls how much is embedded:
      - "metadata": header only (subject, from, to, date)
      - "snippet": header + Gmail snippet (~155 chars, no full body)
      - "full": header + snippet + cleaned full body (default)
    """
    payload = message.get("payload") or {}
    headers = _headerMap(payload)
    subject = headers.get("subject") or "(no subject)"
    fromAddr = headers.get("from") or ""
    toAddr = headers.get("to") or ""
    ccAddr = headers.get("cc") or ""
    date = headers.get("date") or ""
    snippet = message.get("snippet") or ""

    parts: List[Dict[str, Any]] = []
    header = (
        f"Subject: {subject}\n"
        f"From: {fromAddr}\n"
        f"To: {toAddr}\n"
        + (f"Cc: {ccAddr}\n" if ccAddr else "")
        + f"Date: {date}"
    )
    parts.append({
        "contentObjectId": "header",
        "contentType": "text",
        "data": header,
        "contextRef": {"part": "header"},
    })
    if mailContentDepth in ("snippet", "full") and snippet:
        parts.append({
            "contentObjectId": "snippet",
            "contentType": "text",
            "data": snippet,
            "contextRef": {"part": "snippet"},
        })
    if mailContentDepth == "full":
        bodies = _walkPayloadForBody(payload)
        rawBody = bodies["text"] or bodies["html"]
        cleanedBody = cleanEmailBody(rawBody, maxChars=maxBodyChars) if rawBody else ""
        if cleanedBody:
            parts.append({
                "contentObjectId": "body",
                "contentType": "text",
                "data": cleanedBody,
                "contextRef": {"part": "body"},
            })
    return parts


async def bootstrapGmail(
    connectionId: str,
    *,
    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
    adapter: Any = None,
    connection: Any = None,
    knowledgeService: Any = None,
    limits: Optional[GmailBootstrapLimits] = None,
    googleGetFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
    """Enumerate Gmail labels (INBOX + SENT default) and ingest messages."""
    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
    prefs = loadConnectionPrefs(connectionId)

    if not limits:
        limits = GmailBootstrapLimits(
            includeAttachments=prefs.mailIndexAttachments,
            maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
            mailContentDepth=prefs.mailContentDepth,
            neutralize=prefs.neutralizeBeforeEmbed,
        )

    startMs = time.time()
    result = GmailBootstrapResult(connectionId=connectionId)

    logger.info(
        "ingestion.connection.bootstrap.started part=gmail connectionId=%s",
        connectionId,
        extra={
            "event": "ingestion.connection.bootstrap.started",
            "part": "gmail",
            "connectionId": connectionId,
        },
    )

    if adapter is None or knowledgeService is None or connection is None:
        adapter, connection, knowledgeService = await _resolveDependencies(connectionId)

    if googleGetFn is None:
        from modules.connectors.providerGoogle.connectorGoogle import _googleGet as _defaultGet

        token = getattr(adapter, "_token", "")

        async def googleGetFn(url: str) -> Dict[str, Any]:  # type: ignore[no-redef]
            return await _defaultGet(token, url)

    mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
    userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""

    for labelId in limits.labels:
        if result.indexed + result.skippedDuplicate >= limits.maxMessages:
            break
        try:
            await _ingestLabel(
                googleGetFn=googleGetFn,
                knowledgeService=knowledgeService,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                labelId=labelId,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )
        except Exception as exc:
            logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
            result.errors.append(f"label({labelId}): {exc}")

    return _finalizeResult(connectionId, result, startMs)


async def _resolveDependencies(connectionId: str):
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.auth import TokenManager
    from modules.connectors.providerGoogle.connectorGoogle import GoogleConnector
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.security.rootAccess import getRootUser

    rootInterface = getRootInterface()
    connection = rootInterface.getUserConnectionById(connectionId)
    if connection is None:
        raise ValueError(f"UserConnection not found: {connectionId}")

    token = TokenManager().getFreshToken(connectionId)
    if not token or not token.tokenAccess:
        raise ValueError(f"No valid token for connection {connectionId}")

    provider = GoogleConnector(connection, token.tokenAccess)
    adapter = provider.getServiceAdapter("gmail")

    rootUser = getRootUser()
    ctx = ServiceCenterContext(
        user=rootUser,
        mandate_id=str(getattr(connection, "mandateId", "") or ""),
    )
    knowledgeService = getService("knowledge", ctx)
    return adapter, connection, knowledgeService


async def _ingestLabel(
    *,
    googleGetFn,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    labelId: str,
    limits: GmailBootstrapLimits,
    result: GmailBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
    if remaining <= 0:
        return

    pageSize = min(100, remaining)
    query = ""
    if limits.maxAgeDays:
        cutoff = datetime.now(timezone.utc) - timedelta(days=limits.maxAgeDays)
        # Gmail uses YYYY/MM/DD.
        query = f"after:{cutoff.strftime('%Y/%m/%d')}"

    baseUrl = (
        "https://gmail.googleapis.com/gmail/v1/users/me/messages"
        f"?labelIds={labelId}&maxResults={pageSize}"
    )
    if query:
        baseUrl = f"{baseUrl}&q={query}"

    nextPageToken: Optional[str] = None
    while (result.indexed + result.skippedDuplicate) < limits.maxMessages:
        url = baseUrl if not nextPageToken else f"{baseUrl}&pageToken={nextPageToken}"
        page = await googleGetFn(url)
        if not isinstance(page, dict) or "error" in page:
            err = (page or {}).get("error") if isinstance(page, dict) else "unknown"
            logger.warning("gmail list page error for label %s: %s", labelId, err)
            result.errors.append(f"list({labelId}): {err}")
            return

        messageStubs = page.get("messages") or []
        for stub in messageStubs:
            if result.indexed + result.skippedDuplicate >= limits.maxMessages:
                break
            msgId = stub.get("id")
            if not msgId:
                continue
            detailUrl = (
                f"https://gmail.googleapis.com/gmail/v1/users/me/messages/{msgId}?format=full"
            )
            detail = await googleGetFn(detailUrl)
            if not isinstance(detail, dict) or "error" in detail:
                result.failed += 1
                continue
            await _ingestMessage(
                googleGetFn=googleGetFn,
                knowledgeService=knowledgeService,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                labelId=labelId,
                message=detail,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )

        nextPageToken = page.get("nextPageToken")
        if not nextPageToken:
            break
|
||||
|
||||
|
||||
async def _ingestMessage(
|
||||
*,
|
||||
googleGetFn,
|
||||
knowledgeService,
|
||||
connectionId: str,
|
||||
mandateId: str,
|
||||
userId: str,
|
||||
labelId: str,
|
||||
message: Dict[str, Any],
|
||||
limits: GmailBootstrapLimits,
|
||||
result: GmailBootstrapResult,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||
) -> None:
|
||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||
|
||||
messageId = message.get("id")
|
||||
if not messageId:
|
||||
result.skippedPolicy += 1
|
||||
return
|
||||
revision = message.get("historyId") or message.get("internalDate")
|
||||
headers = _headerMap(message.get("payload") or {})
|
||||
subject = headers.get("subject") or "(no subject)"
|
||||
syntheticId = _syntheticMessageId(connectionId, messageId)
|
||||
fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml"
|
||||
|
||||
contentObjects = _buildContentObjects(
|
||||
message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth
|
||||
)
|
||||
try:
|
||||
handle = await knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="gmail_message",
|
||||
sourceId=syntheticId,
|
||||
fileName=fileName,
|
||||
mimeType="message/rfc822",
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
contentVersion=str(revision) if revision else None,
|
||||
neutralize=limits.neutralize,
|
||||
provenance={
|
||||
"connectionId": connectionId,
|
||||
"authority": "google",
|
||||
"service": "gmail",
|
||||
"externalItemId": messageId,
|
||||
"label": labelId,
|
||||
"threadId": message.get("threadId"),
|
||||
"tier": limits.mailContentDepth,
|
||||
},
|
||||
)
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("gmail ingestion %s failed: %s", messageId, exc, exc_info=True)
|
||||
result.failed += 1
|
||||
result.errors.append(f"ingest({messageId}): {exc}")
|
||||
return
|
||||
|
||||
if handle.status == "duplicate":
|
||||
result.skippedDuplicate += 1
|
||||
elif handle.status == "indexed":
|
||||
result.indexed += 1
|
||||
else:
|
||||
result.failed += 1
|
||||
|
||||
if limits.includeAttachments:
|
||||
try:
|
||||
await _ingestAttachments(
|
||||
googleGetFn=googleGetFn,
|
||||
knowledgeService=knowledgeService,
|
||||
connectionId=connectionId,
|
||||
mandateId=mandateId,
|
||||
userId=userId,
|
||||
message=message,
|
||||
parentSyntheticId=syntheticId,
|
||||
limits=limits,
|
||||
result=result,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("gmail attachments %s failed: %s", messageId, exc)
|
||||
result.errors.append(f"attachments({messageId}): {exc}")
|
||||
|
||||
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
|
||||
processed = result.indexed + result.skippedDuplicate
|
||||
try:
|
||||
progressCb(
|
||||
min(90, 10 + int(80 * processed / max(1, limits.maxMessages))),
|
||||
f"gmail processed={processed}",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.progress part=gmail processed=%d skippedDup=%d failed=%d",
|
||||
processed, result.skippedDuplicate, result.failed,
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.progress",
|
||||
"part": "gmail",
|
||||
"connectionId": connectionId,
|
||||
"processed": processed,
|
||||
"skippedDup": result.skippedDuplicate,
|
||||
"failed": result.failed,
|
||||
},
|
||||
)
|
||||
|
||||
await asyncio.sleep(0)
|
||||
|
||||
|
||||
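# --- Editor's illustration (not part of the original change): the progress
# callback above maps the processed count onto a 10-90 band, reserving 0-10
# for setup and 90-100 for finalisation. A side-effect-free restatement of
# the same formula; the helper name is hypothetical:
def _exampleProgressPercent(processed: int, maxMessages: int) -> int:
    """10% floor, 90% ceiling, linear in between; max(1, ...) avoids div/0."""
    return min(90, 10 + int(80 * processed / max(1, maxMessages)))
# _exampleProgressPercent(0, 500)   -> 10
# _exampleProgressPercent(250, 500) -> 50
# _exampleProgressPercent(500, 500) -> 90

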
async def _ingestAttachments(
    *,
    googleGetFn,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    message: Dict[str, Any],
    parentSyntheticId: str,
    limits: GmailBootstrapLimits,
    result: GmailBootstrapResult,
) -> None:
    """Child ingestion jobs for file attachments. Skips inline images (cid: refs)."""
    from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
    from modules.datamodels.datamodelExtraction import ExtractionOptions
    from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
    from modules.serviceCenter.services.serviceExtraction.subRegistry import (
        ExtractorRegistry, ChunkerRegistry,
    )

    messageId = message.get("id") or ""

    def _collectAttachmentStubs(part: Dict[str, Any], acc: List[Dict[str, Any]]) -> None:
        filename = part.get("filename") or ""
        body = part.get("body") or {}
        attId = body.get("attachmentId")
        if filename and attId:
            acc.append({
                "filename": filename,
                "mimeType": part.get("mimeType") or "application/octet-stream",
                "attachmentId": attId,
                "size": int(body.get("size") or 0),
            })
        for sub in part.get("parts") or []:
            _collectAttachmentStubs(sub, acc)

    stubs: List[Dict[str, Any]] = []
    _collectAttachmentStubs(message.get("payload") or {}, stubs)
    if not stubs:
        return

    extractorRegistry = ExtractorRegistry()
    chunkerRegistry = ChunkerRegistry()

    for stub in stubs:
        if stub["size"] and stub["size"] > limits.maxAttachmentBytes:
            result.skippedPolicy += 1
            continue
        attUrl = (
            f"https://gmail.googleapis.com/gmail/v1/users/me/messages/{messageId}"
            f"/attachments/{stub['attachmentId']}"
        )
        detail = await googleGetFn(attUrl)
        if not isinstance(detail, dict) or "error" in detail:
            result.failed += 1
            continue
        rawBytes = _decodeBase64Url(detail.get("data") or "")
        if not rawBytes:
            continue
        fileName = stub["filename"]
        mimeType = stub["mimeType"]
        syntheticId = _syntheticAttachmentId(connectionId, messageId, stub["attachmentId"])

        try:
            extracted = runExtraction(
                extractorRegistry, chunkerRegistry,
                rawBytes, fileName, mimeType,
                ExtractionOptions(mergeStrategy=None),
            )
        except Exception as exc:
            logger.warning("gmail attachment extract %s failed: %s", stub["attachmentId"], exc)
            result.failed += 1
            continue

        contentObjects: List[Dict[str, Any]] = []
        for part in getattr(extracted, "parts", None) or []:
            data = getattr(part, "data", None) or ""
            if not data or not str(data).strip():
                continue
            typeGroup = getattr(part, "typeGroup", "text") or "text"
            contentType = "text"
            if typeGroup == "image":
                contentType = "image"
            elif typeGroup in ("binary", "container"):
                contentType = "other"
            contentObjects.append({
                "contentObjectId": getattr(part, "id", ""),
                "contentType": contentType,
                "data": data,
                "contextRef": {
                    "containerPath": fileName,
                    "location": getattr(part, "label", None) or "attachment",
                    **(getattr(part, "metadata", None) or {}),
                },
            })
        if not contentObjects:
            result.skippedPolicy += 1
            continue

        try:
            await knowledgeService.requestIngestion(
                IngestionJob(
                    sourceKind="gmail_attachment",
                    sourceId=syntheticId,
                    fileName=fileName,
                    mimeType=mimeType,
                    userId=userId,
                    mandateId=mandateId,
                    contentObjects=contentObjects,
                    provenance={
                        "connectionId": connectionId,
                        "authority": "google",
                        "service": "gmail",
                        "parentId": parentSyntheticId,
                        "externalItemId": stub["attachmentId"],
                        "parentMessageId": messageId,
                    },
                )
            )
            result.attachmentsIndexed += 1
        except Exception as exc:
            logger.warning("gmail attachment ingest %s failed: %s", stub["attachmentId"], exc)
            result.failed += 1


def _finalizeResult(connectionId: str, result: GmailBootstrapResult, startMs: float) -> Dict[str, Any]:
    durationMs = int((time.time() - startMs) * 1000)
    logger.info(
        "ingestion.connection.bootstrap.done part=gmail connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d attachments=%d failed=%d durationMs=%d",
        connectionId,
        result.indexed, result.skippedDuplicate, result.skippedPolicy,
        result.attachmentsIndexed, result.failed, durationMs,
        extra={
            "event": "ingestion.connection.bootstrap.done",
            "part": "gmail",
            "connectionId": connectionId,
            "indexed": result.indexed,
            "skippedDup": result.skippedDuplicate,
            "skippedPolicy": result.skippedPolicy,
            "attachmentsIndexed": result.attachmentsIndexed,
            "failed": result.failed,
            "durationMs": durationMs,
        },
    )
    return {
        "connectionId": result.connectionId,
        "indexed": result.indexed,
        "skippedDuplicate": result.skippedDuplicate,
        "skippedPolicy": result.skippedPolicy,
        "attachmentsIndexed": result.attachmentsIndexed,
        "failed": result.failed,
        "durationMs": durationMs,
        "errors": result.errors[:20],
    }
|
|||
|
|
@ -0,0 +1,576 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Outlook bootstrap for the unified knowledge ingestion lane.

Unlike SharePoint, Outlook messages are "virtual documents" — we never persist
file bytes in the store. Each message becomes a `sourceKind="outlook_message"`
IngestionJob whose `contentObjects` carry the header, snippet and cleaned body
so retrieval can show a compact answer without fetching Graph again.

Attachments are optional (`includeAttachments` limit flag) and enqueued as
child jobs with `sourceKind="outlook_attachment"` + `provenance.parentId`.
"""

from __future__ import annotations

import asyncio
import hashlib
import logging
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional

from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody

logger = logging.getLogger(__name__)

MAX_MESSAGES_DEFAULT = 500
MAX_FOLDERS_DEFAULT = 5
MAX_BODY_CHARS_DEFAULT = 8000
MAX_ATTACHMENT_BYTES_DEFAULT = 10 * 1024 * 1024
WELL_KNOWN_FOLDERS = ("inbox", "sentitems")


@dataclass
class OutlookBootstrapLimits:
    maxMessages: int = MAX_MESSAGES_DEFAULT
    maxFolders: int = MAX_FOLDERS_DEFAULT
    maxBodyChars: int = MAX_BODY_CHARS_DEFAULT
    includeAttachments: bool = False
    maxAttachmentBytes: int = MAX_ATTACHMENT_BYTES_DEFAULT
    # Only fetch messages newer than N days. None disables the filter.
    maxAgeDays: Optional[int] = 90
    # Content depth: "metadata" | "snippet" | "full"
    mailContentDepth: str = "full"
    # Pass-through to IngestionJob.neutralize
    neutralize: bool = False


@dataclass
class OutlookBootstrapResult:
    connectionId: str
    indexed: int = 0
    skippedDuplicate: int = 0
    skippedPolicy: int = 0
    failed: int = 0
    attachmentsIndexed: int = 0
    errors: List[str] = field(default_factory=list)


def _syntheticMessageId(connectionId: str, messageId: str) -> str:
    token = hashlib.sha256(f"{connectionId}:{messageId}".encode("utf-8")).hexdigest()[:16]
    return f"om:{connectionId[:8]}:{token}"


def _syntheticAttachmentId(connectionId: str, messageId: str, attachmentId: str) -> str:
    token = hashlib.sha256(
        f"{connectionId}:{messageId}:{attachmentId}".encode("utf-8")
    ).hexdigest()[:16]
    return f"oa:{connectionId[:8]}:{token}"


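# --- Editor's illustration (not part of the original change): the synthetic
# ids above are pure functions of their inputs, which is what makes repeat
# bootstraps idempotent. Sketch with made-up inputs:
def _exampleSyntheticIdStability() -> bool:
    """Same (connection, message) pair always yields the same id."""
    a = _syntheticMessageId("conn-1234-abcd", "AAMkAGI2TG93AAA=")
    b = _syntheticMessageId("conn-1234-abcd", "AAMkAGI2TG93AAA=")
    # Both calls hash "connectionId:messageId", so a == b on every run, and
    # the id starts with "om:" plus the first 8 chars of the connection id.
    return a == b and a.startswith("om:conn-123")

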
def _extractRecipient(recipient: Dict[str, Any]) -> str:
    email = (recipient or {}).get("emailAddress") or {}
    name = email.get("name") or ""
    addr = email.get("address") or ""
    if name and addr:
        return f"{name} <{addr}>"
    return addr or name


def _joinRecipients(recipients: List[Dict[str, Any]]) -> str:
    return ", ".join(filter(None, [_extractRecipient(r) for r in recipients or []]))


def _buildContentObjects(
    message: Dict[str, Any],
    maxBodyChars: int,
    mailContentDepth: str = "full",
) -> List[Dict[str, Any]]:
    """Build content objects for an Outlook message.

    `mailContentDepth` mirrors the Gmail walker:
    - "metadata": header only
    - "snippet": header + bodyPreview (~255 chars)
    - "full": header + snippet + cleaned body (default)
    """
    subject = message.get("subject") or "(no subject)"
    fromAddr = _extractRecipient(message.get("from") or {})
    toAddr = _joinRecipients(message.get("toRecipients") or [])
    ccAddr = _joinRecipients(message.get("ccRecipients") or [])
    received = message.get("receivedDateTime") or ""
    snippet = message.get("bodyPreview") or ""

    parts: List[Dict[str, Any]] = []
    header = (
        f"Subject: {subject}\n"
        f"From: {fromAddr}\n"
        f"To: {toAddr}\n"
        + (f"Cc: {ccAddr}\n" if ccAddr else "")
        + f"Date: {received}"
    )
    parts.append({
        "contentObjectId": "header",
        "contentType": "text",
        "data": header,
        "contextRef": {"part": "header"},
    })
    if mailContentDepth in ("snippet", "full") and snippet:
        parts.append({
            "contentObjectId": "snippet",
            "contentType": "text",
            "data": snippet,
            "contextRef": {"part": "snippet"},
        })
    if mailContentDepth == "full":
        body = message.get("body") or {}
        bodyContent = body.get("content") or ""
        cleanedBody = cleanEmailBody(bodyContent, maxChars=maxBodyChars) if bodyContent else ""
        if cleanedBody:
            parts.append({
                "contentObjectId": "body",
                "contentType": "text",
                "data": cleanedBody,
                "contextRef": {"part": "body"},
            })
    return parts


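# --- Editor's illustration (not part of the original change): which content
# objects each depth tier of _buildContentObjects emits for one sample
# message (the payload below is made up):
def _exampleDepthTiers() -> Dict[str, List[str]]:
    """Returns the emitted contentObjectIds per mailContentDepth tier."""
    msg = {
        "subject": "Q3 report",
        "from": {"emailAddress": {"name": "Ann", "address": "ann@example.com"}},
        "toRecipients": [],
        "ccRecipients": [],
        "receivedDateTime": "2025-01-01T10:00:00Z",
        "bodyPreview": "Attached is the final Q3 report.",
        "body": {"content": "<p>Attached is the final Q3 report.</p>"},
    }
    return {
        depth: [p["contentObjectId"] for p in _buildContentObjects(msg, 8000, mailContentDepth=depth)]
        for depth in ("metadata", "snippet", "full")
    }
# -> {"metadata": ["header"],
#     "snippet": ["header", "snippet"],
#     "full": ["header", "snippet", "body"]}

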
async def bootstrapOutlook(
    connectionId: str,
    *,
    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
    adapter: Any = None,
    connection: Any = None,
    knowledgeService: Any = None,
    limits: Optional[OutlookBootstrapLimits] = None,
) -> Dict[str, Any]:
    """Enumerate Outlook folders (inbox + sent by default) and ingest messages."""
    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
    prefs = loadConnectionPrefs(connectionId)

    if not limits:
        limits = OutlookBootstrapLimits(
            includeAttachments=prefs.mailIndexAttachments,
            maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
            mailContentDepth=prefs.mailContentDepth,
            neutralize=prefs.neutralizeBeforeEmbed,
        )

    startMs = time.time()
    result = OutlookBootstrapResult(connectionId=connectionId)

    logger.info(
        "ingestion.connection.bootstrap.started part=outlook connectionId=%s",
        connectionId,
        extra={
            "event": "ingestion.connection.bootstrap.started",
            "part": "outlook",
            "connectionId": connectionId,
        },
    )

    if adapter is None or knowledgeService is None or connection is None:
        adapter, connection, knowledgeService = await _resolveDependencies(connectionId)

    mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
    userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""

    folderIds = await _selectFolderIds(adapter, limits)
    for folderId in folderIds:
        if result.indexed + result.skippedDuplicate >= limits.maxMessages:
            break
        try:
            await _ingestFolder(
                adapter=adapter,
                knowledgeService=knowledgeService,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                folderId=folderId,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )
        except Exception as exc:
            logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
            result.errors.append(f"folder({folderId}): {exc}")

    return _finalizeResult(connectionId, result, startMs)


async def _resolveDependencies(connectionId: str):
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.auth import TokenManager
    from modules.connectors.providerMsft.connectorMsft import MsftConnector
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.security.rootAccess import getRootUser

    rootInterface = getRootInterface()
    connection = rootInterface.getUserConnectionById(connectionId)
    if connection is None:
        raise ValueError(f"UserConnection not found: {connectionId}")

    token = TokenManager().getFreshToken(connectionId)
    if not token or not token.tokenAccess:
        raise ValueError(f"No valid token for connection {connectionId}")

    provider = MsftConnector(connection, token.tokenAccess)
    adapter = provider.getServiceAdapter("outlook")

    rootUser = getRootUser()
    ctx = ServiceCenterContext(
        user=rootUser,
        mandate_id=str(getattr(connection, "mandateId", "") or ""),
    )
    knowledgeService = getService("knowledge", ctx)
    return adapter, connection, knowledgeService


async def _selectFolderIds(adapter, limits: OutlookBootstrapLimits) -> List[str]:
    """Prefer well-known folders (inbox, sentitems); fall back to browse()."""
    folderIds: List[str] = []
    for wellKnown in WELL_KNOWN_FOLDERS:
        if len(folderIds) >= limits.maxFolders:
            break
        try:
            row = await adapter._graphGet(f"me/mailFolders/{wellKnown}")
        except Exception:
            row = None
        if isinstance(row, dict) and "error" not in row and row.get("id"):
            folderIds.append(row["id"])

    if len(folderIds) < limits.maxFolders:
        try:
            entries = await adapter.browse("/")
        except Exception:
            entries = []
        for entry in entries:
            metadata = getattr(entry, "metadata", {}) or {}
            fid = metadata.get("id")
            if fid and fid not in folderIds:
                folderIds.append(fid)
            if len(folderIds) >= limits.maxFolders:
                break
    return folderIds


async def _ingestFolder(
    *,
    adapter,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    folderId: str,
    limits: OutlookBootstrapLimits,
    result: OutlookBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
    if remaining <= 0:
        return

    pageSize = min(100, remaining)
    select = (
        "id,subject,from,toRecipients,ccRecipients,receivedDateTime,"
        "bodyPreview,body,internetMessageId,hasAttachments,changeKey"
    )
    endpoint: Optional[str] = (
        f"me/mailFolders/{folderId}/messages"
        f"?$top={pageSize}&$orderby=receivedDateTime desc&$select={select}"
    )

    # Apply the age filter in Graph itself ($filter on receivedDateTime) to
    # avoid shipping ancient messages we'd only discard client-side.
    if limits.maxAgeDays:
        from datetime import datetime, timezone, timedelta

        cutoff = datetime.now(timezone.utc) - timedelta(days=limits.maxAgeDays)
        cutoffIso = cutoff.strftime("%Y-%m-%dT%H:%M:%SZ")
        endpoint = f"{endpoint}&$filter=receivedDateTime ge {cutoffIso}"

    while endpoint and (result.indexed + result.skippedDuplicate) < limits.maxMessages:
        try:
            page = await adapter._graphGet(endpoint)
        except Exception as exc:
            logger.warning("outlook graph page failed for folder %s: %s", folderId, exc)
            result.errors.append(f"graph({folderId}): {exc}")
            return
        if not isinstance(page, dict) or "error" in page:
            err = (page or {}).get("error") if isinstance(page, dict) else "unknown"
            logger.warning("outlook graph page error for folder %s: %s", folderId, err)
            result.errors.append(f"graph({folderId}): {err}")
            return

        for message in page.get("value", []) or []:
            if result.indexed + result.skippedDuplicate >= limits.maxMessages:
                break
            await _ingestMessage(
                adapter=adapter,
                knowledgeService=knowledgeService,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                message=message,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )

        nextLink = page.get("@odata.nextLink")
        if not nextLink:
            break
        # Strip the Graph base URL so adapter._graphGet accepts the relative path.
        from modules.connectors.providerMsft.connectorMsft import _stripGraphBase

        endpoint = _stripGraphBase(nextLink)


async def _ingestMessage(
    *,
    adapter,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    message: Dict[str, Any],
    limits: OutlookBootstrapLimits,
    result: OutlookBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

    messageId = message.get("id")
    if not messageId:
        result.skippedPolicy += 1
        return
    revision = message.get("changeKey") or message.get("internetMessageId")
    subject = message.get("subject") or "(no subject)"
    syntheticId = _syntheticMessageId(connectionId, messageId)
    fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml"

    contentObjects = _buildContentObjects(
        message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth
    )
    # At least the header is always emitted, so `contentObjects` is never empty.
    try:
        handle = await knowledgeService.requestIngestion(
            IngestionJob(
                sourceKind="outlook_message",
                sourceId=syntheticId,
                fileName=fileName,
                mimeType="message/rfc822",
                userId=userId,
                mandateId=mandateId,
                contentObjects=contentObjects,
                contentVersion=revision,
                neutralize=limits.neutralize,
                provenance={
                    "connectionId": connectionId,
                    "authority": "msft",
                    "service": "outlook",
                    "externalItemId": messageId,
                    "internetMessageId": message.get("internetMessageId"),
                    "tier": limits.mailContentDepth,
                },
            )
        )
    except Exception as exc:
        logger.error("outlook ingestion %s failed: %s", messageId, exc, exc_info=True)
        result.failed += 1
        result.errors.append(f"ingest({messageId}): {exc}")
        return

    if handle.status == "duplicate":
        result.skippedDuplicate += 1
    elif handle.status == "indexed":
        result.indexed += 1
    else:
        result.failed += 1

    if limits.includeAttachments and message.get("hasAttachments"):
        try:
            await _ingestAttachments(
                adapter=adapter,
                knowledgeService=knowledgeService,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                messageId=messageId,
                parentSyntheticId=syntheticId,
                limits=limits,
                result=result,
            )
        except Exception as exc:
            logger.warning("outlook attachments %s failed: %s", messageId, exc)
            result.errors.append(f"attachments({messageId}): {exc}")

    if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
        processed = result.indexed + result.skippedDuplicate
        try:
            progressCb(
                min(90, 10 + int(80 * processed / max(1, limits.maxMessages))),
                f"outlook processed={processed}",
            )
        except Exception:
            pass
        logger.info(
            "ingestion.connection.bootstrap.progress part=outlook processed=%d skippedDup=%d failed=%d",
            processed, result.skippedDuplicate, result.failed,
            extra={
                "event": "ingestion.connection.bootstrap.progress",
                "part": "outlook",
                "connectionId": connectionId,
                "processed": processed,
                "skippedDup": result.skippedDuplicate,
                "failed": result.failed,
            },
        )

    await asyncio.sleep(0)


async def _ingestAttachments(
    *,
    adapter,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    messageId: str,
    parentSyntheticId: str,
    limits: OutlookBootstrapLimits,
    result: OutlookBootstrapResult,
) -> None:
    """Child ingestion jobs for file attachments (skip inline & oversized)."""
    from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
    from modules.datamodels.datamodelExtraction import ExtractionOptions
    from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
    from modules.serviceCenter.services.serviceExtraction.subRegistry import (
        ExtractorRegistry, ChunkerRegistry,
    )
    import base64

    page = await adapter._graphGet(f"me/messages/{messageId}/attachments")
    if not isinstance(page, dict) or "error" in page:
        return

    extractorRegistry = ExtractorRegistry()
    chunkerRegistry = ChunkerRegistry()

    for attachment in page.get("value", []) or []:
        if attachment.get("@odata.type") != "#microsoft.graph.fileAttachment":
            continue
        if attachment.get("isInline"):
            continue
        size = int(attachment.get("size") or 0)
        if size and size > limits.maxAttachmentBytes:
            result.skippedPolicy += 1
            continue
        contentBytesB64 = attachment.get("contentBytes")
        if not contentBytesB64:
            continue
        try:
            rawBytes = base64.b64decode(contentBytesB64)
        except Exception:
            result.skippedPolicy += 1
            continue
        fileName = attachment.get("name") or "attachment"
        mimeType = attachment.get("contentType") or "application/octet-stream"
        attachmentId = attachment.get("id") or fileName
        syntheticId = _syntheticAttachmentId(connectionId, messageId, attachmentId)

        try:
            extracted = runExtraction(
                extractorRegistry, chunkerRegistry,
                rawBytes, fileName, mimeType,
                ExtractionOptions(mergeStrategy=None),
            )
        except Exception as exc:
            logger.warning("outlook attachment extract %s failed: %s", attachmentId, exc)
            result.failed += 1
            continue

        contentObjects: List[Dict[str, Any]] = []
        for part in getattr(extracted, "parts", None) or []:
            data = getattr(part, "data", None) or ""
            if not data or not str(data).strip():
                continue
            typeGroup = getattr(part, "typeGroup", "text") or "text"
            contentType = "text"
            if typeGroup == "image":
                contentType = "image"
            elif typeGroup in ("binary", "container"):
                contentType = "other"
            contentObjects.append({
                "contentObjectId": getattr(part, "id", ""),
                "contentType": contentType,
                "data": data,
                "contextRef": {
                    "containerPath": fileName,
                    "location": getattr(part, "label", None) or "attachment",
                    **(getattr(part, "metadata", None) or {}),
                },
            })
        if not contentObjects:
            result.skippedPolicy += 1
            continue

        try:
            await knowledgeService.requestIngestion(
                IngestionJob(
                    sourceKind="outlook_attachment",
                    sourceId=syntheticId,
                    fileName=fileName,
                    mimeType=mimeType,
                    userId=userId,
                    mandateId=mandateId,
                    contentObjects=contentObjects,
                    neutralize=limits.neutralize,
                    provenance={
                        "connectionId": connectionId,
                        "authority": "msft",
                        "service": "outlook",
                        "parentId": parentSyntheticId,
                        "externalItemId": attachmentId,
                        "parentMessageId": messageId,
                    },
                )
            )
            result.attachmentsIndexed += 1
        except Exception as exc:
            logger.warning("outlook attachment ingest %s failed: %s", attachmentId, exc)
            result.failed += 1


def _finalizeResult(connectionId: str, result: OutlookBootstrapResult, startMs: float) -> Dict[str, Any]:
    durationMs = int((time.time() - startMs) * 1000)
    logger.info(
        "ingestion.connection.bootstrap.done part=outlook connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d attachments=%d failed=%d durationMs=%d",
        connectionId,
        result.indexed, result.skippedDuplicate, result.skippedPolicy,
        result.attachmentsIndexed, result.failed, durationMs,
        extra={
            "event": "ingestion.connection.bootstrap.done",
            "part": "outlook",
            "connectionId": connectionId,
            "indexed": result.indexed,
            "skippedDup": result.skippedDuplicate,
            "skippedPolicy": result.skippedPolicy,
            "attachmentsIndexed": result.attachmentsIndexed,
            "failed": result.failed,
            "durationMs": durationMs,
        },
    )
    return {
        "connectionId": result.connectionId,
        "indexed": result.indexed,
        "skippedDuplicate": result.skippedDuplicate,
        "skippedPolicy": result.skippedPolicy,
        "attachmentsIndexed": result.attachmentsIndexed,
        "failed": result.failed,
        "durationMs": durationMs,
        "errors": result.errors[:20],
    }
|
|||
|
|
@ -0,0 +1,433 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""SharePoint bootstrap for the unified knowledge ingestion lane.

Walks the SharePoint drive(s) reachable via a UserConnection, downloads each
file-like item, runs the standard content extraction pipeline and hands the
result to `KnowledgeService.requestIngestion`. Idempotency is provided by the
ingestion façade itself; repeat bootstraps therefore produce
`ingestion.skipped.duplicate` for every unchanged item because we pass the
Graph `eTag` as `contentVersion`.
"""

from __future__ import annotations

import asyncio
import hashlib
import logging
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional

from modules.datamodels.datamodelExtraction import ExtractionOptions

logger = logging.getLogger(__name__)

MAX_ITEMS_DEFAULT = 500
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
MAX_DEPTH_DEFAULT = 4
MAX_SITES_DEFAULT = 3


@dataclass
class SharepointBootstrapLimits:
    maxItems: int = MAX_ITEMS_DEFAULT
    maxBytes: int = MAX_BYTES_DEFAULT
    maxFileSize: int = MAX_FILE_SIZE_DEFAULT
    skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
    maxDepth: int = MAX_DEPTH_DEFAULT
    maxSites: int = MAX_SITES_DEFAULT
    # Pass-through to IngestionJob.neutralize
    neutralize: bool = False


@dataclass
class SharepointBootstrapResult:
    connectionId: str
    indexed: int = 0
    skippedDuplicate: int = 0
    skippedPolicy: int = 0
    failed: int = 0
    bytesProcessed: int = 0
    errors: List[str] = field(default_factory=list)


def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
    """Deterministic synthetic FileContentIndex id for a SharePoint item.

    Stable across bootstraps → idempotency works; independent of file name so
    moves/renames don't duplicate chunks.
    """
    token = hashlib.sha256(f"{connectionId}:{externalItemId}".encode("utf-8")).hexdigest()[:16]
    return f"sp:{connectionId[:8]}:{token}"


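# --- Editor's illustration (not part of the original change): because
# _syntheticFileId hashes only (connectionId, externalItemId), renaming or
# moving the item changes neither input, so repeat bootstraps hit the same
# FileContentIndex id and never duplicate chunks. Inputs below are made up:
def _exampleRenameStability() -> bool:
    """The synthetic id ignores the file name and path entirely."""
    before = _syntheticFileId("conn-1234-abcd", "01ABCDEF")  # item was a.docx
    after = _syntheticFileId("conn-1234-abcd", "01ABCDEF")   # renamed to b.docx
    return before == after  # always True

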
def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
    """Translate ExtractionResult → content objects accepted by requestIngestion."""
    parts = getattr(extracted, "parts", None) or []
    out: List[Dict[str, Any]] = []
    for part in parts:
        data = getattr(part, "data", None) or ""
        if not data or not str(data).strip():
            continue
        typeGroup = getattr(part, "typeGroup", "text") or "text"
        contentType = "text"
        if typeGroup == "image":
            contentType = "image"
        elif typeGroup in ("binary", "container"):
            contentType = "other"
        out.append({
            "contentObjectId": getattr(part, "id", ""),
            "contentType": contentType,
            "data": data,
            "contextRef": {
                "containerPath": fileName,
                "location": getattr(part, "label", None) or "file",
                **(getattr(part, "metadata", None) or {}),
            },
        })
    return out


async def bootstrapSharepoint(
    connectionId: str,
    *,
    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
    adapter: Any = None,
    connection: Any = None,
    knowledgeService: Any = None,
    limits: Optional[SharepointBootstrapLimits] = None,
    runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
    """Enumerate SharePoint drives and ingest every reachable file via the façade.

    Parameters allow injection for tests; production callers pass only
    `connectionId` (and optionally a progressCb) and everything else is
    resolved against the registered services.
    """
    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
    prefs = loadConnectionPrefs(connectionId)

    if not limits:
        limits = SharepointBootstrapLimits(neutralize=prefs.neutralizeBeforeEmbed)

    startMs = time.time()
    result = SharepointBootstrapResult(connectionId=connectionId)

    logger.info(
        "ingestion.connection.bootstrap.started part=sharepoint connectionId=%s",
        connectionId,
        extra={
            "event": "ingestion.connection.bootstrap.started",
            "part": "sharepoint",
            "connectionId": connectionId,
        },
    )

    if adapter is None or knowledgeService is None or connection is None:
        adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
    if runExtractionFn is None:
        from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
        from modules.serviceCenter.services.serviceExtraction.subRegistry import (
            ExtractorRegistry, ChunkerRegistry,
        )
        extractorRegistry = ExtractorRegistry()
        chunkerRegistry = ChunkerRegistry()

        def runExtractionFn(bytesData, name, mime, options):  # type: ignore[no-redef]
            return runExtraction(extractorRegistry, chunkerRegistry, bytesData, name, mime, options)

    mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
    userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""

    try:
        sites = await adapter.browse("/", limit=limits.maxSites)
    except Exception as exc:
        logger.error("sharepoint site discovery failed for %s: %s", connectionId, exc, exc_info=True)
        result.errors.append(f"site_discovery: {exc}")
        return _finalizeResult(connectionId, result, startMs)

    for site in sites[: limits.maxSites]:
        if result.indexed + result.skippedDuplicate >= limits.maxItems:
            break
        sitePath = getattr(site, "path", "") or ""
        try:
            await _walkFolder(
                adapter=adapter,
                knowledgeService=knowledgeService,
                runExtractionFn=runExtractionFn,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                folderPath=sitePath,
                depth=0,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )
        except Exception as exc:
            logger.error("sharepoint walk failed for site %s: %s", sitePath, exc, exc_info=True)
            result.errors.append(f"walk({sitePath}): {exc}")

    return _finalizeResult(connectionId, result, startMs)


async def _resolveDependencies(connectionId: str):
    """Load connection, instantiate SharepointAdapter, and build a KnowledgeService.

    Runs with root privileges: bootstrap is a system operation triggered by an
    authenticated user via callback; it must not be gated by a per-user
    service-center context.
    """
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.auth import TokenManager
    from modules.connectors.providerMsft.connectorMsft import MsftConnector
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.security.rootAccess import getRootUser

    rootInterface = getRootInterface()
    connection = rootInterface.getUserConnectionById(connectionId)
    if connection is None:
        raise ValueError(f"UserConnection not found: {connectionId}")

    token = TokenManager().getFreshToken(connectionId)
    if not token or not token.tokenAccess:
        raise ValueError(f"No valid token for connection {connectionId}")

    provider = MsftConnector(connection, token.tokenAccess)
    adapter = provider.getServiceAdapter("sharepoint")

    rootUser = getRootUser()
    ctx = ServiceCenterContext(
        user=rootUser,
        mandate_id=str(getattr(connection, "mandateId", "") or ""),
    )
    knowledgeService = getService("knowledge", ctx)
    return adapter, connection, knowledgeService


async def _walkFolder(
    *,
    adapter,
    knowledgeService,
    runExtractionFn,
    connectionId: str,
    mandateId: str,
    userId: str,
    folderPath: str,
    depth: int,
    limits: SharepointBootstrapLimits,
    result: SharepointBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    if depth > limits.maxDepth:
        return
    try:
        entries = await adapter.browse(folderPath)
    except Exception as exc:
        logger.warning("sharepoint browse %s failed: %s", folderPath, exc)
        result.errors.append(f"browse({folderPath}): {exc}")
        return

    for entry in entries:
        if result.indexed + result.skippedDuplicate >= limits.maxItems:
            return
        if result.bytesProcessed >= limits.maxBytes:
            return

        entryPath = getattr(entry, "path", "") or ""
        if getattr(entry, "isFolder", False):
            await _walkFolder(
                adapter=adapter,
                knowledgeService=knowledgeService,
                runExtractionFn=runExtractionFn,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                folderPath=entryPath,
                depth=depth + 1,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )
            continue

        mimeType = getattr(entry, "mimeType", None) or "application/octet-stream"
        if any(mimeType.startswith(prefix) for prefix in limits.skipMimePrefixes):
            result.skippedPolicy += 1
            continue
        size = int(getattr(entry, "size", 0) or 0)
        if size and size > limits.maxFileSize:
            result.skippedPolicy += 1
            continue

        metadata = getattr(entry, "metadata", {}) or {}
        externalItemId = metadata.get("id") or entryPath
        revision = metadata.get("revision") or metadata.get("lastModifiedDateTime")

        await _ingestOne(
            adapter=adapter,
            knowledgeService=knowledgeService,
            runExtractionFn=runExtractionFn,
            connectionId=connectionId,
            mandateId=mandateId,
            userId=userId,
            entry=entry,
            entryPath=entryPath,
            mimeType=mimeType,
            externalItemId=externalItemId,
            revision=revision,
            limits=limits,
            result=result,
            progressCb=progressCb,
        )


async def _ingestOne(
    *,
    adapter,
    knowledgeService,
    runExtractionFn,
    connectionId: str,
    mandateId: str,
    userId: str,
    entry,
    entryPath: str,
    mimeType: str,
    externalItemId: str,
    revision: Optional[str],
    limits: SharepointBootstrapLimits,
    result: SharepointBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

    syntheticFileId = _syntheticFileId(connectionId, externalItemId)
    fileName = getattr(entry, "name", "") or externalItemId

    try:
        fileBytes = await adapter.download(entryPath)
    except Exception as exc:
        logger.warning("sharepoint download %s failed: %s", entryPath, exc)
        result.failed += 1
        result.errors.append(f"download({entryPath}): {exc}")
        return
    if not fileBytes:
        result.failed += 1
        return

    result.bytesProcessed += len(fileBytes)

    try:
        extracted = runExtractionFn(
            fileBytes, fileName, mimeType,
            ExtractionOptions(mergeStrategy=None),
        )
    except Exception as exc:
        logger.warning("sharepoint extraction %s failed: %s", entryPath, exc)
        result.failed += 1
        result.errors.append(f"extract({entryPath}): {exc}")
        return

    contentObjects = _toContentObjects(extracted, fileName)
    if not contentObjects:
        result.skippedPolicy += 1
        return

    provenance: Dict[str, Any] = {
        "connectionId": connectionId,
        "authority": "msft",
        "service": "sharepoint",
        "externalItemId": externalItemId,
        "externalPath": entryPath,
        "revision": revision,
    }
    try:
        handle = await knowledgeService.requestIngestion(
            IngestionJob(
                sourceKind="sharepoint_item",
                sourceId=syntheticFileId,
                fileName=fileName,
                mimeType=mimeType,
                userId=userId,
                mandateId=mandateId,
                contentObjects=contentObjects,
                contentVersion=revision,
                neutralize=limits.neutralize,
                provenance=provenance,
            )
        )
    except Exception as exc:
        logger.error("sharepoint ingestion %s failed: %s", entryPath, exc, exc_info=True)
        result.failed += 1
        result.errors.append(f"ingest({entryPath}): {exc}")
        return

    if handle.status == "duplicate":
        result.skippedDuplicate += 1
    elif handle.status == "indexed":
        result.indexed += 1
    else:
        result.failed += 1
        if handle.error:
            result.errors.append(f"ingest({entryPath}): {handle.error}")

    if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
        processed = result.indexed + result.skippedDuplicate
        try:
            progressCb(
                min(90, 10 + int(80 * processed / max(1, limits.maxItems))),
                f"sharepoint processed={processed}",
            )
        except Exception:
            pass
        logger.info(
            "ingestion.connection.bootstrap.progress part=sharepoint processed=%d skippedDup=%d failed=%d",
            processed, result.skippedDuplicate, result.failed,
            extra={
                "event": "ingestion.connection.bootstrap.progress",
                "part": "sharepoint",
                "connectionId": connectionId,
                "processed": processed,
                "skippedDup": result.skippedDuplicate,
                "failed": result.failed,
            },
        )

    # Yield so the event loop can interleave other tasks (download/extract are
    # CPU-ish and extraction uses sync libs; cooperative scheduling prevents
    # starving other workers).
    await asyncio.sleep(0)


def _finalizeResult(connectionId: str, result: SharepointBootstrapResult, startMs: float) -> Dict[str, Any]:
    durationMs = int((time.time() - startMs) * 1000)
    logger.info(
        "ingestion.connection.bootstrap.done part=sharepoint connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d",
        connectionId,
        result.indexed, result.skippedDuplicate, result.skippedPolicy, result.failed,
        durationMs,
        extra={
            "event": "ingestion.connection.bootstrap.done",
            "part": "sharepoint",
            "connectionId": connectionId,
            "indexed": result.indexed,
            "skippedDup": result.skippedDuplicate,
            "skippedPolicy": result.skippedPolicy,
            "failed": result.failed,
            "durationMs": durationMs,
        },
    )
    return {
        "connectionId": result.connectionId,
        "indexed": result.indexed,
        "skippedDuplicate": result.skippedDuplicate,
        "skippedPolicy": result.skippedPolicy,
        "failed": result.failed,
        "bytesProcessed": result.bytesProcessed,
        "durationMs": durationMs,
        "errors": result.errors[:20],
    }
|
|||
107
modules/serviceCenter/services/serviceKnowledge/subTextClean.py
Normal file
107
modules/serviceCenter/services/serviceKnowledge/subTextClean.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Text normalisation utilities used by knowledge ingestion.

The email body cleaning logic is intentionally regex-based and works on plain
text after an HTML→text pass so we never store unsanitised HTML/JS in the
knowledge store and retrieval stays robust (no extraneous markup tokens
eating embedding budget).
"""

from __future__ import annotations

import re
from typing import Optional

DEFAULT_MAX_CHARS = 8000


_QUOTE_MARKER_PATTERNS = [
    re.compile(r"^\s*(?:On\s.+?\swrote:)\s*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*(?:Am\s.+?\sschrieb.+?:)\s*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*-{2,}\s*Original\s*Message\s*-{2,}\s*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*-{2,}\s*Urspr.+Nachricht\s*-{2,}\s*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*From:\s+.+$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Von:\s+.+$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Sent:\s+.+$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Gesendet:\s+.+$", re.MULTILINE | re.IGNORECASE),
]

_SIGNATURE_MARKERS = [
    re.compile(r"^\s*-{2,}\s*$", re.MULTILINE),
    re.compile(r"^\s*—\s*$", re.MULTILINE),
    re.compile(r"^\s*Best regards\b.*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Kind regards\b.*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Mit freundlichen Gr[üu]ßen\b.*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Viele Gr[üu]ße\b.*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Best,\s*$", re.MULTILINE | re.IGNORECASE),
]


def _htmlToText(html: str) -> str:
    """Prefer BeautifulSoup when available, fall back to regex."""
    try:
        from bs4 import BeautifulSoup  # type: ignore

        soup = BeautifulSoup(html, "html.parser")
        for tag in soup(["script", "style", "head"]):
            tag.decompose()
        for br in soup.find_all(["br"]):
            br.replace_with("\n")
        for p in soup.find_all(["p", "div", "li", "tr"]):
            p.append("\n")
        text = soup.get_text()
    except Exception:
        # Minimal fallback: strip tags crudely.
        text = re.sub(r"<br\s*/?>", "\n", html, flags=re.IGNORECASE)
        text = re.sub(r"</(?:p|div|li|tr)>", "\n", text, flags=re.IGNORECASE)
        text = re.sub(r"<[^>]+>", "", text)
    # Collapse non-breaking + zero-width whitespace.
    text = text.replace("\u00a0", " ").replace("\u200b", "")
    return text


def _stripQuotedThread(text: str) -> str:
    """Remove reply-chain content so only the author's own contribution remains."""
    earliest = len(text)
    for pattern in _QUOTE_MARKER_PATTERNS:
        match = pattern.search(text)
        if match and match.start() < earliest:
            earliest = match.start()
    # Drop any block starting with "> " quoted lines (often Gmail/Thunderbird).
    quotedBlock = re.search(r"^(?:\s*>.*\n?)+", text, re.MULTILINE)
    if quotedBlock and quotedBlock.start() < earliest:
        earliest = quotedBlock.start()
    return text[:earliest].rstrip()


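# --- Editor's illustration (not part of the original change): the earliest
# marker wins, whether it is an "On ... wrote:" line or a "> " quoted block:
def _exampleStripQuoted() -> str:
    """Everything from the first reply marker onward is dropped."""
    text = "Will do.\n\n> Can you send the slides?\n> Thanks"
    return _stripQuotedThread(text)  # -> "Will do."

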
def _stripSignature(text: str) -> str:
    earliest = len(text)
    for pattern in _SIGNATURE_MARKERS:
        match = pattern.search(text)
        if match and match.start() < earliest:
            earliest = match.start()
    return text[:earliest].rstrip()


def _collapseWhitespace(text: str) -> str:
    text = re.sub(r"[ \t]+", " ", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()


def cleanEmailBody(html: str, maxChars: Optional[int] = DEFAULT_MAX_CHARS) -> str:
    """Return a compact plain-text view of an email body suitable for embedding.

    Steps: HTML → text, remove quoted reply chain, remove signature, collapse
    whitespace, truncate to maxChars. Always returns a string (possibly empty).
    """
    if not html:
        return ""
    text = _htmlToText(html) if "<" in html and ">" in html else html
    text = _stripQuotedThread(text)
    text = _stripSignature(text)
    text = _collapseWhitespace(text)
    if maxChars and len(text) > maxChars:
        text = text[:maxChars].rstrip() + "…"
    return text
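

# --- Editor's illustration (not part of the original change): end-to-end
# behaviour on a typical HTML reply (sample input is made up):
def _exampleCleanEmailBody() -> str:
    """HTML becomes text; the quoted thread and the signature are dropped."""
    html = (
        "<div><p>Thanks, looks good.</p>"
        "<p>--<br>Ann Example<br>ACME GmbH</p>"
        "<p>On Mon, Jan 6, 2025, Bob wrote:</p>"
        "<blockquote>Please review the draft.</blockquote></div>"
    )
    return cleanEmailBody(html)  # -> "Thanks, looks good."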
|
|||
|
|
@ -302,6 +302,30 @@ async def _executeWithRetry(executor, node, context, maxRetries: int = 0, retryD
|
|||
    raise lastError


def _substituteFeatureInstancePlaceholders(
    graph: Dict[str, Any],
    targetFeatureInstanceId: str,
) -> Dict[str, Any]:
    """Replace ``{{featureInstanceId}}`` placeholders in the serialised graph.

    Works on the full JSON representation so that placeholders inside nested
    parameter dicts, prompt strings, etc. are all caught. Already-resolved
    concrete UUIDs (pre-baked by ``_copyTemplateWorkflows``) are left untouched
    because the placeholder literal ``{{featureInstanceId}}`` will not match.
    """
    import json as _json
    raw = _json.dumps(graph)
    if "{{featureInstanceId}}" not in raw:
        return graph
    replaced = raw.replace("{{featureInstanceId}}", targetFeatureInstanceId)
    logger.debug(
        "_substituteFeatureInstancePlaceholders: resolved %d occurrence(s) -> %s",
        raw.count("{{featureInstanceId}}"),
        targetFeatureInstanceId,
    )
    return _json.loads(replaced)


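# --- Editor's illustration (not part of the original change): what the
# placeholder pass does to a serialised graph (toy input, fake uuid):
def _exampleSubstitution() -> Dict[str, Any]:
    """Every occurrence is replaced, however deeply it is nested."""
    graph = {"nodes": [{"params": {"featureInstanceId": "{{featureInstanceId}}"}}]}
    return _substituteFeatureInstancePlaceholders(graph, "0f8b1c2d-fake-uuid")
# -> {"nodes": [{"params": {"featureInstanceId": "0f8b1c2d-fake-uuid"}}]}

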
async def executeGraph(
    graph: Dict[str, Any],
    services: Any,
|
|
@ -315,6 +339,7 @@ async def executeGraph(
|
|||
    runId: Optional[str] = None,
    run_envelope: Optional[Dict[str, Any]] = None,
    label: Optional[str] = None,
    targetFeatureInstanceId: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Execute automation2 graph. Returns { success, nodeOutputs, error?, stopped? }.
|
|
@ -322,14 +347,16 @@
|
|||
    pauses the run, and returns { success: False, paused: True, taskId, runId }.
    For resume: pass initialNodeOutputs (with result for the human node) and startAfterNodeId.
    For fresh runs: pass run_envelope (unified start payload for the start node); normalized with userId into context.runEnvelope.
    targetFeatureInstanceId: resolves {{featureInstanceId}} placeholders in the graph JSON before execution.
    """
    logger.info(
        "executeGraph start: instanceId=%s workflowId=%s userId=%s mandateId=%s resume=%s",
        "executeGraph start: instanceId=%s workflowId=%s userId=%s mandateId=%s resume=%s targetInstance=%s",
        instanceId,
        workflowId,
        userId,
        mandateId,
        startAfterNodeId is not None,
        targetFeatureInstanceId,
    )
    from modules.workflows.processing.shared.methodDiscovery import discoverMethods
    discoverMethods(services)
|
|
@ -338,6 +365,9 @@
|
|||
        materializeFeatureInstanceRefs,
    )

    if targetFeatureInstanceId:
        graph = _substituteFeatureInstancePlaceholders(graph, targetFeatureInstanceId)

    # Phase-5 "Schicht-4" (layer 4): typed-ref envelopes are materialized FIRST
    # so the subsequent connection-ref pass and validation see the canonical shape.
    graph = materializeFeatureInstanceRefs(graph)
|
|||
|
|
@ -326,11 +326,25 @@ class ActionNodeExecutor:
|
|||
        if isinstance(dumped, dict) and isinstance(rawData, bytes) and len(rawData) > 0:
            try:
                from modules.interfaces.interfaceDbManagement import getInterface as _getMgmtInterface
                from modules.interfaces.interfaceDbApp import getInterface as _getAppInterface
                from modules.security.rootAccess import getRootUser
                _userId = context.get("userId")
                _mandateId = context.get("mandateId")
                _instanceId = context.get("instanceId")
                _mgmt = _getMgmtInterface(getRootUser(), mandateId=_mandateId, featureInstanceId=_instanceId)
                _owner = None
                if _userId:
                    try:
                        _umap = _getAppInterface(getRootUser()).getUsersByIds([str(_userId)])
                        _owner = _umap.get(str(_userId))
                    except Exception as _ue:
                        logger.warning("Could not resolve workflow user for file persistence: %s", _ue)
                if _owner is None:
                    _owner = getRootUser()
                    logger.debug(
                        "Persisting workflow document as root user (no resolved owner userId=%r)",
                        _userId,
                    )
                _mgmt = _getMgmtInterface(_owner, mandateId=_mandateId, featureInstanceId=_instanceId)
                _docName = dumped.get("documentName") or f"workflow-result-{nodeId}.bin"
                _mimeType = dumped.get("mimeType") or "application/octet-stream"
                _fileItem = _mgmt.createFile(_docName, _mimeType, rawData)
|
|
@ -345,6 +359,20 @@ class ActionNodeExecutor:
|
|||
                dumped["_hasBinaryData"] = True
                docsList.append(dumped)

        # Clean DocumentList shape for document nodes (match file.create: documents + count, no AiResult fields)
        if outputSchema == "DocumentList" and nodeType in ("ai.generateDocument", "ai.convertDocument"):
            if not result.success:
                return _normalizeError(
                    RuntimeError(str(result.error or "document action failed")),
                    outputSchema,
                )
            list_out: Dict[str, Any] = {
                "documents": docsList,
                "count": len(docsList),
            }
            _attachConnectionProvenance(list_out, resolvedParams, outputSchema, chatService, self.services)
            return normalizeToSchema(list_out, outputSchema)

        extractedContext = ""
        if result.documents:
            doc = result.documents[0]
|
|||
|
|
@ -7,6 +7,50 @@ from typing import Dict, List, Any, Tuple, Set, Optional
|
|||
logger = logging.getLogger(__name__)


def _ai_result_text_from_documents(d: Dict[str, Any]) -> Optional[str]:
    """Extract plain-text body from AiResult-style ``documents[0].documentData``."""
    docs = d.get("documents")
    if not isinstance(docs, list) or not docs:
        return None
    d0 = docs[0]
    raw: Any = None
    if isinstance(d0, dict):
        raw = d0.get("documentData")
    elif d0 is not None:
        raw = getattr(d0, "documentData", None)
    if raw is None:
        return None
    if isinstance(raw, bytes):
        try:
            t = raw.decode("utf-8").strip()
            return t or None
        except (UnicodeDecodeError, ValueError):
            return None
    if isinstance(raw, str):
        s = raw.strip()
        return s or None
    return None


def _ref_coalesce_empty_ai_result_text(data: Any, path: List[Any], resolved: Any) -> Any:
    """If a ref targets AiResult text fields but resolves empty/missing, fall back to documents.

    Needed when: optional ``responseData`` is absent (no synthetic ``{}``), ``response`` is
    still empty but ``documents`` hold the model output, or legacy graphs bind responseData only.
    """
    if resolved not in (None, ""):
        return resolved
    if not isinstance(data, dict) or not path:
        return resolved
    head = path[0]
    if head not in ("response", "responseData", "context"):
        return resolved
    if head == "context" and len(path) != 1:
        return resolved
    fb = _ai_result_text_from_documents(data)
    return fb if fb is not None else resolved


def parseGraph(graph: Dict[str, Any]) -> Tuple[List[Dict], List[Dict], Set[str]]:
|
||||
"""
|
||||
Parse graph into nodes, connections, and node IDs.
|
||||
|
|
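A quick illustration of the fallback these two helpers implement, using invented node output where the model text only reached `documents`:

node_output = {
    "response": "",
    "documents": [{"documentData": b"Generated summary text."}],
}
# A ref path ["response"] resolves to "", so the coalesce helper falls back
# to documents[0].documentData and decodes it.
assert _ref_coalesce_empty_ai_result_text(node_output, ["response"], "") == "Generated summary text."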
@@ -356,14 +400,15 @@ def resolveParameterReferences(value: Any, nodeOutputs: Dict[str, Any]) -> Any:
            data = data.get("data", data)
            plist = list(path)
            resolved = _get_by_path(data, plist)
            if (
                resolved is None
                and isinstance(data, dict)
                and plist
                and plist[0] == "payload"
                and len(plist) > 1
            ):
                resolved = _get_by_path(data, plist[1:])
            if resolved is None and isinstance(data, dict) and plist:
                if plist[0] == "payload" and len(plist) > 1:
                    # Strip explicit "payload" prefix (legacy DataPicker paths)
                    resolved = _get_by_path(data, plist[1:])
                elif "payload" in data and isinstance(data["payload"], dict):
                    # Form nodes store fields under {"payload": {fieldName: …}}.
                    # DataPicker emits bare field paths like ["url"]; try under payload.
                    resolved = _get_by_path(data["payload"], plist)
            resolved = _ref_coalesce_empty_ai_result_text(data, plist, resolved)
            return resolveParameterReferences(resolved, nodeOutputs)
        return value
    if value.get("type") == "value":
@@ -386,17 +431,34 @@ def resolveParameterReferences(value: Any, nodeOutputs: Dict[str, Any]) -> Any:
            if len(parts) < 2:
                return json.dumps(data) if isinstance(data, (dict, list)) else str(data)
            rest = ".".join(parts[1:])
            if data is None:

            def _walk(root, keys):
                cur = root
                for k in keys:
                    if isinstance(cur, dict) and k in cur:
                        cur = cur[k]
                    elif isinstance(cur, (list, tuple)) and k.isdigit():
                        cur = cur[int(k)]
                    else:
                        return None
                return cur

            keys = rest.split(".")
            result = _walk(data, keys)
            # Form nodes store fields under {"payload": {field: …}}.
            # Fall back to looking under "payload" when the direct path misses.
            if result is None and isinstance(data, dict) and "payload" in data:
                result = _walk(data["payload"], keys)
            if result is None:
                return m.group(0)
            for k in rest.split("."):
                if isinstance(data, dict) and k in data:
                    data = data[k]
                elif isinstance(data, (list, tuple)) and k.isdigit():
                    data = data[int(k)]
                else:
                    return m.group(0)
            return str(data) if data is not None else m.group(0)
            return str(result) if not isinstance(result, (dict, list)) else json.dumps(result, ensure_ascii=False)
        return re.sub(r"\{\{\s*([^}]+)\s*\}\}", repl, value)
    if isinstance(value, list):
        # contextBuilder: list where every item is a `{"type":"ref",...}` envelope.
        # Resolve each ref and join the serialised parts into a single prompt string.
        if value and all(isinstance(v, dict) and v.get("type") == "ref" for v in value):
            from modules.workflows.methods.methodAi._common import serialize_context
            parts = [serialize_context(resolveParameterReferences(v, nodeOutputs)) for v in value]
            return "\n\n".join(p for p in parts if p)
        return [resolveParameterReferences(v, nodeOutputs) for v in value]
    return value
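To make the template branch concrete, a self-contained sketch of the same resolution behaviour with an invented form-node output (the real code uses the `_walk`/`repl` pair above; this only mirrors it):

import json
import re

nodeOutputs = {"form1": {"data": {"payload": {"url": "https://example.com"}}}}  # invented

def render(template: str) -> str:
    # Mirrors repl() above: "nodeId.rest", falling back under "payload"
    # when the direct path misses; unresolved placeholders stay verbatim.
    def repl(m):
        parts = m.group(1).strip().split(".")
        data = nodeOutputs.get(parts[0], {}).get("data")
        for root in (data, data.get("payload") if isinstance(data, dict) else None):
            cur = root
            for k in parts[1:]:
                cur = cur.get(k) if isinstance(cur, dict) else None
            if cur is not None:
                return cur if isinstance(cur, str) else json.dumps(cur, ensure_ascii=False)
        return m.group(0)
    return re.sub(r"\{\{\s*([^}]+)\s*\}\}", repl, template)

assert render("Fetch {{form1.url}}") == "Fetch https://example.com"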
42
modules/workflows/methods/methodAi/_common.py
Normal file
@@ -0,0 +1,42 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""Shared helpers for AI workflow actions."""

import json
from typing import Any


def serialize_context(val: Any) -> str:
    """Convert any context value to a readable string for use in AI prompts.

    - None / empty string → ""
    - empty dict (no keys) → "" (avoids literal "{}" in file.create / prompts)
    - str → as-is
    - dict / list → pretty-printed JSON
    - anything else → str()
    """
    if val is None or val == "" or val == []:
        return ""
    if isinstance(val, dict) and len(val) == 0:
        return ""
    if isinstance(val, str):
        return val.strip()
    try:
        return json.dumps(val, ensure_ascii=False, indent=2)
    except Exception:
        return str(val)


def applyCommonAiParams(parameters: dict, request) -> None:
    """Apply common AI parameters (requireNeutralization, allowedModels) from node to request."""
    requireNeutralization = parameters.get("requireNeutralization")
    if requireNeutralization is not None:
        request.requireNeutralization = bool(requireNeutralization)

    allowedModels = parameters.get("allowedModels")
    if allowedModels and isinstance(allowedModels, list):
        if not request.options:
            from modules.datamodels.datamodelAi import AiCallOptions
            request.options = AiCallOptions()
        request.options.allowedModels = allowedModels
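Illustrative inputs and outputs for serialize_context, derived directly from the docstring above:

assert serialize_context(None) == ""
assert serialize_context({}) == ""              # no literal "{}" leaks into prompts
assert serialize_context("  hello  ") == "hello"
assert serialize_context({"a": 1}) == '{\n  "a": 1\n}'  # pretty-printed JSON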
@@ -67,6 +67,8 @@ async def consolidate(self, parameters: Dict[str, Any]) -> ActionResult:
            prompt=prompt,
            options=AiCallOptions(operationType=OperationTypeEnum.DATA_ANALYSE),
        )
        from modules.workflows.methods.methodAi._common import applyCommonAiParams
        applyCommonAiParams(parameters, req)
        resp = await ai_service.callAi(req)
    except (SubscriptionInactiveException, BillingContextError):
        raise
@@ -36,6 +36,10 @@ async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        }
        if parentOperationId:
            processParams["parentOperationId"] = parentOperationId
        if parameters.get("allowedModels"):
            processParams["allowedModels"] = parameters["allowedModels"]
        if parameters.get("requireNeutralization") is not None:
            processParams["requireNeutralization"] = parameters["requireNeutralization"]

        return await self.process(processParams)
@@ -14,8 +14,11 @@ from modules.serviceCenter.services.serviceBilling.mainServiceBilling import Bil
logger = logging.getLogger(__name__)

async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
    prompt = parameters.get("prompt")
    if not prompt:
    from modules.workflows.methods.methodAi._common import serialize_context
    base_prompt = (parameters.get("prompt") or "").strip()
    context_val = serialize_context(parameters.get("context"))
    prompt = f"Kontext:\n{context_val}\n\n{base_prompt}" if context_val else base_prompt
    if not prompt.strip():
        return ActionResult.isFailure(error="prompt is required")

    documentList = parameters.get("documentList", [])
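With invented values, the assembled prompt from the lines above looks like this (the German "Kontext:" label is taken verbatim from the code):

context_val = '{\n  "ticketId": 42\n}'  # invented serialized context
base_prompt = "Generate a CSV parser"
prompt = f"Kontext:\n{context_val}\n\n{base_prompt}"
# Kontext:
# {
#   "ticketId": 42
# }
#
# Generate a CSV parser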
@@ -55,6 +58,16 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
        processingMode=ProcessingModeEnum.DETAILED
    )

    # Apply node-level AI params
    allowedModels = parameters.get("allowedModels")
    if allowedModels and isinstance(allowedModels, list):
        options.allowedModels = allowedModels
    requireNeutralization = parameters.get("requireNeutralization")
    if requireNeutralization is not None:
        _ctx = getattr(self.services, '_context', None)
        if _ctx:
            _ctx.requireNeutralization = bool(requireNeutralization)

    # outputFormat: Optional - if None, formats determined from prompt by AI
    aiResponse: AiResponse = await self.services.ai.callAiContent(
        prompt=prompt,
@@ -14,14 +14,19 @@ from modules.serviceCenter.services.serviceBilling.mainServiceBilling import Bil
logger = logging.getLogger(__name__)

async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    prompt = parameters.get("prompt")
    if not prompt:
    from modules.workflows.methods.methodAi._common import serialize_context
    base_prompt = (parameters.get("prompt") or "").strip()
    context_val = serialize_context(parameters.get("context"))
    prompt = f"Kontext:\n{context_val}\n\n{base_prompt}" if context_val else base_prompt
    if not prompt.strip():
        return ActionResult.isFailure(error="prompt is required")

    documentList = parameters.get("documentList", [])
    documentType = parameters.get("documentType")
    # Optional: if omitted, formats determined from prompt by AI
    resultType = parameters.get("resultType")
    # Prefer explicit outputFormat (flow UI); resultType remains for legacy / API callers.
    resultType = parameters.get("outputFormat") or parameters.get("resultType")
    if isinstance(resultType, str):
        resultType = resultType.strip().lstrip(".").lower() or None

    if not resultType:
        logger.debug("resultType not provided - formats will be determined from prompt by AI")
@@ -46,8 +51,12 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    else:
        docRefList = DocumentReferenceList(references=[])

    # Prepare title
    title = parameters.get("documentType") or "Generated Document"
    title_raw = parameters.get("title")
    title = (title_raw.strip() if isinstance(title_raw, str) else "") or None
    if not title and isinstance(documentType, str) and documentType.strip():
        title = documentType.strip()
    if not title:
        title = "Generated Document"

    # Call AI service for document generation
    # callAiContent handles documentList internally via Phases 5A-5E
@@ -59,6 +68,16 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        compressContext=False
    )

    # Apply node-level AI params
    allowedModels = parameters.get("allowedModels")
    if allowedModels and isinstance(allowedModels, list):
        options.allowedModels = allowedModels
    requireNeutralization = parameters.get("requireNeutralization")
    if requireNeutralization is not None:
        _ctx = getattr(self.services, '_context', None)
        if _ctx:
            _ctx.requireNeutralization = bool(requireNeutralization)

    # outputFormat: Optional - if None, formats determined from prompt by AI
    aiResponse: AiResponse = await self.services.ai.callAiContent(
        prompt=prompt,
@@ -85,6 +104,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
            "actionType": "ai.generateDocument",
            "documentType": documentType,
            "resultType": resultType,
            "outputFormat": resultType,
            "title": title,
        }
    ))
@@ -106,14 +127,15 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
            docName = sanitized

        # Determine mime type
        rt = resultTypeFallback
        mimeType = "text/plain"
        if resultType == "html":
        if rt == "html":
            mimeType = "text/html"
        elif resultType == "json":
        elif rt == "json":
            mimeType = "application/json"
        elif resultType == "pdf":
        elif rt == "pdf":
            mimeType = "application/pdf"
        elif resultType == "md":
        elif rt == "md":
            mimeType = "text/markdown"

        documents.append(ActionDocument(
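The if/elif chain above is equivalent to a lookup table; a compact alternative sketch with the same extensions and the same text/plain fallback:

MIME_BY_RESULT_TYPE = {
    "html": "text/html",
    "json": "application/json",
    "pdf": "application/pdf",
    "md": "text/markdown",
}
mimeType = MIME_BY_RESULT_TYPE.get(rt, "text/plain")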
@@ -124,6 +146,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
            "actionType": "ai.generateDocument",
            "documentType": documentType,
            "resultType": resultType,
            "outputFormat": resultType,
            "title": title,
        }
    ))
@@ -73,6 +73,47 @@ def _action_docs_to_content_parts(services, docs: List[Any]) -> List[ContentPart
    logger.info(f"ai.process: Extracted {len(ec.parts)} parts from {name} (no persistence)")
    return all_parts

def _resolve_file_refs_to_content_parts(services, fileIdRefs) -> List[ContentPart]:
    """Fetch files by ID from the file store and extract content.
    Used for automation2 workflows where documents are file-store references,
    not chat message attachments."""
    from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy

    mgmt = getattr(services, 'interfaceDbComponent', None)
    extraction = getattr(services, 'extraction', None)
    if not mgmt or not extraction:
        logger.warning("_resolve_file_refs_to_content_parts: missing interfaceDbComponent or extraction service")
        return []

    allParts: List[ContentPart] = []
    opts = ExtractionOptions(prompt="", mergeStrategy=MergeStrategy())
    for ref in fileIdRefs:
        fileId = ref.documentId
        fileMeta = mgmt.getFile(fileId)
        if not fileMeta:
            logger.warning(f"_resolve_file_refs_to_content_parts: file {fileId} not found")
            continue
        fileData = mgmt.getFileData(fileId)
        if not fileData:
            logger.warning(f"_resolve_file_refs_to_content_parts: no data for file {fileId}")
            continue
        fileName = getattr(fileMeta, 'fileName', fileId)
        mimeType = getattr(fileMeta, 'mimeType', 'application/octet-stream')
        ec = extraction.extractContentFromBytes(
            documentBytes=fileData,
            fileName=fileName,
            mimeType=mimeType,
            documentId=fileId,
            options=opts,
        )
        for p in ec.parts:
            if p.data or getattr(p, "typeGroup", "") == "image":
                p.metadata.setdefault("originalFileName", fileName)
                allParts.append(p)
        logger.info(f"_resolve_file_refs_to_content_parts: extracted {len(ec.parts)} parts from {fileName}")
    return allParts


async def process(self, parameters: Dict[str, Any]) -> ActionResult:
    operationId = None
    try:
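A hedged usage sketch for the new helper — `FileRef` and the `services` wiring are invented stand-ins for a DocumentItemReference and the real service container:

class FileRef:  # stand-in: only .documentId is read by the helper
    def __init__(self, documentId: str):
        self.documentId = documentId

parts = _resolve_file_refs_to_content_parts(services, [FileRef("file-123")])
# Each returned ContentPart keeps its source name in metadata["originalFileName"];
# refs whose files are missing are skipped with a warning rather than failing the node.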
@@ -129,6 +170,17 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
                f"ai.process: Coerced documentList ({type(documentListParam).__name__}) "
                f"to DocumentReferenceList with {len(documentList.references)} references"
            )

        # Resolve DocumentItemReferences (file-ID refs from automation2) directly
        # from the file store. These cannot be resolved via chat messages.
        from modules.datamodels.datamodelDocref import DocumentItemReference
        fileIdRefs = [r for r in documentList.references if isinstance(r, DocumentItemReference)]
        if fileIdRefs:
            extractedParts = _resolve_file_refs_to_content_parts(self.services, fileIdRefs)
            if extractedParts:
                inline_content_parts = (inline_content_parts or []) + extractedParts
            remaining = [r for r in documentList.references if not isinstance(r, DocumentItemReference)]
            documentList = DocumentReferenceList(references=remaining)

        # Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only.
        resultType = parameters.get("resultType")
@@ -157,7 +209,14 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:

        mimeMap = {"txt": "text/plain", "json": "application/json", "html": "text/html", "md": "text/markdown", "csv": "text/csv", "xml": "application/xml"}
        output_mime_type = mimeMap.get(normalized_result_type, "text/plain") if normalized_result_type else "text/plain"

        # Normalize context: serialize any non-string value (dict/list/int/…) to text
        from modules.workflows.methods.methodAi._common import serialize_context
        paramContext = serialize_context(parameters.get("context"))
        parameters["context"] = paramContext
        if paramContext:
            logger.info(f"ai.process: context serialized ({len(paramContext)} chars)")

        # Phase 7.3: Pass documentList and/or contentParts to AI service
        contentParts: Optional[List[ContentPart]] = inline_content_parts
        if "contentParts" in parameters and not inline_content_parts:
@@ -183,7 +242,7 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
            self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI (simple mode)")

            context_parts = []
            paramContext = parameters.get("context")
            paramContext = parameters.get("context")  # already serialized above
            if paramContext and isinstance(paramContext, str) and paramContext.strip():
                context_parts.append(paramContext.strip())
            if documentList and len(documentList.references) > 0:
@@ -212,6 +271,9 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
                )
            )

            from modules.workflows.methods.methodAi._common import applyCommonAiParams
            applyCommonAiParams(parameters, request)

            aiResponse_obj = await self.services.ai.callAi(request)

            # Convert AiCallResponse to AiResponse format
@@ -243,6 +305,16 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
                operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE
            )

            # Apply node-level AI params (allowedModels, requireNeutralization)
            allowedModels = parameters.get("allowedModels")
            if allowedModels and isinstance(allowedModels, list):
                options.allowedModels = allowedModels
            requireNeutralization = parameters.get("requireNeutralization")
            if requireNeutralization is not None:
                _ctx = getattr(self.services, '_context', None)
                if _ctx:
                    _ctx.requireNeutralization = bool(requireNeutralization)

            # Get generationIntent from parameters (required for DATA_GENERATE)
            # Default to "document" if not provided (most common use case)
            # For code generation, use ai.generateCode action or explicitly pass generationIntent="code"
@@ -39,6 +39,10 @@ async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        }
        if parentOperationId:
            processParams["parentOperationId"] = parentOperationId
        if parameters.get("allowedModels"):
            processParams["allowedModels"] = parameters["allowedModels"]
        if parameters.get("requireNeutralization") is not None:
            processParams["requireNeutralization"] = parameters["requireNeutralization"]

        return await self.process(processParams)
@@ -41,6 +41,10 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        processParams["resultType"] = resultType
        if parentOperationId:
            processParams["parentOperationId"] = parentOperationId
        if parameters.get("allowedModels"):
            processParams["allowedModels"] = parameters["allowedModels"]
        if parameters.get("requireNeutralization") is not None:
            processParams["requireNeutralization"] = parameters["requireNeutralization"]

        return await self.process(processParams)
Some files were not shown because too many files have changed in this diff.