203 lines
7.6 KiB
Python
203 lines
7.6 KiB
Python
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document reference models for typed document references in workflows.
"""
|
|
|
|
import logging
|
|
from typing import Any, List, Optional
|
|
from pydantic import BaseModel, Field
|
|
from modules.shared.i18nRegistry import i18nModel
|
|
|
|
# Module-level logger named after this module; used by the coercion helper
# below to report unsupported input shapes.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class DocumentReference(BaseModel):
    """Base class for document references.

    Concrete reference types override :meth:`to_string`;
    ``DocumentReferenceList.to_string_list`` calls it on every stored
    reference, so the method is declared here to make that contract explicit.
    """

    def to_string(self) -> str:
        """Return the string form of this reference.

        Raises:
            NotImplementedError: Always on the base class; each concrete
                reference type supplies its own format.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not implement to_string()"
        )
|
|
|
|
|
|
# NOTE(review): i18nModel comes from modules.shared.i18nRegistry; presumably it
# registers the German display name for this model — confirm against the registry.
@i18nModel("Dokumentlisten-Referenz")
class DocumentListReference(DocumentReference):
    """Points at a whole document list, addressed by its message label.

    String form: ``docList:<messageId>:<label>`` when a message ID is set,
    otherwise ``docList:<label>``.
    """

    # Optional; only set when the list lives in another round's message.
    messageId: Optional[str] = Field(
        default=None,
        description="Optional message ID for cross-round references",
        json_schema_extra={"label": "Nachrichten-ID"},
    )
    # Required label of the document list.
    label: str = Field(
        description="Document list label",
        json_schema_extra={"label": "Bezeichnung"},
    )

    def to_string(self) -> str:
        """Serialize to ``docList:messageId:label`` or ``docList:label``."""
        # A missing or empty messageId simply drops the middle segment.
        prefix = f"docList:{self.messageId}:" if self.messageId else "docList:"
        return f"{prefix}{self.label}"
|
|
|
|
|
|
# NOTE(review): i18nModel comes from modules.shared.i18nRegistry; presumably it
# registers the German display name for this model — confirm against the registry.
@i18nModel("Dokumentelement-Referenz")
class DocumentItemReference(DocumentReference):
    """Points at one specific document, optionally carrying its file name.

    String form: ``docItem:<documentId>:<fileName>`` when a file name is set,
    otherwise ``docItem:<documentId>``.
    """

    # Required identifier of the referenced document.
    documentId: str = Field(
        description="Document ID",
        json_schema_extra={"label": "Dokument-ID"},
    )
    # Optional file name; omitted from the string form when unset or empty.
    fileName: Optional[str] = Field(
        default=None,
        description="Optional file name",
        json_schema_extra={"label": "Dateiname"},
    )

    def to_string(self) -> str:
        """Serialize to ``docItem:documentId:fileName`` or ``docItem:documentId``."""
        base = f"docItem:{self.documentId}"
        if not self.fileName:
            return base
        return f"{base}:{self.fileName}"
|
|
|
|
|
|
@i18nModel("Dokumentreferenz-Liste")
class DocumentReferenceList(BaseModel):
    """List of document references with string conversion helpers."""

    # NOTE(review): elements are annotated with the base type. Depending on the
    # pydantic version in use, dumping this model may emit only base-class
    # fields for the subclass instances stored here — confirm if this model is
    # ever serialized.
    references: List[DocumentReference] = Field(
        default_factory=list,
        description="List of document references",
        json_schema_extra={"label": "Referenzen"},
    )

    def to_string_list(self) -> List[str]:
        """Return ``to_string()`` of every stored reference, in list order."""
        return [ref.to_string() for ref in self.references]

    @classmethod
    def from_string_list(cls, stringList: List[str]) -> "DocumentReferenceList":
        """Parse a list of reference strings into typed references.

        Supported formats:
        - docList:label
        - docList:messageId:label
        - docItem:documentId
        - docItem:documentId:fileName

        Only the first ``:`` after the prefix separates the two parts, so the
        trailing part (label / file name) may itself contain colons.

        Any other non-empty string is treated as a bare document list label
        (backward compatibility). Entries that are not strings, are blank, or
        lack the mandatory part (label for ``docList:``, document ID for
        ``docItem:``) are skipped. ``None`` instead of a list yields an empty
        result.

        Args:
            stringList: Reference strings to parse.

        Returns:
            A new list object holding the parsed references in input order.
        """
        references: List[DocumentReference] = []

        for refStr in stringList or []:
            if not isinstance(refStr, str):
                continue
            refStr = refStr.strip()
            if not refStr:
                continue

            if refStr.startswith("docList:"):
                head, sep, tail = refStr[len("docList:"):].partition(":")
                # With a separator the payload is "messageId:label",
                # without one the whole payload is the label.
                messageId, label = (head, tail) if sep else (None, head)
                if label:
                    references.append(
                        DocumentListReference(messageId=messageId or None, label=label)
                    )
            elif refStr.startswith("docItem:"):
                documentId, _, fileName = refStr[len("docItem:"):].partition(":")
                # An item reference without a document ID cannot be resolved.
                if documentId:
                    references.append(
                        DocumentItemReference(documentId=documentId, fileName=fileName or None)
                    )
            else:
                # No known prefix: assume the string is a document list label.
                references.append(DocumentListReference(label=refStr))

        return cls(references=references)
|
|
|
|
|
|
def _documentReferenceFromDict(item: dict) -> Optional[DocumentReference]:
    """Build one typed reference from a dict, or return ``None`` if it has no usable keys."""
    docId = item.get("documentId") or item.get("id")
    if docId:
        fileName = item.get("fileName") or item.get("name")
        return DocumentItemReference(
            documentId=str(docId),
            # str() keeps non-string values (e.g. numbers) from failing validation.
            fileName=None if fileName is None else str(fileName),
        )
    label = item.get("label")
    if label:
        messageId = item.get("messageId")
        return DocumentListReference(
            label=str(label),
            messageId=None if messageId is None else str(messageId),
        )
    return None


def coerceDocumentReferenceList(value: Any) -> DocumentReferenceList:
    """Tolerant coercion of any agent/UI-supplied document list to
    :class:`DocumentReferenceList`.

    Accepts the canonical formats plus the dict-wrapper shapes that
    LLM tool-callers tend to generate when they see a
    ``type=DocumentList`` parameter:

    * ``None`` / ``""`` -> empty list
    * :class:`DocumentReferenceList` -> as-is
    * ``str`` -> parsed as a single-element string list
    * ``list`` -> each element is handled on its own: strings go through
      :meth:`DocumentReferenceList.from_string_list`, dicts with ``id`` /
      ``documentId`` become item references, dicts with ``label`` become
      list references, :class:`DocumentReference` instances are kept;
      anything else is skipped with a WARN log
    * ``{"documents": [...]}`` / ``{"references": [...]}`` /
      ``{"items": [...]}`` / ``{"files": [...]}`` -> recurse into the
      inner list (this is the shape LLMs love)
    * ``{"id": "..."}`` / ``{"documentId": "..."}`` -> single item
      reference (``fileName`` / ``name`` is used as the file name)
    * ``{"label": "..."}`` -> single list reference (optional ``messageId``)
    * any unrecognised input -> empty list with a WARN log; the caller
      decides whether an empty list is fatal.

    Args:
        value: The raw value to coerce.

    Returns:
        The coerced reference list; empty when nothing could be recognised.
    """
    # The empty string needs no special case: the str branch parses it to an
    # empty list, and skipping ``value == ""`` avoids calling a foreign __eq__.
    if value is None:
        return DocumentReferenceList(references=[])
    if isinstance(value, DocumentReferenceList):
        return value
    if isinstance(value, str):
        return DocumentReferenceList.from_string_list([value])

    if isinstance(value, dict):
        # Wrapper shapes first: the first wrapper key holding a list wins.
        for innerKey in ("documents", "references", "items", "files"):
            inner = value.get(innerKey)
            if isinstance(inner, list):
                return coerceDocumentReferenceList(inner)
        reference = _documentReferenceFromDict(value)
        if reference is not None:
            return DocumentReferenceList(references=[reference])
        logger.warning(
            "coerceDocumentReferenceList: unsupported dict shape "
            "(keys=%s); returning empty list.",
            list(value.keys()),
        )
        return DocumentReferenceList(references=[])

    if isinstance(value, list):
        references: List[DocumentReference] = []
        skippedTypes: List[str] = []
        for item in value:
            if isinstance(item, str):
                references.extend(
                    DocumentReferenceList.from_string_list([item]).references
                )
            elif isinstance(item, dict):
                reference = _documentReferenceFromDict(item)
                if reference is not None:
                    references.append(reference)
            elif isinstance(item, DocumentReference):
                references.append(item)
            else:
                # Object elements (e.g. inline ActionDocument-like) must be
                # pre-handled by the caller before calling this coercer.
                typeName = type(item).__name__
                if typeName not in skippedTypes:
                    skippedTypes.append(typeName)
        if skippedTypes:
            logger.warning(
                "coerceDocumentReferenceList: list element type(s) %s "
                "not recognised; those elements were skipped.",
                ", ".join(skippedTypes),
            )
        return DocumentReferenceList(references=references)

    logger.warning(
        "coerceDocumentReferenceList: unsupported value type "
        "%s; returning empty list.",
        type(value).__name__,
    )
    return DocumentReferenceList(references=[])
|