# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document reference models for typed document references in workflows.
"""

import logging
from typing import Any, List, Optional

from pydantic import BaseModel, Field

from modules.shared.i18nRegistry import i18nModel

logger = logging.getLogger(__name__)

# Prefixes of the two canonical string encodings of a reference.
_DOC_LIST_PREFIX = "docList:"
_DOC_ITEM_PREFIX = "docItem:"

# Dict keys under which a wrapped list of references is looked up, in
# priority order (the first key holding a list wins).
_WRAPPER_KEYS = ("documents", "references", "items", "files")


class DocumentReference(BaseModel):
    """Base class for document references.

    Declares no fields of its own. ``DocumentReferenceList.to_string_list``
    calls ``to_string()`` on every stored reference, so concrete subclasses
    are expected to provide that method.
    """


@i18nModel("Dokumentlisten-Referenz")
class DocumentListReference(DocumentReference):
    """Reference to a document list via message label."""

    messageId: Optional[str] = Field(
        None,
        description="Optional message ID for cross-round references",
        json_schema_extra={"label": "Nachrichten-ID"},
    )
    label: str = Field(
        description="Document list label",
        json_schema_extra={"label": "Bezeichnung"},
    )

    def to_string(self) -> str:
        """Return ``docList:messageId:label``, or ``docList:label`` when no
        (non-empty) message ID is set."""
        if self.messageId:
            return f"docList:{self.messageId}:{self.label}"
        return f"docList:{self.label}"


@i18nModel("Dokumentelement-Referenz")
class DocumentItemReference(DocumentReference):
    """Reference to a specific document item."""

    documentId: str = Field(
        description="Document ID",
        json_schema_extra={"label": "Dokument-ID"},
    )
    fileName: Optional[str] = Field(
        None,
        description="Optional file name",
        json_schema_extra={"label": "Dateiname"},
    )

    def to_string(self) -> str:
        """Return ``docItem:documentId:fileName``, or ``docItem:documentId``
        when no (non-empty) file name is set."""
        if self.fileName:
            return f"docItem:{self.documentId}:{self.fileName}"
        return f"docItem:{self.documentId}"


def _parseReferenceString(refStr: Any) -> Optional[DocumentReference]:
    """Parse one reference string into a typed reference.

    Returns ``None`` for non-string input, for empty/whitespace-only strings
    and for a bare prefix without payload (``"docList:"`` / ``"docItem:"``).
    A string without a known prefix is treated as a plain document list
    label (backward compatibility).
    """
    if not isinstance(refStr, str):
        return None
    refStr = refStr.strip()
    if not refStr:
        return None

    if refStr.startswith(_DOC_LIST_PREFIX):
        # Split on the first colon only, so a label may itself contain ":".
        head, sep, tail = refStr[len(_DOC_LIST_PREFIX):].partition(":")
        if sep:
            # docList:messageId:label
            return DocumentListReference(messageId=head, label=tail)
        # docList:label (nothing to reference when the payload is empty)
        return DocumentListReference(label=head) if head else None

    if refStr.startswith(_DOC_ITEM_PREFIX):
        # Split on the first colon only, so a file name may contain ":".
        head, sep, tail = refStr[len(_DOC_ITEM_PREFIX):].partition(":")
        if sep:
            # docItem:documentId:fileName
            return DocumentItemReference(documentId=head, fileName=tail)
        # docItem:documentId
        return DocumentItemReference(documentId=head) if head else None

    # Unknown format: assume the whole string is a document list label.
    return DocumentListReference(label=refStr)


@i18nModel("Dokumentreferenz-Liste")
class DocumentReferenceList(BaseModel):
    """List of document references with conversion methods."""

    references: List[DocumentReference] = Field(
        default_factory=list,
        description="List of document references",
        json_schema_extra={"label": "Referenzen"},
    )

    def to_string_list(self) -> List[str]:
        """Convert all references to their string form, in order."""
        return [ref.to_string() for ref in self.references]

    @classmethod
    def from_string_list(cls, stringList: List[str]) -> "DocumentReferenceList":
        """Parse a list of strings into typed references.

        Supported formats:
        - docList:label
        - docList:messageId:label
        - docItem:documentId
        - docItem:documentId:fileName

        A string without a known prefix is treated as a document list label.
        Entries that are not strings, are empty, or carry a prefix without
        any payload are skipped.
        """
        parsed = (_parseReferenceString(refStr) for refStr in stringList)
        return cls(references=[ref for ref in parsed if ref is not None])


def _optionalStr(value: Any) -> Optional[str]:
    """Return *value* unchanged if it is ``None`` or a ``str``, else
    ``str(value)``.

    Keeps non-string values (e.g. numeric IDs or names coming from JSON) from
    failing validation on the ``Optional[str]`` model fields.
    """
    if value is None or isinstance(value, str):
        return value
    return str(value)


def _referenceFromDict(item: dict) -> Optional[DocumentReference]:
    """Build a reference from a single dict, or return ``None``.

    ``documentId`` / ``id`` produce an item reference (with optional
    ``fileName`` / ``name``); otherwise ``label`` produces a list reference
    (with optional ``messageId``). Falsy IDs and labels are ignored.
    """
    docId = item.get("documentId") or item.get("id")
    if docId:
        return DocumentItemReference(
            documentId=str(docId),
            fileName=_optionalStr(item.get("fileName") or item.get("name")),
        )
    label = item.get("label")
    if label:
        return DocumentListReference(
            label=str(label),
            messageId=_optionalStr(item.get("messageId")),
        )
    return None


def _coerceDict(value: dict) -> DocumentReferenceList:
    """Coerce a dict: either a wrapper around a list or a single reference."""
    for innerKey in _WRAPPER_KEYS:
        inner = value.get(innerKey)
        if isinstance(inner, list):
            return coerceDocumentReferenceList(inner)

    reference = _referenceFromDict(value)
    if reference is not None:
        return DocumentReferenceList(references=[reference])

    logger.warning(
        "coerceDocumentReferenceList: unsupported dict shape "
        "(keys=%s); returning empty list.",
        list(value.keys()),
    )
    return DocumentReferenceList(references=[])


def _coerceList(value: list) -> DocumentReferenceList:
    """Coerce a list element by element, keeping the input order.

    Each element is handled according to its own type, so mixed lists
    (strings, dicts and typed references) do not lose entries. Elements of
    any other type are skipped and reported in a single warning.
    """
    references: List[DocumentReference] = []
    skippedTypes: List[str] = []

    for element in value:
        if isinstance(element, DocumentReference):
            references.append(element)
        elif isinstance(element, str):
            reference = _parseReferenceString(element)
            if reference is not None:
                references.append(reference)
        elif isinstance(element, dict):
            reference = _referenceFromDict(element)
            if reference is not None:
                references.append(reference)
        else:
            typeName = type(element).__name__
            if typeName not in skippedTypes:
                skippedTypes.append(typeName)

    if skippedTypes:
        # Object lists (e.g. inline ActionDocument-like values) must be
        # pre-handled by the caller before calling this coercer.
        logger.warning(
            "coerceDocumentReferenceList: list element type(s) %s not "
            "recognised; skipping those elements.",
            ", ".join(skippedTypes),
        )
    return DocumentReferenceList(references=references)


def coerceDocumentReferenceList(value: Any) -> DocumentReferenceList:
    """Tolerant coercion of any agent/UI-supplied document list to
    :class:`DocumentReferenceList`.

    Accepts the canonical formats plus the dict-wrapper shapes that LLM
    tool-callers tend to generate when they see a ``type=DocumentList``
    parameter:

    * ``None`` / ``""`` -> empty list
    * :class:`DocumentReferenceList` -> as-is
    * :class:`DocumentReference` -> single-element list
    * ``str`` -> parsed like a single-element string list
    * ``list`` -> every element is coerced on its own: ``str`` is parsed as
      in :meth:`DocumentReferenceList.from_string_list`, ``dict`` with
      ``documentId`` / ``id`` becomes an item reference, ``dict`` with
      ``label`` becomes a list reference, :class:`DocumentReference`
      instances are kept; other elements are skipped with a WARN log
    * ``{"documents": [...]}`` / ``{"references": [...]}`` /
      ``{"items": [...]}`` / ``{"files": [...]}`` -> recurse into the inner
      list (this is the shape LLMs love)
    * ``{"id": "..."}`` / ``{"documentId": "..."}`` -> single item reference
    * ``{"label": "..."}`` -> single list reference
    * any unrecognised input -> empty list with a WARN log (the caller
      decides whether an empty list is fatal).

    Non-string IDs, labels, file names and message IDs are converted with
    ``str()`` so that they do not fail model validation.
    """
    if value is None or value == "":
        return DocumentReferenceList(references=[])
    if isinstance(value, DocumentReferenceList):
        return value
    if isinstance(value, DocumentReference):
        return DocumentReferenceList(references=[value])
    if isinstance(value, str):
        return DocumentReferenceList.from_string_list([value])
    if isinstance(value, dict):
        return _coerceDict(value)
    if isinstance(value, list):
        return _coerceList(value)

    logger.warning(
        "coerceDocumentReferenceList: unsupported value type "
        "%s; returning empty list.",
        type(value).__name__,
    )
    return DocumentReferenceList(references=[])