# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document reference models for typed document references in workflows.
"""

from typing import List, Optional

from pydantic import BaseModel, Field

from modules.shared.attributeUtils import registerModelLabels

# Prefixes that identify the two typed string formats understood by the parser.
_DOC_LIST_PREFIX = "docList:"
_DOC_ITEM_PREFIX = "docItem:"


class DocumentReference(BaseModel):
    """Base class for document references."""

    def to_string(self) -> str:
        """Convert the reference to its string format.

        Declared on the base class because DocumentReferenceList.to_string_list()
        calls it on every element of a List[DocumentReference]; subclasses
        override it.

        Raises:
            NotImplementedError: always, for a reference type that does not
                override this method.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not implement to_string()"
        )


class DocumentListReference(DocumentReference):
    """Reference to a document list via message label."""

    messageId: Optional[str] = Field(
        None, description="Optional message ID for cross-round references"
    )
    label: str = Field(description="Document list label")

    def to_string(self) -> str:
        """Convert to string format: docList:messageId:label or docList:label.

        The messageId segment is emitted only when messageId is truthy, so
        both None and "" produce the short form.
        """
        if self.messageId:
            return f"docList:{self.messageId}:{self.label}"
        return f"docList:{self.label}"


class DocumentItemReference(DocumentReference):
    """Reference to a specific document item."""

    documentId: str = Field(description="Document ID")
    fileName: Optional[str] = Field(None, description="Optional file name")

    def to_string(self) -> str:
        """Convert to string format: docItem:documentId:fileName or docItem:documentId.

        The fileName segment is emitted only when fileName is truthy, so
        both None and "" produce the short form.
        """
        if self.fileName:
            return f"docItem:{self.documentId}:{self.fileName}"
        return f"docItem:{self.documentId}"


def _parse_reference(refStr: str) -> Optional[DocumentReference]:
    """Parse one already-stripped reference string into a typed reference.

    Args:
        refStr: Reference string with surrounding whitespace removed.

    Returns:
        The typed reference, or None when the required segment (the label of
        a docList reference, the documentId of a docItem reference) is empty.
    """
    if refStr.startswith(_DOC_LIST_PREFIX):
        # Split on the first ":" only, so the label itself may contain colons.
        head, sep, tail = refStr[len(_DOC_LIST_PREFIX):].partition(":")
        if sep:
            # docList:messageId:label - the label is required; an empty one
            # is rejected just like the bare "docList:" form below.
            if not tail:
                return None
            return DocumentListReference(messageId=head, label=tail)
        # docList:label
        if not head:
            return None
        return DocumentListReference(label=head)

    if refStr.startswith(_DOC_ITEM_PREFIX):
        # Split on the first ":" only, so the file name may contain colons.
        head, sep, tail = refStr[len(_DOC_ITEM_PREFIX):].partition(":")
        # The documentId is required in both forms.
        if not head:
            return None
        if sep:
            # docItem:documentId:fileName
            return DocumentItemReference(documentId=head, fileName=tail)
        # docItem:documentId
        return DocumentItemReference(documentId=head)

    # No known prefix: for backward compatibility the whole string is treated
    # as a document list label.
    if refStr:
        return DocumentListReference(label=refStr)
    return None


class DocumentReferenceList(BaseModel):
    """List of document references with conversion methods."""

    references: List[DocumentReference] = Field(
        default_factory=list,
        description="List of document references",
    )

    def to_string_list(self) -> List[str]:
        """Convert all references to a list of strings via their to_string()."""
        return [ref.to_string() for ref in self.references]

    @classmethod
    def from_string_list(cls, stringList: List[str]) -> "DocumentReferenceList":
        """Parse a string list into typed references.

        Supports formats:
        - docList:label
        - docList:messageId:label
        - docItem:documentId
        - docItem:documentId:fileName

        A string without a known prefix is treated as a document list label
        (backward compatibility). Entries that are empty, are not strings, or
        lack the required label / documentId segment are skipped.

        Args:
            stringList: Reference strings to parse; None is treated as empty.

        Returns:
            A DocumentReferenceList holding the parsed references in input order.
        """
        references: List[DocumentReference] = []
        for refStr in stringList or []:
            if not refStr or not isinstance(refStr, str):
                continue
            parsed = _parse_reference(refStr.strip())
            if parsed is not None:
                references.append(parsed)
        return cls(references=references)


registerModelLabels(
    "DocumentReference",
    {"en": "Document Reference", "fr": "Référence de document"},
    {
        "messageId": {"en": "Message ID", "fr": "ID du message"},
        "label": {"en": "Label", "fr": "Étiquette"},
        "documentId": {"en": "Document ID", "fr": "ID du document"},
        "fileName": {"en": "File Name", "fr": "Nom du fichier"},
    },
)

registerModelLabels(
    "DocumentReferenceList",
    {"en": "Document Reference List", "fr": "Liste de références de documents"},
    {
        "references": {"en": "References", "fr": "Références"},
    },
)