# gateway/modules/datamodels/datamodelDocref.py
# 2025-11-17 23:12:18 +01:00
#
# 118 lines
# 4.3 KiB
# Python

"""
Document reference models for typed document references in workflows.
"""
from typing import List, Optional
from pydantic import BaseModel, Field
from modules.shared.attributeUtils import registerModelLabels
class DocumentReference(BaseModel):
    """Base class for document references"""

    # Declares no fields or methods of its own; the concrete reference types
    # in this module subclass it and each provide their own ``to_string()``.
class DocumentListReference(DocumentReference):
    """Reference to a document list via message label"""

    # messageId is optional; when it is empty or None the serialized form
    # omits it entirely (see to_string).
    messageId: Optional[str] = Field(
        default=None,
        description="Optional message ID for cross-round references",
    )
    label: str = Field(description="Document list label")

    def to_string(self) -> str:
        """Serialize this reference to its string form.

        Returns:
            ``docList:<messageId>:<label>`` when a message ID is set,
            otherwise ``docList:<label>``.
        """
        # Short form first: no (or empty) message ID means label only.
        if not self.messageId:
            return f"docList:{self.label}"
        return f"docList:{self.messageId}:{self.label}"
class DocumentItemReference(DocumentReference):
    """Reference to a specific document item"""

    documentId: str = Field(description="Document ID")
    # fileName is optional; when it is empty or None the serialized form
    # omits it entirely (see to_string).
    fileName: Optional[str] = Field(
        default=None,
        description="Optional file name",
    )

    def to_string(self) -> str:
        """Serialize this reference to its string form.

        Returns:
            ``docItem:<documentId>:<fileName>`` when a file name is set,
            otherwise ``docItem:<documentId>``.
        """
        # Short form first: no (or empty) file name means document ID only.
        if not self.fileName:
            return f"docItem:{self.documentId}"
        return f"docItem:{self.documentId}:{self.fileName}"
class DocumentReferenceList(BaseModel):
    """List of document references with conversion methods"""

    references: List[DocumentReference] = Field(
        default_factory=list,
        description="List of document references"
    )

    def to_string_list(self) -> List[str]:
        """Convert all references to their string form.

        Returns:
            One string per entry in ``references``, produced by calling
            ``to_string()`` on each entry, in order.
        """
        return [ref.to_string() for ref in self.references]

    @classmethod
    def from_string_list(cls, stringList: List[str]) -> "DocumentReferenceList":
        """Parse a list of reference strings into typed references.

        Supported formats:
        - docList:label
        - docList:messageId:label
        - docItem:documentId
        - docItem:documentId:fileName
        - any other non-empty string is kept as a document list label
          (backward compatibility with plain-label references)

        Parsing rules:
        - Entries that are empty or not ``str`` are skipped; remaining
          entries are stripped of surrounding whitespace first.
        - The text after the prefix is split on its FIRST colon only, so
          the trailing component (label / fileName) may itself contain
          colons. Consequently ``docList:a:b`` is always read as
          messageId ``a`` with label ``b``.
        - A reference whose required component (label for ``docList``,
          documentId for ``docItem``) is empty is skipped, regardless of
          whether the optional component is present.
        - An empty optional component (messageId / fileName) is treated as
          absent and left unset on the resulting reference.

        Args:
            stringList: Reference strings to parse.

        Returns:
            A new ``DocumentReferenceList`` holding the parsed references in
            input order.
        """
        listPrefix = "docList:"
        itemPrefix = "docItem:"
        references: List[DocumentReference] = []

        for refStr in stringList:
            if not refStr or not isinstance(refStr, str):
                continue
            refStr = refStr.strip()

            if refStr.startswith(listPrefix):
                head, sep, tail = refStr[len(listPrefix):].partition(":")
                # With a separator the layout is messageId:label; without
                # one the whole remainder is the label.
                messageId, label = (head, tail) if sep else ("", head)
                if not label:
                    # No usable label (e.g. "docList:" or "docList:msg:").
                    continue
                if messageId:
                    references.append(DocumentListReference(messageId=messageId, label=label))
                else:
                    # Leave messageId unset rather than storing "".
                    references.append(DocumentListReference(label=label))

            elif refStr.startswith(itemPrefix):
                documentId, _, fileName = refStr[len(itemPrefix):].partition(":")
                if not documentId:
                    # No usable document ID (e.g. "docItem:" or "docItem::name").
                    continue
                if fileName:
                    references.append(DocumentItemReference(documentId=documentId, fileName=fileName))
                else:
                    # Leave fileName unset rather than storing "".
                    references.append(DocumentItemReference(documentId=documentId))

            elif refStr:
                # No known prefix: treat the whole string as a plain label
                # (backward compatibility). Whitespace-only input is empty
                # after strip() and is dropped here.
                references.append(DocumentListReference(label=refStr))

        return cls(references=references)
# Register English/French display labels for the "DocumentReference" model.
# Arguments as passed here: the model name, a language -> label mapping for
# the model, and a mapping of attribute name -> (language -> label).
# NOTE(review): messageId/label/documentId/fileName are declared on the
# DocumentListReference / DocumentItemReference subclasses, not on
# DocumentReference itself -- presumably the labels are registered under the
# base name on purpose; confirm against registerModelLabels.
registerModelLabels(
"DocumentReference",
{"en": "Document Reference", "fr": "Référence de document"},
{
"messageId": {"en": "Message ID", "fr": "ID du message"},
"label": {"en": "Label", "fr": "Étiquette"},
"documentId": {"en": "Document ID", "fr": "ID du document"},
"fileName": {"en": "File Name", "fr": "Nom du fichier"},
},
)
# Register English/French display labels for the "DocumentReferenceList"
# model and its single "references" attribute. Arguments as passed here: the
# model name, a language -> label mapping for the model, and a mapping of
# attribute name -> (language -> label).
registerModelLabels(
"DocumentReferenceList",
{"en": "Document Reference List", "fr": "Liste de références de documents"},
{
"references": {"en": "References", "fr": "Références"},
},
)