gateway/modules/datamodels/datamodelDocref.py
2025-12-15 21:55:26 +01:00

120 lines
4.3 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document reference models for typed document references in workflows.
"""
from typing import List, Optional
from pydantic import BaseModel, Field
from modules.shared.attributeUtils import registerModelLabels
class DocumentReference(BaseModel):
    """Base class for typed document references.

    The base class declares no fields of its own; concrete reference types
    add their fields and override :meth:`to_string`.
    ``DocumentReferenceList.to_string_list`` calls ``to_string()`` on every
    entry, so the method is declared here to make that contract explicit.
    """

    def to_string(self) -> str:
        """Return the compact string form of this reference.

        Raises:
            NotImplementedError: Always on the base class; concrete
                reference types must override this method.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement to_string()"
        )
class DocumentListReference(DocumentReference):
    """Points at a list of documents identified by a message label."""

    # Only needed when the label belongs to a message from another round.
    messageId: Optional[str] = Field(None, description="Optional message ID for cross-round references")
    label: str = Field(description="Document list label")

    def to_string(self) -> str:
        """Serialize the reference.

        Returns:
            ``docList:label`` when no message ID is set (``None`` or empty),
            otherwise ``docList:messageId:label``.
        """
        if not self.messageId:
            return f"docList:{self.label}"
        return f"docList:{self.messageId}:{self.label}"
class DocumentItemReference(DocumentReference):
    """Points at one individual document by its ID."""

    documentId: str = Field(description="Document ID")
    # The file name is optional and is only appended to the string form when set.
    fileName: Optional[str] = Field(None, description="Optional file name")

    def to_string(self) -> str:
        """Serialize the reference.

        Returns:
            ``docItem:documentId`` when no file name is set (``None`` or
            empty), otherwise ``docItem:documentId:fileName``.
        """
        if not self.fileName:
            return f"docItem:{self.documentId}"
        return f"docItem:{self.documentId}:{self.fileName}"
class DocumentReferenceList(BaseModel):
    """Ordered collection of document references with string conversion.

    Converts between the typed reference models and their compact string
    encoding (``docList:...`` / ``docItem:...``).
    """

    # NOTE(review): items are annotated with the base class DocumentReference,
    # which declares no fields. Confirm that the installed pydantic version
    # keeps the subclass fields when this model is validated or dumped.
    references: List[DocumentReference] = Field(
        default_factory=list,
        description="List of document references"
    )

    def to_string_list(self) -> List[str]:
        """Return the string form of every reference, in list order."""
        return [ref.to_string() for ref in self.references]

    @classmethod
    def from_string_list(cls, stringList: List[str]) -> "DocumentReferenceList":
        """Parse a list of reference strings into typed references.

        Supported formats:
        - docList:label
        - docList:messageId:label
        - docItem:documentId
        - docItem:documentId:fileName

        Only the first ":" after the prefix separates the two components, so
        the second component (label or file name) may itself contain ":".
        Any other non-empty string is treated as a bare document list label
        (backward compatibility).

        Entries that are empty, whitespace-only or not strings are skipped.
        A prefixed entry whose required component (label / documentId) is
        empty is skipped as well; an empty optional component (messageId /
        fileName) is left unset instead of being stored as "".

        Args:
            stringList: Reference strings to parse.

        Returns:
            A new DocumentReferenceList with the parsed references in input
            order.
        """
        listPrefix = "docList:"
        itemPrefix = "docItem:"
        references: List[DocumentReference] = []

        for rawRef in stringList:
            # Tolerate junk entries (None, non-strings, blanks) instead of raising.
            if not rawRef or not isinstance(rawRef, str):
                continue
            refStr = rawRef.strip()
            if not refStr:
                continue

            if refStr.startswith(listPrefix):
                first, separator, rest = refStr[len(listPrefix):].partition(":")
                # With a separator the payload is messageId:label, else label only.
                if separator:
                    messageId, label = first, rest
                else:
                    messageId, label = "", first
                if not label:
                    continue  # the label is required
                if messageId:
                    references.append(DocumentListReference(messageId=messageId, label=label))
                else:
                    references.append(DocumentListReference(label=label))
            elif refStr.startswith(itemPrefix):
                documentId, _, fileName = refStr[len(itemPrefix):].partition(":")
                if not documentId:
                    continue  # the document ID is required
                if fileName:
                    references.append(DocumentItemReference(documentId=documentId, fileName=fileName))
                else:
                    references.append(DocumentItemReference(documentId=documentId))
            else:
                # No known prefix: keep backward compatibility by treating
                # the whole string as a document list label.
                references.append(DocumentListReference(label=refStr))

        return cls(references=references)
# English/French display labels for the reference models and their attributes.
# NOTE(review): registerModelLabels comes from modules.shared.attributeUtils and
# its exact effect is not visible here - presumably it stores these labels
# under the given model name; confirm against the helper.
# The attribute keys below are the fields declared on DocumentListReference
# (messageId, label) and DocumentItemReference (documentId, fileName); the base
# DocumentReference class declares no fields itself.
registerModelLabels(
"DocumentReference",
{"en": "Document Reference", "fr": "Référence de document"},
{
"messageId": {"en": "Message ID", "fr": "ID du message"},
"label": {"en": "Label", "fr": "Étiquette"},
"documentId": {"en": "Document ID", "fr": "ID du document"},
"fileName": {"en": "File Name", "fr": "Nom du fichier"},
},
)
# Labels for the container model; "references" is its only declared field.
registerModelLabels(
"DocumentReferenceList",
{"en": "Document Reference List", "fr": "Liste de références de documents"},
{
"references": {"en": "References", "fr": "Références"},
},
)