# Copyright (c) 2025 Patrick Motsch # All rights reserved. """ Document Index helper for Context operations. Handles parsing and formatting of document indexes. """ import logging from typing import Dict, Any from datetime import datetime, UTC logger = logging.getLogger(__name__) class DocumentIndexHelper: """Helper for document index operations""" def __init__(self, methodInstance): """ Initialize document index helper. Args: methodInstance: Instance of MethodContext (for access to services) """ self.method = methodInstance self.services = methodInstance.services def parseDocumentIndex(self, documentsIndex: str, workflow: Any) -> Dict[str, Any]: """Parse the document index string into structured data.""" try: indexData = { "workflowId": getattr(workflow, 'id', 'unknown'), "generatedAt": datetime.now(UTC).isoformat(), "totalDocuments": 0, "rounds": [], "documentReferences": [] } # Extract document references from the index string lines = documentsIndex.split('\n') currentRound = None currentDocList = None for line in lines: line = line.strip() if not line: continue # Check for round headers if "Current round documents:" in line: currentRound = "current" continue elif "Past rounds documents:" in line: currentRound = "past" continue # Check for document list references (docList:...) if line.startswith("- docList:"): docListRef = line.replace("- docList:", "").strip() currentDocList = { "reference": docListRef, "round": currentRound, "documents": [] } indexData["rounds"].append(currentDocList) continue # Check for individual document references (docItem:...) if line.startswith(" - docItem:") or line.startswith("- docItem:"): docItemRef = line.replace(" - docItem:", "").replace("- docItem:", "").strip() indexData["documentReferences"].append({ "reference": docItemRef, "round": currentRound, "docList": currentDocList["reference"] if currentDocList else None }) indexData["totalDocuments"] += 1 if currentDocList: currentDocList["documents"].append(docItemRef) return indexData except Exception as e: logger.error(f"Error parsing document index: {str(e)}") return { "workflowId": getattr(workflow, 'id', 'unknown'), "error": f"Failed to parse document index: {str(e)}", "rawIndex": documentsIndex }