# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
CommCoach Session Indexer.

Indexes coaching session data into the knowledge store (pgvector) for
RAG-based long-term memory. Called after session completion to ensure
semantic searchability across 20+ sessions.
"""

import logging
import uuid
import json
from typing import List, Dict, Any, Optional

logger = logging.getLogger(__name__)

# Prefix for the synthetic "file" id grouping all chunks of one session.
_COACHING_FILE_PREFIX = "coaching-session:"


def _buildChunk(
    sessionId: str,
    contextId: str,
    contextTitle: str,
    objectSuffix: str,
    location: str,
    chunkType: str,
    data: str,
) -> Dict[str, Any]:
    """Assemble one content chunk with the session-wide context metadata.

    Centralizes the chunk dict shape so all six chunk kinds stay consistent.
    """
    return {
        "contentObjectId": f"{sessionId}:{objectSuffix}",
        "data": data,
        "contextRef": {
            "containerPath": f"session:{sessionId}",
            "location": location,
            "type": chunkType,
            "contextId": contextId,
            "sessionId": sessionId,
            "contextTitle": contextTitle,
        },
    }


def _extractTexts(items: List[Any]) -> List[Any]:
    """Pull display text from goal/insight entries.

    Dicts contribute their "text" value (falling back to the dict itself when
    the key is absent); everything else is stringified. Falsy entries are
    skipped.
    """
    return [i.get("text", i) if isinstance(i, dict) else str(i) for i in items if i]


async def indexSessionData(
    sessionId: str,
    contextId: str,
    userId: str,
    featureInstanceId: str,
    mandateId: str,
    messages: List[Dict[str, Any]],
    summary: Optional[str],
    keyTopics: Optional[str],
    goals: Optional[List[Any]],
    insights: Optional[List[Any]],
    tasks: Optional[List[Dict[str, Any]]],
    contextTitle: str = "",
    knowledgeService=None,
):
    """Index a completed coaching session into the knowledge store.

    Creates ContentChunks with embeddings for:
    - Each User+Assistant message pair (maximum detail depth)
    - Session summary
    - Key topics (individually, for precise retrieval)
    - Current goals
    - New insights
    - Tasks (open + done)

    Args:
        sessionId: Id of the completed coaching session.
        contextId: Id of the coaching context the session belongs to.
        userId / featureInstanceId / mandateId: Ownership ids forwarded
            verbatim into the ingestion job.
        messages: Chat transcript; dicts with "role" and "content" keys.
        summary: Optional free-text session summary.
        keyTopics: Optional JSON string (or already-parsed list) of topics.
        goals / insights: Optional lists of strings or dicts with a "text" key.
        tasks: Optional list of dicts with "status" and "title" keys.
        contextTitle: Human-readable context name embedded in chunk texts.
        knowledgeService: Service exposing ``requestIngestion``; when falsy
            the function logs a warning and returns without indexing.

    Returns:
        None. Ingestion failures are logged, never raised.
    """
    if not knowledgeService:
        logger.warning("No knowledge service available for coaching indexer")
        return

    syntheticFileId = f"{_COACHING_FILE_PREFIX}{sessionId}"
    chunks: List[Dict[str, Any]] = []

    # 1. Message pairs (User + Assistant) as individual chunks.
    for idx, pair in enumerate(_extractMessagePairs(messages)):
        chunks.append(_buildChunk(
            sessionId, contextId, contextTitle,
            f"msg-pair:{idx}", f"message-pair-{idx}",
            "coaching-message-pair", pair["text"],
        ))

    # 2. Session summary.
    if summary:
        chunks.append(_buildChunk(
            sessionId, contextId, contextTitle,
            "summary", "summary", "coaching-session-summary",
            f"Session-Zusammenfassung ({contextTitle}): {summary}",
        ))

    # 3. Key topics (each as a separate chunk for precise retrieval).
    parsedTopics = _parseJsonSafe(keyTopics, [])
    # Guard: valid JSON may decode to a bare string (e.g. '"topic"'), which
    # would otherwise be enumerated character-by-character below.
    if isinstance(parsedTopics, str):
        parsedTopics = [parsedTopics]
    for tidx, topic in enumerate(parsedTopics):
        topicStr = str(topic).strip()
        if topicStr:
            chunks.append(_buildChunk(
                sessionId, contextId, contextTitle,
                f"topic:{tidx}", f"topic-{tidx}", "coaching-key-topic",
                f"Coaching-Thema ({contextTitle}): {topicStr}",
            ))

    # 4. Goals (joined into one bullet-list chunk).
    if goals:
        goalTexts = _extractTexts(goals)
        if goalTexts:
            goalsStr = "\n".join(f"- {g}" for g in goalTexts)
            chunks.append(_buildChunk(
                sessionId, contextId, contextTitle,
                "goals", "goals", "coaching-goals",
                f"Coaching-Ziele ({contextTitle}):\n{goalsStr}",
            ))

    # 5. Insights (joined into one bullet-list chunk).
    if insights:
        insightTexts = _extractTexts(insights)
        if insightTexts:
            insightsStr = "\n".join(f"- {t}" for t in insightTexts)
            chunks.append(_buildChunk(
                sessionId, contextId, contextTitle,
                "insights", "insights", "coaching-insights",
                f"Coaching-Erkenntnisse ({contextTitle}):\n{insightsStr}",
            ))

    # 6. Tasks (open + done, one line per task with its status tag).
    if tasks:
        taskLines = [
            f"- [{t.get('status', 'open')}] {t.get('title', '')}"
            for t in tasks
            if t.get("title", "")
        ]
        if taskLines:
            tasksStr = "\n".join(taskLines)
            chunks.append(_buildChunk(
                sessionId, contextId, contextTitle,
                "tasks", "tasks", "coaching-tasks",
                f"Coaching-Aufgaben ({contextTitle}):\n{tasksStr}",
            ))

    if not chunks:
        logger.info("No chunks to index for session %s", sessionId)
        return

    logger.info("Indexing %d chunks for coaching session %s", len(chunks), sessionId)
    try:
        contentObjects = [
            {
                "contentObjectId": c["contentObjectId"],
                "contentType": "text",
                "data": c["data"],
                "contextRef": c["contextRef"],
            }
            for c in chunks
        ]
        # Imported lazily to avoid a hard module dependency when indexing is
        # skipped (no service / no chunks).
        from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
        await knowledgeService.requestIngestion(
            IngestionJob(
                sourceKind="coaching_session",
                sourceId=syntheticFileId,
                fileName=f"coaching-session-{sessionId[:8]}",
                mimeType="application/x-coaching-session",
                userId=userId,
                featureInstanceId=featureInstanceId,
                mandateId=mandateId,
                contentObjects=contentObjects,
                provenance={
                    "lane": "feature",
                    "feature": "commcoach",
                    "sessionId": sessionId,
                    "contextId": contextId,
                    "messageCount": len(messages or []),
                },
            )
        )
        logger.info(
            "Successfully indexed coaching session %s (%d chunks)",
            sessionId, len(chunks),
        )
    except Exception as e:
        # Best-effort: indexing failure must never break session completion.
        logger.error(
            "Failed to index coaching session %s: %s", sessionId, e,
            exc_info=True,
        )


def _extractMessagePairs(messages: List[Dict[str, Any]]) -> List[Dict[str, str]]:
    """Extract User+Assistant pairs from the message list.

    Each user message is paired with the immediately following assistant
    message (if any). User messages with empty/whitespace-only content are
    dropped; non-user messages outside a pair are skipped.
    """
    pairs: List[Dict[str, str]] = []
    i = 0
    while i < len(messages):
        msg = messages[i]
        if msg.get("role") == "user":
            userText = (msg.get("content") or "").strip()
            assistantText = ""
            if i + 1 < len(messages) and messages[i + 1].get("role") == "assistant":
                assistantText = (messages[i + 1].get("content") or "").strip()
                i += 2
            else:
                i += 1
            if userText:
                text = f"Benutzer: {userText}"
                if assistantText:
                    text += f"\nCoach: {assistantText}"
                pairs.append({"text": text})
        else:
            i += 1
    return pairs


def _parseJsonSafe(value, fallback):
    """Parse *value* as JSON, returning *fallback* on any failure.

    Already-parsed lists/dicts pass through unchanged; falsy input yields
    the fallback directly.
    """
    if not value:
        return fallback
    if isinstance(value, (list, dict)):
        return value
    try:
        return json.loads(value)
    except (json.JSONDecodeError, TypeError):
        return fallback