# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
CommCoach Session Indexer.

Indexes coaching session data into the knowledge store (pgvector) for RAG-based long-term memory.

Called after session completion to ensure semantic searchability across 20+ sessions.
"""

import logging
import uuid
import json
from typing import List, Dict, Any, Optional

logger = logging.getLogger(__name__)

_COACHING_FILE_PREFIX = "coaching-session:"
async def indexSessionData(
    sessionId: str,
    contextId: str,
    userId: str,
    featureInstanceId: str,
    mandateId: str,
    messages: List[Dict[str, Any]],
    summary: Optional[str],
    keyTopics: Optional[str],
    goals: Optional[List[Any]],
    insights: Optional[List[Any]],
    tasks: Optional[List[Dict[str, Any]]],
    contextTitle: str = "",
    knowledgeService=None,
):
    """Index a completed coaching session into the knowledge store.

    Creates ContentChunks with embeddings for:
    - Each User+Assistant message pair (maximum detail depth)
    - Session summary
    - Key topics (individually, for precise retrieval)
    - Current goals
    - New insights
    - Tasks (open + done)

    All chunks share one synthetic file id derived from ``sessionId``, so the
    session can be (re)indexed as a single unit via ``knowledgeService.indexFile``.

    Args:
        sessionId: Id of the completed coaching session; used in all chunk ids.
        contextId: Owning context id, stored in each chunk's contextRef.
        userId / featureInstanceId / mandateId: Tenancy scope passed through
            to the knowledge service.
        messages: Chat transcript as role/content dicts.
        summary: Optional session summary text.
        keyTopics: Optional JSON string (or pre-parsed list) of topic strings.
        goals / insights: Optional lists of strings or ``{"text": ...}`` dicts.
        tasks: Optional list of dicts with ``status``/``title`` keys.
        contextTitle: Human-readable context title embedded in chunk payloads.
        knowledgeService: Service exposing ``indexFile``; no-op when absent.

    Returns:
        None. Indexing failures are logged, never raised to the caller.
    """
    if not knowledgeService:
        logger.warning("No knowledge service available for coaching indexer")
        return

    syntheticFileId = f"{_COACHING_FILE_PREFIX}{sessionId}"

    def _chunk(objectSuffix: str, data: str, location: str, chunkType: str) -> Dict[str, Any]:
        # Every chunk shares the same session-scoped contextRef; only the
        # object id suffix, payload text, location and type differ.
        return {
            "contentObjectId": f"{sessionId}:{objectSuffix}",
            "data": data,
            "contextRef": {
                "containerPath": f"session:{sessionId}",
                "location": location,
                "type": chunkType,
                "contextId": contextId,
                "sessionId": sessionId,
                "contextTitle": contextTitle,
            },
        }

    chunks: List[Dict[str, Any]] = []

    # 1. Message pairs (User + Assistant) as individual chunks
    for idx, pair in enumerate(_extractMessagePairs(messages)):
        chunks.append(
            _chunk(f"msg-pair:{idx}", pair["text"], f"message-pair-{idx}", "coaching-message-pair")
        )

    # 2. Session summary
    if summary:
        chunks.append(
            _chunk(
                "summary",
                f"Session-Zusammenfassung ({contextTitle}): {summary}",
                "summary",
                "coaching-session-summary",
            )
        )

    # 3. Key topics (each as separate chunk for precise retrieval)
    for tidx, topic in enumerate(_parseJsonSafe(keyTopics, [])):
        topicStr = str(topic).strip()
        if topicStr:
            chunks.append(
                _chunk(
                    f"topic:{tidx}",
                    f"Coaching-Thema ({contextTitle}): {topicStr}",
                    f"topic-{tidx}",
                    "coaching-key-topic",
                )
            )

    # 4. Goals — accept plain strings or {"text": ...} dicts
    if goals:
        goalTexts = [g.get("text", g) if isinstance(g, dict) else str(g) for g in goals if g]
        if goalTexts:
            goalsStr = "\n".join(f"- {g}" for g in goalTexts)
            chunks.append(
                _chunk("goals", f"Coaching-Ziele ({contextTitle}):\n{goalsStr}", "goals", "coaching-goals")
            )

    # 5. Insights — same shape tolerance as goals
    if insights:
        insightTexts = [i.get("text", i) if isinstance(i, dict) else str(i) for i in insights if i]
        if insightTexts:
            insightsStr = "\n".join(f"- {t}" for t in insightTexts)
            chunks.append(
                _chunk(
                    "insights",
                    f"Coaching-Erkenntnisse ({contextTitle}):\n{insightsStr}",
                    "insights",
                    "coaching-insights",
                )
            )

    # 6. Tasks — one combined chunk listing "[status] title" per task
    if tasks:
        taskLines = []
        for t in tasks:
            status = t.get("status", "open")
            title = t.get("title", "")
            if title:
                taskLines.append(f"- [{status}] {title}")
        if taskLines:
            tasksStr = "\n".join(taskLines)
            chunks.append(
                _chunk("tasks", f"Coaching-Aufgaben ({contextTitle}):\n{tasksStr}", "tasks", "coaching-tasks")
            )

    if not chunks:
        logger.info("No chunks to index for session %s", sessionId)
        return

    logger.info("Indexing %d chunks for coaching session %s", len(chunks), sessionId)

    try:
        contentObjects = [
            {
                "contentObjectId": c["contentObjectId"],
                "contentType": "text",
                "data": c["data"],
                "contextRef": c["contextRef"],
            }
            for c in chunks
        ]

        await knowledgeService.indexFile(
            fileId=syntheticFileId,
            fileName=f"coaching-session-{sessionId[:8]}",
            mimeType="application/x-coaching-session",
            userId=userId,
            featureInstanceId=featureInstanceId,
            mandateId=mandateId,
            contentObjects=contentObjects,
        )
        logger.info("Successfully indexed coaching session %s (%d chunks)", sessionId, len(chunks))
    except Exception:
        # Best-effort: indexing must never break session completion upstream.
        logger.exception("Failed to index coaching session %s", sessionId)
def _extractMessagePairs(messages: List[Dict[str, Any]]) -> List[Dict[str, str]]:
|
|
"""Extract User+Assistant pairs from message list."""
|
|
pairs = []
|
|
i = 0
|
|
while i < len(messages):
|
|
msg = messages[i]
|
|
if msg.get("role") == "user":
|
|
userText = (msg.get("content") or "").strip()
|
|
assistantText = ""
|
|
if i + 1 < len(messages) and messages[i + 1].get("role") == "assistant":
|
|
assistantText = (messages[i + 1].get("content") or "").strip()
|
|
i += 2
|
|
else:
|
|
i += 1
|
|
if userText:
|
|
text = f"Benutzer: {userText}"
|
|
if assistantText:
|
|
text += f"\nCoach: {assistantText}"
|
|
pairs.append({"text": text})
|
|
else:
|
|
i += 1
|
|
return pairs
|
|
|
|
|
|
def _parseJsonSafe(value, fallback):
|
|
if not value:
|
|
return fallback
|
|
if isinstance(value, (list, dict)):
|
|
return value
|
|
try:
|
|
return json.loads(value)
|
|
except (json.JSONDecodeError, TypeError):
|
|
return fallback
|