418 lines
18 KiB
Python
418 lines
18 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""Conversation manager for the Agent service.
|
|
Handles message history, context window management, and progressive summarization."""
|
|
|
|
import logging
|
|
from typing import List, Dict, Any, Optional
|
|
|
|
from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolDefinition
|
|
from modules.shared.timeUtils import getRequestNow, getRequestTimezone
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Round at which progressive summarization first kicks in (rounds 1-5 keep
# the full conversation).
FIRST_SUMMARY_ROUND = 6
# Round at which prior summaries are consolidated into a meta-summary.
META_SUMMARY_ROUND = 10
# Number of most-recent non-system messages kept verbatim when summarizing.
KEEP_RECENT_MESSAGES = 6
# Estimated-token budget (4 chars ≈ 1 token, see estimateTokenCount) above
# which summarization is forced regardless of round number.
MAX_ESTIMATED_TOKENS = 60000
# Maximum number of prior-conversation messages loaded by loadHistory().
_MAX_HISTORY_MESSAGES = 40
# Per-message character cap applied when loading history; longer messages
# are truncated with an ellipsis.
_MAX_HISTORY_MSG_CHARS = 12000
|
|
|
|
|
|
class ConversationManager:
    """Manages the conversation history and context window for agent runs.

    Progressive summarization strategy:
    - Rounds 1-5: full conversation retained
    - Round 6+: older messages compressed into a running summary
    - Round 10+: meta-summary replaces prior summaries

    Long-term facts (file refs, tool results, decisions) are persisted
    externally in RoundMemory and retrieved via RAG, so the summary
    can focus on reasoning and relationships.

    Supports RAG context injection before each round via injectRagContext.
    """

    def __init__(self, systemPrompt: str):
        # The message list always starts with the main system prompt at index 0;
        # summarize() relies on system messages being identifiable by role.
        self._messages: List[Dict[str, Any]] = [
            {"role": "system", "content": systemPrompt}
        ]
        # Chronological record of {round, content, isMeta} summary entries.
        self._summaries: List[Dict[str, Any]] = []
        # Last round for which summarize() completed successfully.
        self._lastSummarizedRound: int = 0
        # True once a RAG context system message has been injected.
        self._ragContextInjected: bool = False

    def loadHistory(self, messages: List[Dict[str, Any]]) -> None:
        """Load prior conversation messages for follow-up context.

        Accepts messages with {role, content/message} format (as stored in DB).
        Truncates long messages and limits total count to keep the context window
        manageable. Must be called BEFORE addUserMessage with the current prompt.
        """
        if not messages:
            return

        # Keep only the newest _MAX_HISTORY_MESSAGES entries.
        recent = messages[-_MAX_HISTORY_MESSAGES:]
        loaded = 0
        for msg in recent:
            role = msg.get("role", "")
            # DB rows may store the text under either "content" or "message".
            content = msg.get("content", "") or msg.get("message", "") or ""
            # Only user/assistant turns are replayed; system/tool rows are skipped.
            if role not in ("user", "assistant"):
                continue
            if not content.strip():
                continue
            if len(content) > _MAX_HISTORY_MSG_CHARS:
                content = content[:_MAX_HISTORY_MSG_CHARS] + "…"
            self._messages.append({"role": role, "content": content})
            loaded += 1
        if loaded:
            logger.info(f"Loaded {loaded} history messages into conversation context")

    @property
    def messages(self) -> List[Dict[str, Any]]:
        """Current messages for the next AI call (internal markers stripped)."""
        # Keys starting with "_" (e.g. _isCurrentPrompt, _isRagContext) are
        # internal bookkeeping and must never be sent to the model.
        return [
            {k: v for k, v in msg.items() if not k.startswith("_")}
            for msg in self._messages
        ]

    def addUserMessage(self, content: str, isCurrentPrompt: bool = False) -> None:
        """Add a user message.

        Args:
            content: Message text.
            isCurrentPrompt: If True, this message is the user's current
                task prompt and will never be removed by progressive
                summarization.
        """
        msg: Dict[str, Any] = {"role": "user", "content": content}
        if isCurrentPrompt:
            # Marker key is stripped by the `messages` property before AI calls.
            msg["_isCurrentPrompt"] = True
        self._messages.append(msg)

    def addAssistantMessage(self, content: str, toolCalls: Optional[List[Dict[str, Any]]] = None) -> None:
        """Add an assistant message, optionally with tool calls."""
        msg: Dict[str, Any] = {"role": "assistant", "content": content}
        if toolCalls:
            msg["tool_calls"] = toolCalls
        self._messages.append(msg)

    def addToolResults(self, results: List[Dict[str, Any]]) -> None:
        """Add tool results to the conversation.

        Each result: {toolCallId, toolName, content}.
        """
        for result in results:
            self._messages.append({
                "role": "tool",
                "tool_call_id": result["toolCallId"],
                "content": result["content"]
            })

    def addToolResultsAsText(self, resultText: str) -> None:
        """Add combined tool results as a user message (text-based fallback)."""
        self._messages.append({
            "role": "user",
            "content": f"Tool Results:\n{resultText}"
        })

    def injectRagContext(self, ragContext: str) -> None:
        """Inject RAG context as a system message right after the main system prompt.

        Called before each agent round by the agent loop if KnowledgeService is available.
        Replaces any previously injected RAG context to keep the context fresh.
        """
        if not ragContext:
            return

        ragMessage = {
            "role": "system",
            "content": f"Relevant Knowledge (from indexed documents and workflow context):\n{ragContext}",
            # Marker so summarize()/this method can find and preserve/replace it.
            "_isRagContext": True,
        }

        # Replace existing RAG message if present, otherwise insert after system prompt
        for i, msg in enumerate(self._messages):
            if msg.get("_isRagContext"):
                self._messages[i] = ragMessage
                self._ragContextInjected = True
                return

        # Insert after the first system prompt
        self._messages.insert(1, ragMessage)
        self._ragContextInjected = True

    def getMessageCount(self) -> int:
        """Get the number of messages (excluding system prompt)."""
        # NOTE(review): this subtracts only the initial system prompt; an
        # injected RAG system message or a summary message would still be
        # counted — confirm whether that is intended by callers.
        return len(self._messages) - 1

    def estimateTokenCount(self) -> int:
        """Rough estimate of total tokens in the conversation (4 chars ≈ 1 token)."""
        totalChars = sum(len(str(m.get("content", ""))) for m in self._messages)
        return totalChars // 4

    def needsSummarization(self, currentRound: int) -> bool:
        """Check if progressive summarization should be triggered.

        Triggers:
        - At round FIRST_SUMMARY_ROUND (6) if not yet summarized
        - At round META_SUMMARY_ROUND (10) for meta-summary
        - Every 5 rounds after that
        - When estimated token count exceeds MAX_ESTIMATED_TOKENS
        """
        # Guard: never summarize twice for the same round.
        if currentRound >= FIRST_SUMMARY_ROUND and self._lastSummarizedRound < currentRound:
            if currentRound == FIRST_SUMMARY_ROUND or currentRound == META_SUMMARY_ROUND:
                return True
            if (currentRound - META_SUMMARY_ROUND) % 5 == 0 and currentRound > META_SUMMARY_ROUND:
                return True
            # Token-pressure fallback between the scheduled rounds.
            if self.estimateTokenCount() > MAX_ESTIMATED_TOKENS:
                return True
        return False

    async def summarize(
        self,
        currentRound: int,
        aiCallFn,
        externalMemoryKeys: Optional[List[str]] = None,
    ) -> Optional[str]:
        """Perform progressive summarization of older messages.

        Rounds 1-5: full history retained, no summarization.
        Round 6+: compress older messages into a running summary.
        Round 10+: meta-summary that consolidates prior summaries.

        Args:
            currentRound: Current agent round number.
            aiCallFn: Async function that takes a prompt string and returns summary text.
            externalMemoryKeys: Keys of RoundMemory entries for this workflow,
                so the summary prompt can de-duplicate already-persisted facts.

        Returns:
            The new summary text, or None if nothing was summarized (too early,
            too few messages, or the AI call failed).
        """
        # Early rounds are exempt unless the token budget is already exceeded.
        if currentRound < FIRST_SUMMARY_ROUND and self.estimateTokenCount() <= MAX_ESTIMATED_TOKENS:
            return None

        # System messages (main prompt + RAG context) are preserved verbatim.
        systemMsgs = [m for m in self._messages if m.get("role") == "system"]
        nonSystemMessages = [m for m in self._messages if m.get("role") != "system"]

        keepRecent = min(KEEP_RECENT_MESSAGES, len(nonSystemMessages))
        # Need at least one message beyond the retained tail to make it worthwhile.
        if len(nonSystemMessages) <= keepRecent + 1:
            return None

        splitIdx = len(nonSystemMessages) - keepRecent
        # Ensure the split doesn't orphan tool messages from their assistant.
        # Walk backwards from splitIdx: if we're landing in the middle of a
        # tool-call sequence (assistant+tool_calls → tool → tool …), include
        # the entire sequence in recentMessages.
        while splitIdx > 0 and nonSystemMessages[splitIdx].get("role") == "tool":
            splitIdx -= 1
        # Also include the assistant message that triggered the tool calls.
        if splitIdx > 0 and splitIdx < len(nonSystemMessages) and \
                nonSystemMessages[splitIdx].get("role") == "assistant" and \
                nonSystemMessages[splitIdx].get("tool_calls"):
            pass  # splitIdx already points at the assistant; keep it in recent
        elif splitIdx == 0:
            return None  # nothing to summarize

        messagesToSummarize = nonSystemMessages[:splitIdx]
        recentMessages = nonSystemMessages[splitIdx:]

        # Protect the current user prompt: it must NEVER be summarized away.
        promptInRecent = any(m.get("_isCurrentPrompt") for m in recentMessages)
        if not promptInRecent:
            for i, m in enumerate(messagesToSummarize):
                if m.get("_isCurrentPrompt"):
                    # Move the prompt (and everything after it) into the
                    # retained tail instead of the summarized head.
                    recentMessages = messagesToSummarize[i:] + recentMessages
                    messagesToSummarize = messagesToSummarize[:i]
                    break
        if not messagesToSummarize:
            return None

        summaryInput = _formatMessagesForSummary(messagesToSummarize)
        previousSummary = self._summaries[-1]["content"] if self._summaries else ""

        # Meta-summary only once at least two regular summaries exist to consolidate.
        isMetaSummary = currentRound >= META_SUMMARY_ROUND and len(self._summaries) >= 2
        summaryPrompt = _buildSummaryPrompt(
            summaryInput, previousSummary, isMetaSummary,
            externalMemoryKeys=externalMemoryKeys,
        )

        try:
            summaryText = await aiCallFn(summaryPrompt)
        except Exception as e:
            # Best-effort: a failed summarization leaves the conversation intact.
            logger.error(f"Progressive summarization failed: {e}")
            return None

        self._summaries.append({
            "round": currentRound,
            "content": summaryText,
            "isMeta": isMetaSummary,
        })
        self._lastSummarizedRound = currentRound

        mainSystem = systemMsgs[0] if systemMsgs else {"role": "system", "content": ""}
        ragMessages = [m for m in systemMsgs if m.get("_isRagContext")]

        # Rebuild: main system prompt, RAG context, summary, then the recent tail.
        # NOTE(review): the "rounds 1-N" label uses currentRound - keepRecent,
        # which mixes round counts with message counts — confirm the intended
        # range label.
        self._messages = [
            mainSystem,
            *ragMessages,
            {"role": "system", "content": f"Conversation Summary (rounds 1-{currentRound - keepRecent}):\n{summaryText}"},
            *recentMessages,
        ]

        logger.info(
            f"Progressive summarization at round {currentRound}: "
            f"compressed {len(messagesToSummarize)} messages into "
            f"{'meta-' if isMetaSummary else ''}summary"
        )
        return summaryText
|
|
|
|
|
|
def _formatMessagesForSummary(messages: List[Dict[str, Any]]) -> str:
|
|
"""Format messages into a text block for summarization."""
|
|
parts = []
|
|
for msg in messages:
|
|
role = msg.get("role", "unknown")
|
|
content = msg.get("content", "")
|
|
if role == "tool":
|
|
toolName = msg.get("tool_call_id", "tool")
|
|
parts.append(f"[Tool Result ({toolName})]:\n{content}")
|
|
elif role == "assistant" and msg.get("tool_calls"):
|
|
calls = msg["tool_calls"]
|
|
callNames = [c.get("function", {}).get("name", "?") for c in calls]
|
|
parts.append(f"[Assistant → Tool Calls: {', '.join(callNames)}]")
|
|
if content:
|
|
parts.append(f"[Assistant]: {content}")
|
|
else:
|
|
parts.append(f"[{role.capitalize()}]: {content}")
|
|
return "\n\n".join(parts)
|
|
|
|
|
|
def _buildSummaryPrompt(
|
|
messagesText: str,
|
|
previousSummary: str,
|
|
isMetaSummary: bool = False,
|
|
externalMemoryKeys: List[str] = None,
|
|
) -> str:
|
|
"""Build the prompt for progressive summarization.
|
|
|
|
When externalMemoryKeys is provided, the summary prompt tells the AI
|
|
that those facts are preserved in external memory and need not be
|
|
repeated verbatim — the summary can focus on reasoning, decisions,
|
|
and relationships instead.
|
|
"""
|
|
externalHint = ""
|
|
if externalMemoryKeys:
|
|
keyList = ", ".join(externalMemoryKeys[:20])
|
|
externalHint = (
|
|
"NOTE: The following facts are preserved in external persistent memory "
|
|
"and do NOT need to be repeated in detail in the summary: "
|
|
f"[{keyList}]. "
|
|
"Focus on reasoning, decisions, relationships, and anything that is "
|
|
"NOT captured by those external memory entries.\n\n"
|
|
)
|
|
|
|
if isMetaSummary:
|
|
prompt = (
|
|
"Create a comprehensive meta-summary consolidating the previous summary "
|
|
"and the new messages. Preserve all key facts, decisions, entities (names, "
|
|
"numbers, dates), tool results, and action outcomes. Be concise but complete.\n\n"
|
|
)
|
|
else:
|
|
prompt = (
|
|
"Summarize the following conversation concisely. Preserve key decisions, "
|
|
"reasoning chains, entities (names, numbers, dates), and action outcomes. "
|
|
"Do not lose any important information.\n\n"
|
|
)
|
|
prompt += externalHint
|
|
if previousSummary:
|
|
prompt += f"Previous Summary:\n{previousSummary}\n\n"
|
|
prompt += f"New Messages to Summarize:\n{messagesText}\n\nProvide a concise, factual summary:"
|
|
return prompt
|
|
|
|
|
|
def _buildTemporalContext() -> str:
    """Inject current date/time (in the user's browser timezone) into the system prompt.

    LLMs have no innate access to "now" and otherwise hallucinate from their
    training cutoff. The browser timezone is propagated via the
    ``X-User-Timezone`` request header (see ``api.ts`` axios interceptor and the
    ``_requestContextMiddleware`` in ``app.py``). When called outside of an HTTP
    context, ``getRequestNow()`` falls back to UTC.
    """
    timezoneName = getRequestTimezone()
    currentTime = getRequestNow()
    contextLines = [
        "## Current Date & Time",
        f"- Today: {currentTime.strftime('%Y-%m-%d (%A)')}",
        f"- Now: {currentTime.strftime('%H:%M')} ({timezoneName})",
        '- Use this for any relative time references such as "today", '
        '"yesterday", "last week", "this month", "Q1", etc.',
        "- Do NOT rely on your training cutoff for the current date.",
    ]
    # Trailing blank line separates this section from the rest of the prompt.
    return "\n".join(contextLines) + "\n\n"
|
|
|
|
|
|
def buildSystemPrompt(
    tools: List[ToolDefinition],
    toolsFormatted: Optional[str] = None,
    userLanguage: str = "",
) -> str:
    """Build the system prompt for the agent.

    Args:
        tools: Available tool definitions. Not referenced in this body;
            kept so existing callers that pass the tool list keep working.
        toolsFormatted: Pre-formatted tool descriptions for text-based fallback.
            When provided, the prompt also includes the ``tool_call`` block syntax.
        userLanguage: Kept for backwards compatibility, no longer used for language selection.

    Returns:
        The complete system prompt string (temporal context, language rule,
        working guidelines, and optionally the text-based tool-call protocol).
    """
    # Respond-in-user's-language rule; tool arguments stay English.
    langInstruction = (
        "IMPORTANT: Always respond in the same language the user writes in. "
        "If the user writes in German, respond in German. If in French, respond in French. "
        "Generate documents and content in the user's language unless explicitly asked otherwise. "
        "Only use English for tool call arguments and technical identifiers.\n\n"
    )

    # Temporal context first so date/time grounding precedes everything else.
    prompt = (
        _buildTemporalContext()
        + f"{langInstruction}"
        + "You are an AI agent with access to tools. "
        "Use the provided tools to accomplish the user's task. "
        "Think step by step. Call tools when you need information or need to perform actions. "
        "When you have enough information to answer, respond directly without calling tools.\n\n"
    )

    prompt += (
        "## Working Guidelines\n\n"
        "### Workflow Context\n"
        "When continuing a workflow (follow-up message), the Relevant Knowledge section contains "
        "artifacts from previous rounds (file IDs, operations). Use this context instead of "
        "re-searching or re-listing files.\n\n"
        "### Efficient File Editing\n"
        "- Use readFile with offset/limit to read specific line ranges of large files.\n"
        "- Use searchInFileContent to find text before editing.\n"
        "- Use replaceInFile for targeted edits (preferred over rewriting entire files).\n"
        "- Use writeFile(mode='overwrite') only when the entire content must change.\n\n"
        "### Large Content Strategy\n"
        "- For content larger than ~8000 characters: use writeFile(mode='create') for the first "
        "part, then writeFile(fileId=..., mode='append') for subsequent parts.\n"
        "- Split large documentation into multiple focused files rather than one huge document.\n"
        "- Structure outputs so files reference each other (e.g. index.md linking to sections).\n\n"
        "### Code Generation\n"
        "- Prefer modular file structures over monolithic files.\n"
        "- When generating applications, create separate files for logical components.\n"
        "- Always plan the structure before writing code.\n\n"
        "### Document references for AI tools (CRITICAL)\n"
        "Tools that produce a file (`downloadFromDataSource`, `writeFile mode=create`, "
        "`renderDocument`, `generateImage`, `createChart`) return a result line with TWO ids:\n"
        "- `documentList ref: docItem:<chatDocId>` — pass this STRING VERBATIM as an entry of "
        " `documentList` for `ai_process`, `ai_summarizeDocument`, `context_extractContent`, "
        " `context_neutralizeData`, etc. Always as the literal `docItem:<id>` — do NOT wrap "
        " in `{\"documents\":[{\"id\":...}]}` and do NOT use the file id here, the documentList "
        " resolver only matches `docItem:` references.\n"
        "- `file id: <fileId>` — use for `readFile`, `searchInFileContent`, `writeFile mode=append`, "
        " and image embeds (`![desc](file:<fileId>)`).\n"
        "Example: after `downloadFromDataSource` returns `docItem:abc123`, call "
        "`ai_summarizeDocument(documentList=[\"docItem:abc123\"], summaryLength=\"medium\")`.\n\n"
    )

    # Text-based fallback protocol for models without native tool calling.
    if toolsFormatted:
        prompt += f"Available Tools:\n{toolsFormatted}\n\n"
        prompt += (
            "To call a tool, use this format:\n"
            "```tool_call\n"
            "tool: <tool_name>\n"
            'args: {"param": "value"}\n'
            "```\n\n"
        )
    return prompt
|