removed demo files

Ida Dittrich 2026-02-26 16:05:26 +01:00
parent d3dbca7289
commit 025fb8ff61
15 changed files with 2 additions and 2429 deletions

@@ -1,3 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Chatbot V2 feature - context-aware chat with file upload and extraction."""

@@ -1,8 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Chatbot V2 bridges - AI and memory."""
from modules.features.chatbot.bridges.ai import AICenterChatModel, clear_workflow_allowed_providers
from .memory import ChatbotV2Checkpointer
__all__ = ["AICenterChatModel", "clear_workflow_allowed_providers", "ChatbotV2Checkpointer"]

@@ -1,187 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Chatbot V2 checkpointer - maps LangGraph state to ChatbotV2 message storage.
"""
import logging
import uuid
from typing import Any, Dict, List, Optional, Tuple, NamedTuple
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langgraph.checkpoint.base import BaseCheckpointSaver, Checkpoint, CheckpointMetadata
class CheckpointTuple(NamedTuple):
"""Tuple containing config, checkpoint, metadata, parent_config, and pending_writes."""
config: Dict[str, Any]
checkpoint: Checkpoint
metadata: CheckpointMetadata
parent_config: Optional[Dict[str, Any]] = None
pending_writes: Optional[List[Tuple[str, Any]]] = None
from modules.features.chatbotV2.interfaceFeatureChatbotV2 import getInterface as getChatbotV2Interface
from modules.features.chatbotV2.datamodelFeatureChatbotV2 import ChatbotV2Message
from modules.datamodels.datamodelUam import User
from modules.shared.timeUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
def _sanitize_llm_response(text: str) -> str:
"""Strip chat template tokens and trailing junk."""
if not text or not isinstance(text, str):
return text or ""
for sentinel in ("<|im_start|>", "<|im_end|>", "<|endoftext|>", "<|user|>", "<|assistant|>"):
if sentinel in text:
text = text.split(sentinel)[0]
return text.strip()
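
A quick illustration of the sanitizer's effect (the raw response string below is made up):
everything from the first chat-template sentinel onward is cut off, then the remainder is stripped.

# Illustrative only - the raw response text is a made-up example.
raw = "Answer text. <|im_end|>\n<|im_start|>user\nnext turn"
assert _sanitize_llm_response(raw) == "Answer text."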
class ChatbotV2Checkpointer(BaseCheckpointSaver):
"""Checkpointer that stores messages via ChatbotV2 interface."""
def __init__(
self,
user: User,
workflow_id: str,
mandateId: Optional[str] = None,
featureInstanceId: Optional[str] = None
):
self.user = user
self.workflow_id = workflow_id
self.interface = getChatbotV2Interface(
user,
mandateId=mandateId,
featureInstanceId=featureInstanceId
)
def _to_db_message(
self,
msg: BaseMessage,
sequence_nr: int,
round_number: int
) -> Dict[str, Any]:
role = "user"
content = ""
if isinstance(msg, HumanMessage):
role = "user"
content = msg.content if isinstance(msg.content, str) else str(msg.content or "")
elif isinstance(msg, AIMessage):
role = "assistant"
content = msg.content if isinstance(msg.content, str) else str(msg.content or "")
content = _sanitize_llm_response(content)
elif isinstance(msg, SystemMessage):
role = "system"
content = msg.content if isinstance(msg.content, str) else str(msg.content or "")
return {
"id": str(uuid.uuid4()),
"conversationId": self.workflow_id,
"message": content,
"role": role,
"status": "step" if sequence_nr > 1 else "first",
"sequenceNr": sequence_nr,
"publishedAt": getUtcTimestamp(),
"roundNumber": round_number
}
def _to_langchain(self, messages: List[ChatbotV2Message]) -> List[BaseMessage]:
result = []
for m in messages:
if m.role == "user":
result.append(HumanMessage(content=m.message or ""))
elif m.role == "assistant":
result.append(AIMessage(content=m.message or ""))
elif m.role == "system":
result.append(SystemMessage(content=m.message or ""))
return result
def put(
self,
config: Dict[str, Any],
checkpoint: Checkpoint,
metadata: CheckpointMetadata,
new_versions: Dict[str, int],
) -> None:
thread_id = config.get("configurable", {}).get("thread_id", self.workflow_id)
conv = self.interface.getConversation(thread_id)
if not conv:
logger.warning(f"Conversation {thread_id} not found")
return
round_number = conv.currentRound or 1
state = checkpoint.get("channel_values", {})
messages = state.get("messages", [])
if not messages:
return
existing = self.interface.getMessages(thread_id)
existing_set = {(m.role, m.message) for m in existing}
existing_count = len(existing)
for i, msg in enumerate(messages):
if not isinstance(msg, (HumanMessage, AIMessage)):
continue
role = "user" if isinstance(msg, HumanMessage) else "assistant"
content = msg.content if isinstance(msg.content, str) else str(msg.content or "")
if isinstance(msg, AIMessage):
content = _sanitize_llm_response(content)
if not content or not content.strip():
continue
if (role, content) in existing_set:
continue
existing_set.add((role, content))
existing_count += 1
db_msg = self._to_db_message(msg, existing_count, round_number)
self.interface.createMessage(db_msg)
self.interface.updateConversation(thread_id, {"lastActivity": getUtcTimestamp()})
def get(self, config: Dict[str, Any]) -> Optional[Checkpoint]:
thread_id = config.get("configurable", {}).get("thread_id", self.workflow_id)
conv = self.interface.getConversation(thread_id)
if not conv:
return None
messages = self.interface.getMessages(thread_id)
lc_messages = self._to_langchain(messages)
return {
"id": str(uuid.uuid4()),
"v": 1,
"ts": getUtcTimestamp(),
"channel_values": {"messages": lc_messages},
"channel_versions": {},
"versions_seen": {}
}
# Async methods required for LangGraph ainvoke/astream
async def aget_tuple(
self,
config: Dict[str, Any],
) -> Optional[CheckpointTuple]:
"""Async version of get that returns tuple of (config, checkpoint, metadata)."""
checkpoint = self.get(config)
if checkpoint:
metadata: CheckpointMetadata = {"step": 0}
return CheckpointTuple(
config=config,
checkpoint=checkpoint,
metadata=metadata,
parent_config=None,
pending_writes=None,
)
return None
async def aput(
self,
config: Dict[str, Any],
checkpoint: Checkpoint,
metadata: CheckpointMetadata,
new_versions: Dict[str, int],
) -> None:
"""Async version of put."""
self.put(config, checkpoint, metadata, new_versions)
async def aput_writes(
self,
config: Dict[str, Any],
writes: List[Tuple[str, Any]],
task_id: str,
) -> None:
"""Async version of put_writes. No-op - writes are handled through aput()."""
pass
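
A minimal usage sketch, assuming an authenticated User and an existing ChatbotV2 conversation
(current_user and conversation_id are placeholders): the checkpointer resolves the conversation
through the ChatbotV2 interface, so get() returns None until that conversation exists.

# Hypothetical wiring - current_user and conversation_id are placeholders.
memory = ChatbotV2Checkpointer(user=current_user, workflow_id=conversation_id)
config = {"configurable": {"thread_id": conversation_id}}
checkpoint = memory.get(config)      # None if the conversation does not exist yet
history = checkpoint["channel_values"]["messages"] if checkpoint else []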

@@ -1,210 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Chatbot V2 domain logic - simple chat LangGraph with context injection.
Uses chunk-based retrieval with retry: when the AI answers that the content is not found
(e.g. "nicht enthalten"), the next chunk batch is tried until content is found or all chunks have been searched.
"""
import asyncio
import logging
import re
from typing import Annotated, Optional, TYPE_CHECKING, List, Dict, Any
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langgraph.graph.message import add_messages
from langgraph.graph import StateGraph, START, END
from langgraph.graph.state import CompiledStateGraph
from pydantic import BaseModel
from modules.features.chatbotV2.bridges import AICenterChatModel, ChatbotV2Checkpointer
from modules.features.chatbotV2.contextChunkRetrieval import (
chunk_sections,
chunk_text_blocks,
get_ordered_chunks_for_question,
get_chunk_batch,
response_indicates_not_found,
DEFAULT_CHUNK_SIZE,
DEFAULT_CHUNK_OVERLAP,
)
if TYPE_CHECKING:
from modules.features.chatbotV2.config import ChatbotV2Config
logger = logging.getLogger(__name__)
class ChatState(BaseModel):
"""State for Chatbot V2 chat session."""
messages: Annotated[list[BaseMessage], add_messages]
# Optional context for chunk retrieval (passed at invoke, not persisted)
chatbotv2_context: Optional[Dict[str, Any]] = None
# Default max context chars (~20k tokens) - fits GPT 25k limit with room for prompt + response
DEFAULT_MAX_CONTEXT_CHARS = 60_000
def _build_system_prompt_from_chunks(
base_prompt: str,
chunks: List[Dict[str, Any]]
) -> str:
"""Build system prompt from a list of chunks."""
if not chunks:
return base_prompt
header = "\n\n--- DOCUMENT CONTEXT (use this to answer user questions) ---\n"
parts = [base_prompt, header]
current_file = None
for chunk in chunks:
fn = chunk.get("fileName", "document")
if fn != current_file:
parts.append(f"\n### {fn}\n")
current_file = fn
parts.append(chunk.get("text", ""))
return "\n".join(parts)
def build_context_system_prompt(
base_prompt: str,
extracted_context: dict,
user_question: str,
max_context_chars: Optional[int] = None,
chunk_size: Optional[int] = None,
chunk_overlap: Optional[int] = None,
) -> str:
"""
Build system prompt with first chunk batch (for single-call use).
For retry loop, use get_ordered_chunks_for_question + get_chunk_batch + _build_system_prompt_from_chunks.
"""
chunks = _get_all_chunks(extracted_context, chunk_size, chunk_overlap)
if not chunks:
return base_prompt
ordered = get_ordered_chunks_for_question(chunks, user_question or "")
selected = get_chunk_batch(ordered, 0, max_context_chars or DEFAULT_MAX_CONTEXT_CHARS)
return _build_system_prompt_from_chunks(base_prompt, selected)
def _get_all_chunks(
extracted_context: dict,
chunk_size: Optional[int],
chunk_overlap: Optional[int],
) -> List[Dict[str, Any]]:
"""Get all chunks from extracted context."""
sections = extracted_context.get("sections", [])
text_blocks = extracted_context.get("textBlocks", [])
if not sections and not text_blocks:
return []
cs = chunk_size if chunk_size and chunk_size > 0 else DEFAULT_CHUNK_SIZE
co = chunk_overlap if chunk_overlap is not None and chunk_overlap >= 0 else DEFAULT_CHUNK_OVERLAP
if sections:
return chunk_sections(sections, chunk_size=cs, chunk_overlap=co)
return chunk_text_blocks(text_blocks, chunk_size=cs, chunk_overlap=co)
def create_chat_graph(
model: AICenterChatModel,
memory: ChatbotV2Checkpointer,
) -> CompiledStateGraph:
"""
Create chat graph with retry loop: when AI says content not found,
tries next chunk batch until found or exhausted.
Context params passed via state.chatbotv2_context at invoke time.
"""
async def chat_node(state: ChatState) -> dict:
# State can be dict (LangGraph) or Pydantic model
state_dict = state if isinstance(state, dict) else (state.model_dump() if hasattr(state, "model_dump") else {})
msgs = state_dict.get("messages", [])
if not msgs:
return {}
ctx = state_dict.get("chatbotv2_context") or {}
ctx_dict = ctx.get("ctx_dict", {})
user_question = ctx.get("user_question", "")
base_prompt = ctx.get("base_prompt", "Answer based on the provided context.")
max_chars = ctx.get("max_context_chars") or DEFAULT_MAX_CONTEXT_CHARS
chunk_size = ctx.get("chunk_size") or DEFAULT_CHUNK_SIZE
chunk_overlap = ctx.get("chunk_overlap")
if max_chars <= 0:
max_chars = DEFAULT_MAX_CONTEXT_CHARS
if chunk_overlap is None or chunk_overlap < 0:
chunk_overlap = DEFAULT_CHUNK_OVERLAP
user_msgs = [m for m in msgs if not isinstance(m, SystemMessage)]
if not user_msgs:
return {}
# Get chunks - use DOCUMENT ORDER for retry (batch 0 = start, batch 1 = next part, etc.)
chunks = _get_all_chunks(ctx_dict, chunk_size, chunk_overlap)
if not chunks:
logger.warning("No chunks from ctx_dict - sections=%s, textBlocks=%s",
len(ctx_dict.get("sections", [])), len(ctx_dict.get("textBlocks", [])))
# Always use document order (chunkIndex) for systematic search through entire document
ordered = sorted(chunks, key=lambda c: c.get("chunkIndex", 0))
batch_index = 0
last_response = None
logger.info("Chunk retrieval: %d chunks total, max_chars=%d, will try batches until found or exhausted",
len(ordered), max_chars)
while True:
batch = get_chunk_batch(ordered, batch_index, max_chars) if ordered else []
if not batch:
# No more chunks - return last response or final message
if last_response:
return {"messages": [last_response]}
return {"messages": [AIMessage(
content="Ich habe das gesamte Dokument durchsucht, konnte aber keine "
"passende Information zu Ihrer Frage finden. Bitte formulieren Sie die Frage "
"ggf. anders oder prüfen Sie, ob das Dokument die gewünschten Angaben enthält."
)]}
system_prompt = _build_system_prompt_from_chunks(base_prompt, batch)
window = [SystemMessage(content=system_prompt)] + user_msgs
response = None
for attempt in range(3): # Max 3 attempts (initial + 2 retries on rate limit)
try:
response = await model.ainvoke(window)
break
except Exception as exc:
err_str = str(exc).lower()
if ("429" in err_str or "rate limit" in err_str) and attempt < 2:
wait_secs = 6
match = re.search(r"try again in ([\d.]+)s", err_str, re.IGNORECASE)
if match:
wait_secs = max(6, int(float(match.group(1))) + 1)
logger.warning("Rate limit hit on chunk batch %d, waiting %ds before retry (attempt %d/3)",
batch_index, wait_secs, attempt + 1)
await asyncio.sleep(wait_secs)
else:
if "No suitable model found" in str(exc):
return {"messages": [AIMessage(
content="Es tut mir leid, derzeit steht kein passendes KI-Modell zur Verfügung. "
"Bitte versuchen Sie es später erneut."
)]}
raise
if response is None:
return {"messages": [AIMessage(
content="Ein Fehler ist aufgetreten. Bitte versuchen Sie es später erneut."
)]}
content = response.content if hasattr(response, "content") else str(response)
if response_indicates_not_found(content):
logger.info("Chunk batch %d: AI said not found (%.0f chars), trying next batch",
batch_index, len(content))
batch_index += 1
last_response = response
await asyncio.sleep(5) # Pause before next batch to avoid rate limits
continue
logger.info("Chunk batch %d: Found answer (%.0f chars)", batch_index, len(content))
return {"messages": [response]}
workflow = StateGraph(ChatState)
workflow.add_node("chat", chat_node)
workflow.add_edge(START, "chat")
workflow.add_edge("chat", END)
return workflow.compile(checkpointer=memory)
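
A sketch of how the compiled graph is driven (model, memory, extracted, and conversation_id are
placeholders): the document context travels in chatbotv2_context on the invoke payload and is not
persisted by the checkpointer.

# Hypothetical invocation - model, memory, extracted, and conversation_id are placeholders.
graph = create_chat_graph(model, memory)

async def ask(question: str) -> str:
    result = await graph.ainvoke(
        {
            "messages": [HumanMessage(content=question)],
            "chatbotv2_context": {
                "ctx_dict": extracted,  # {"sections": [...]} and/or {"textBlocks": [...]}
                "user_question": question,
                "base_prompt": "Answer based on the provided context.",
            },
        },
        config={"configurable": {"thread_id": conversation_id}},
    )
    return result["messages"][-1].content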

@@ -1,98 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Configuration system for Chatbot V2 instances.
Loads configuration from FeatureInstance.config JSONB field.
"""
import logging
from dataclasses import dataclass
from typing import Optional, Dict, Any, List, TYPE_CHECKING
if TYPE_CHECKING:
from modules.datamodels.datamodelFeatures import FeatureInstance
logger = logging.getLogger(__name__)
_config_cache: Dict[str, 'ChatbotV2Config'] = {}
DEFAULT_SYSTEM_PROMPT = (
"You are a helpful assistant. Answer questions based on the provided context documents. "
"When the user asks about the documents, use the extracted content to provide accurate answers. "
"If the context does not contain relevant information, say so."
)
@dataclass
class ModelConfig:
"""Model configuration for Chatbot V2."""
operationType: str = "DATA_ANALYSE"
processingMode: str = "BASIC"
allowedProviders: Optional[List[str]] = None
def __post_init__(self):
if self.allowedProviders is None:
self.allowedProviders = []
def _parse_int(val: Optional[Any], default: Optional[int] = None) -> Optional[int]:
"""Parse int from config value."""
if val is None:
return default
if isinstance(val, int):
return val
try:
return int(val)
except (TypeError, ValueError):
return default
@dataclass
class ChatbotV2Config:
"""Configuration for a Chatbot V2 instance."""
id: str
name: str
systemPrompt: str
model: ModelConfig
maxContextChars: Optional[int] = None # Max document chars in system prompt (~60k ≈ 20k tokens). None = default 60k.
chunkSize: Optional[int] = None # Chunk size in chars (~15k). None = default.
chunkOverlap: Optional[int] = None # Overlap between chunks in chars (~500). None = default.
@classmethod
def from_dict(cls, data: Dict[str, Any], config_id: str = "default") -> 'ChatbotV2Config':
"""Create ChatbotV2Config from dictionary."""
system_prompt = data.get("systemPrompt") or DEFAULT_SYSTEM_PROMPT
model_data = data.get("model", {})
allowed_providers = model_data.get("allowedProviders") or data.get("allowedProviders", [])
model_config = ModelConfig(
operationType=model_data.get("operationType", "DATA_ANALYSE"),
processingMode=model_data.get("processingMode", "BASIC"),
allowedProviders=allowed_providers if isinstance(allowed_providers, list) else []
)
return cls(
id=data.get("id", config_id),
name=data.get("name", "Chatbot V2"),
systemPrompt=system_prompt,
model=model_config,
maxContextChars=_parse_int(data.get("maxContextChars")),
chunkSize=_parse_int(data.get("chunkSize")),
chunkOverlap=_parse_int(data.get("chunkOverlap"))
)
def load_chatbotv2_config_from_instance(instance: 'FeatureInstance') -> ChatbotV2Config:
"""Load Chatbot V2 configuration from a FeatureInstance's config field."""
instance_id = instance.id
cache_key = f"instance_{instance_id}"
if cache_key in _config_cache:
return _config_cache[cache_key]
config_data = instance.config or {}
config = ChatbotV2Config.from_dict(config_data, config_id=instance_id)
_config_cache[cache_key] = config
logger.info(f"Loaded chatbotv2 config from instance {instance_id}")
return config
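
For illustration, a config dict as it might be stored in FeatureInstance.config (all values
here are made up):

# Hypothetical instance config - values are illustrative only.
cfg = ChatbotV2Config.from_dict(
    {
        "name": "Contract Chat",
        "systemPrompt": "Answer strictly from the uploaded documents.",
        "model": {"operationType": "DATA_ANALYSE", "processingMode": "BASIC"},
        "maxContextChars": "60000",  # string values are tolerated via _parse_int
        "chunkSize": 15000,
    },
    config_id="demo-instance",
)
assert cfg.maxContextChars == 60000 and cfg.chunkOverlap is None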

@@ -1,272 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Chunk-based context retrieval for Chatbot V2.
Splits documents into chunks and selects relevant chunks per user question.
If no relevant chunk is found, falls back to next chunks in document order - no context is lost.
"""
import re
import logging
from typing import List, Dict, Any, Optional
logger = logging.getLogger(__name__)
# Default chunk size (~5k tokens each), overlap for context continuity
DEFAULT_CHUNK_SIZE = 15_000
DEFAULT_CHUNK_OVERLAP = 500
# Stopwords for relevance scoring (DE/EN/FR - minimal set)
STOPWORDS = {
"der", "die", "das", "den", "dem", "des", "ein", "eine", "einer", "eines",
"the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
"of", "with", "by", "from", "is", "are", "was", "were", "be", "been",
"le", "la", "les", "un", "une", "des", "du", "de", "et", "ou",
"was", "wer", "wie", "wo", "wann", "warum", "welche", "welcher",
}
def _extract_keywords(text: str) -> set:
"""Extract significant words for relevance scoring."""
text_lower = text.lower()
words = re.findall(r"\b[a-zàâäéèêëïîôùûüÿæœçß]{2,}\b", text_lower)
return {w for w in words if w not in STOPWORDS and len(w) > 1}
def chunk_sections(
sections: List[Dict[str, Any]],
chunk_size: int = DEFAULT_CHUNK_SIZE,
chunk_overlap: int = DEFAULT_CHUNK_OVERLAP
) -> List[Dict[str, Any]]:
"""
Split sections into overlapping chunks.
Each chunk has: chunkIndex, text, fileName, fileId, sectionIndex.
"""
chunks = []
chunk_index = 0
for sec_idx, section in enumerate(sections):
text = section.get("text", "")
file_name = section.get("fileName", "document")
file_id = section.get("fileId", "")
if not text:
continue
start = 0
while start < len(text):
end = min(start + chunk_size, len(text))
chunk_text = text[start:end]
chunks.append({
"chunkIndex": chunk_index,
"text": chunk_text,
"fileName": file_name,
"fileId": file_id,
"sectionIndex": sec_idx,
"startChar": start,
"endChar": end,
})
chunk_index += 1
start = end - chunk_overlap if end < len(text) else len(text)
return chunks
def chunk_text_blocks(
text_blocks: List[Dict[str, Any]],
chunk_size: int = DEFAULT_CHUNK_SIZE,
chunk_overlap: int = DEFAULT_CHUNK_OVERLAP
) -> List[Dict[str, Any]]:
"""Split textBlocks (blocks per file) into chunks."""
chunks = []
chunk_index = 0
for doc in text_blocks:
blocks = doc.get("blocks", [])
file_name = doc.get("fileName", "document")
file_id = doc.get("fileId", "")
text_parts = []
for b in blocks:
text_parts.append(b.get("text", ""))
full_text = "\n".join(text_parts)
if not full_text:
continue
start = 0
while start < len(full_text):
end = min(start + chunk_size, len(full_text))
chunk_text = full_text[start:end]
chunks.append({
"chunkIndex": chunk_index,
"text": chunk_text,
"fileName": file_name,
"fileId": file_id,
"startChar": start,
"endChar": end,
})
chunk_index += 1
start = end - chunk_overlap if end < len(full_text) else len(full_text)
return chunks
def score_chunk_relevance(chunk_text: str, question: str) -> float:
"""
Score how relevant a chunk is to the user question.
Uses keyword overlap with simple IDF-like weighting.
Returns 0 if no overlap.
"""
if not question or not chunk_text:
return 0.0
q_words = _extract_keywords(question)
if not q_words:
return 0.0
chunk_lower = chunk_text.lower()
score = 0.0
for w in q_words:
count = chunk_lower.count(w)
if count > 0:
score += 1.0 + 0.5 * min(count - 1, 3)
return score
def get_ordered_chunks_for_question(
chunks: List[Dict[str, Any]],
question: str
) -> List[Dict[str, Any]]:
"""
Return chunks ordered by relevance (or doc order if no match).
Does NOT limit by max_context_chars - used for iterative batch retrieval.
"""
if not chunks:
return []
scored = []
for c in chunks:
score = score_chunk_relevance(c.get("text", ""), question)
scored.append((score, c))
max_score = max(s for s, _ in scored)
if max_score > 0:
scored.sort(key=lambda x: (-x[0], x[1].get("chunkIndex", 0)))
else:
scored.sort(key=lambda x: x[1].get("chunkIndex", 0))
logger.debug("No chunk matched question keywords - using chunks in document order")
return [c for _, c in scored]
def get_chunk_batch(
ordered_chunks: List[Dict[str, Any]],
batch_index: int,
max_context_chars: int
) -> List[Dict[str, Any]]:
"""
Get the Nth batch of chunks that fits in max_context_chars.
Batch 0 = first chunk(s) that fit, batch 1 = next chunk(s), etc.
Returns [] when batch_index is beyond available chunks.
"""
if not ordered_chunks or batch_index < 0:
return []
used_chars = 0
chunks_in_batch = 0
start_idx = 0
# Find start index for this batch (skip chunks from previous batches)
for _ in range(batch_index):
batch_chars = 0
i = start_idx
while i < len(ordered_chunks):
text = ordered_chunks[i].get("text", "")
if batch_chars + len(text) <= max_context_chars:
batch_chars += len(text)
i += 1
else:
if batch_chars < max_context_chars and len(text) > 0:
batch_chars += min(len(text), max_context_chars - batch_chars)
i += 1
break
start_idx = i
if start_idx >= len(ordered_chunks):
return []
# Collect chunks for this batch
selected = []
for i in range(start_idx, len(ordered_chunks)):
chunk = ordered_chunks[i]
text = chunk.get("text", "")
if not text:
continue
if used_chars + len(text) <= max_context_chars:
selected.append(chunk)
used_chars += len(text)
else:
if used_chars < max_context_chars:
remaining = max_context_chars - used_chars
selected.append({**chunk, "text": text[:remaining]})
break
return selected
def select_chunks_for_question(
chunks: List[Dict[str, Any]],
question: str,
max_context_chars: int
) -> List[Dict[str, Any]]:
"""
Select first batch of chunks (for backward compatibility).
For iterative retry, use get_ordered_chunks_for_question + get_chunk_batch.
"""
ordered = get_ordered_chunks_for_question(chunks, question)
return get_chunk_batch(ordered, 0, max_context_chars)
# Phrases that indicate the AI found no relevant content (DE/EN/FR)
NOT_FOUND_PHRASES = [
"nicht enthalten",
"nicht im kontext",
"nicht im dokument",
"nicht im bereitgestellten",
"nicht auffindbar",
"keine information",
"keine angaben",
"nicht gefunden",
"nicht verfügbar",
"kein hinweis",
"enthalten nicht",
"artikel nicht enthalten",
"nicht im vorliegenden",
"bereitgestellten kontext enthält nicht",
"im kontext nicht",
"not contained",
"not found",
"no information",
"not in the context",
"not in the document",
"pas dans le contexte",
"non trouvé",
]
def response_indicates_not_found(response_content: str) -> bool:
"""
Check if the AI response indicates that the requested content was not found.
When True, we should try the next chunk batch.
"""
if not response_content or not isinstance(response_content, str):
return False
text_lower = response_content.lower().strip()
# Only treat short/medium responses as "not found" - long answers may mention it incidentally
if len(text_lower) > 600:
return False
for phrase in NOT_FOUND_PHRASES:
if phrase in text_lower:
return True
return False
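
A compact sketch of the intended batch-and-retry flow (the section text is a placeholder and
ask_model is a hypothetical stand-in for the LLM call):

# Illustrative only - ask_model is a hypothetical stand-in for the model call.
sections = [{"fileId": "f1", "fileName": "statute.pdf", "text": "Artikel 5 ... " * 10_000}]
chunks = chunk_sections(sections)                 # ~15k-char chunks with 500-char overlap
ordered = get_ordered_chunks_for_question(chunks, "Was regelt Artikel 5?")
batch_index = 0
while True:
    batch = get_chunk_batch(ordered, batch_index, 60_000)
    if not batch:
        break                                     # whole document searched, nothing found
    answer = ask_model(batch)
    if not response_indicates_not_found(answer):
        break                                     # usable answer for this batch
    batch_index += 1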

@@ -1,160 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
LangGraph-based pipeline for extracting context from uploaded documents.
Creates chat context from PDF and text files (no domain-specific goals).
"""
import logging
from typing import TypedDict, List, Dict, Any, Optional
from langgraph.graph import StateGraph, START, END
logger = logging.getLogger(__name__)
class ContextExtractionState(TypedDict):
"""State for context extraction pipeline."""
# Input: list of {fileId, bytes, mimeType, fileName}
files: List[Dict[str, Any]]
# Extracted text blocks per file: [{fileId, fileName, blocks: [{page, text, block_id}]}]
textBlocks: List[Dict[str, Any]]
# Structured sections for chat context (simplified articles/sections)
sections: List[Dict[str, Any]]
# Optional summaries (empty for now - no LLM in extraction)
summaries: List[Dict[str, Any]]
errors: List[str]
def extract_text_node(state: ContextExtractionState) -> ContextExtractionState:
"""Extract text from each file. PDF via BZOPdfExtractor, TXT as plain text."""
text_blocks = []
errors = list(state.get("errors", []))
for idx, file_info in enumerate(state.get("files", [])):
file_id = file_info.get("fileId", f"file_{idx}")
file_bytes = file_info.get("bytes")
mime_type = (file_info.get("mimeType") or "").lower()
file_name = file_info.get("fileName", f"document_{idx}")
if not file_bytes:
errors.append(f"No content for file {file_name} ({file_id})")
continue
blocks = []
try:
if "pdf" in mime_type or file_name.lower().endswith(".pdf"):
from modules.features.realEstate.bzoPdfExtractor import BZOPdfExtractor
extractor = BZOPdfExtractor()
tb_list = extractor.extract_text_blocks(file_bytes, file_id)
for tb in tb_list:
blocks.append({
"page": tb.page,
"text": tb.text,
"block_id": tb.block_id,
"bbox": tb.bbox
})
logger.info(f"Extracted {len(blocks)} blocks from PDF {file_name}")
elif "text" in mime_type or file_name.lower().endswith(".txt"):
text = file_bytes.decode("utf-8", errors="replace")
lines = text.split("\n")
for i, line in enumerate(lines):
if line.strip():
blocks.append({
"page": 1,
"text": line.strip(),
"block_id": f"{file_id}_line_{i}",
"bbox": None
})
logger.info(f"Extracted {len(blocks)} lines from text file {file_name}")
else:
errors.append(f"Unsupported format for {file_name}: {mime_type}")
except Exception as e:
logger.error(f"Error extracting {file_name}: {e}", exc_info=True)
errors.append(f"Extraction failed for {file_name}: {str(e)}")
if blocks:
text_blocks.append({
"fileId": file_id,
"fileName": file_name,
"blocks": blocks
})
return {
**state,
"textBlocks": text_blocks,
"errors": errors
}
def structure_content_node(state: ContextExtractionState) -> ContextExtractionState:
"""Assemble text blocks into sections for chat context."""
sections = []
for doc in state.get("textBlocks", []):
file_name = doc.get("fileName", "document")
blocks = doc.get("blocks", [])
if not blocks:
continue
# Build section: combine blocks with page awareness
text_parts = []
current_page = 0
for b in blocks:
page = b.get("page", 1)
if page != current_page and text_parts:
text_parts.append("\n\n")
text_parts.append(b.get("text", ""))
current_page = page
full_text = "".join(text_parts).strip()
if full_text:
sections.append({
"fileId": doc.get("fileId"),
"fileName": file_name,
"text": full_text,
"blockCount": len(blocks)
})
return {
**state,
"sections": sections
}
def create_context_extraction_graph():
"""Create and compile the context extraction LangGraph."""
workflow = StateGraph(ContextExtractionState)
workflow.add_node("extract_text", extract_text_node)
workflow.add_node("structure_content", structure_content_node)
workflow.add_edge(START, "extract_text")
workflow.add_edge("extract_text", "structure_content")
workflow.add_edge("structure_content", END)
return workflow.compile()
def run_extraction(files: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Run the context extraction pipeline on uploaded files.
Args:
files: List of {fileId, bytes, mimeType, fileName}
Returns:
{
"textBlocks": [...],
"sections": [...],
"summaries": [],
"errors": [...]
}
"""
state: ContextExtractionState = {
"files": files,
"textBlocks": [],
"sections": [],
"summaries": [],
"errors": []
}
graph = create_context_extraction_graph()
final_state = graph.invoke(state)
return {
"textBlocks": final_state.get("textBlocks", []),
"sections": final_state.get("sections", []),
"summaries": final_state.get("summaries", []),
"errors": final_state.get("errors", [])
}
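
A minimal sketch of calling the pipeline (pdf_bytes stands in for raw bytes read from storage):

# Illustrative only - pdf_bytes is a placeholder.
result = run_extraction([
    {"fileId": "f1", "bytes": pdf_bytes, "mimeType": "application/pdf", "fileName": "statute.pdf"},
])
sections = result["sections"]  # [{"fileId", "fileName", "text", "blockCount"}, ...]
errors = result["errors"]      # non-empty if extraction failed for any file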

@@ -1,85 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Pydantic models for Chatbot V2 feature.
Stores context per chat: uploaded files, extracted content, and conversation history.
"""
import uuid
from typing import Optional, List, Dict, Any
from pydantic import BaseModel, Field
from modules.shared.timeUtils import getUtcTimestamp
class ChatbotV2ContextFile(BaseModel):
"""Uploaded file metadata for context extraction."""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
conversationId: str = Field(description="Foreign key to conversation")
fileId: str = Field(description="Foreign key to file in central Files table")
fileName: str = Field(description="Original file name")
mimeType: str = Field(default="application/octet-stream", description="MIME type")
fileSize: int = Field(default=0, description="File size in bytes")
uploadOrder: int = Field(default=0, description="Order of upload (0-based)")
class ChatbotV2ExtractedContext(BaseModel):
"""Extracted content per conversation - text blocks and summaries for chat context."""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
conversationId: str = Field(description="Foreign key to conversation")
textBlocks: List[Dict[str, Any]] = Field(default_factory=list, description="Extracted text blocks per file (page, text, block_id)")
summaries: List[Dict[str, Any]] = Field(default_factory=list, description="Optional per-document or per-section summaries")
extractionStatus: str = Field(default="pending", description="pending|running|completed|failed")
errors: List[str] = Field(default_factory=list, description="Extraction errors if any")
createdAt: float = Field(default_factory=getUtcTimestamp, description="When extraction completed")
class ChatbotV2Document(BaseModel):
"""Documents attached to chatbot V2 messages."""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
messageId: str = Field(description="Foreign key to message")
fileId: str = Field(description="Foreign key to file")
fileName: str = Field(description="Name of the file")
fileSize: int = Field(default=0, description="Size of the file")
mimeType: str = Field(default="application/octet-stream", description="MIME type")
class ChatbotV2Message(BaseModel):
"""Messages in Chatbot V2 conversations."""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
conversationId: str = Field(description="Foreign key to conversation")
parentMessageId: Optional[str] = Field(None, description="Parent message ID for threading")
message: Optional[str] = Field(None, description="Message content")
role: str = Field(description="Role: user or assistant")
status: str = Field(default="step", description="Status: first, step, last")
sequenceNr: int = Field(default=0, description="Sequence number of the message")
publishedAt: Optional[float] = Field(default=None, description="When the message was published (UTC timestamp)")
roundNumber: int = Field(default=1, description="Round number in conversation")
class ChatbotV2Log(BaseModel):
"""Log entries for Chatbot V2 conversations."""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
conversationId: str = Field(description="Foreign key to conversation")
message: str = Field(description="Log message")
type: str = Field(default="info", description="Log type: info, warning, error")
timestamp: float = Field(default_factory=getUtcTimestamp, description="When the log entry was created")
status: Optional[str] = Field(None, description="Status of the log entry")
progress: Optional[float] = Field(None, description="Progress indicator (0.0 to 1.0)")
class ChatbotV2Conversation(BaseModel):
"""Chatbot V2 conversation - stores context information per chat."""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
featureInstanceId: str = Field(description="Feature instance ID for per-instance isolation")
mandateId: Optional[str] = Field(None, description="Mandate ID for RBAC")
name: Optional[str] = Field(None, description="Name of the conversation")
status: str = Field(default="extracting", description="extracting|ready|running|stopped")
currentRound: int = Field(default=0, description="Current round number")
lastActivity: float = Field(default_factory=getUtcTimestamp, description="Timestamp of last activity")
startedAt: float = Field(default_factory=getUtcTimestamp, description="When the conversation started")
extractedContextId: Optional[str] = Field(None, description="FK to ChatbotV2ExtractedContext when ready")
maxSteps: int = Field(default=10, description="Maximum number of chat rounds")
# Hydrated from child tables (not stored in DB as columns)
contextFiles: List[ChatbotV2ContextFile] = Field(default_factory=list, description="Uploaded context files")
messages: List[ChatbotV2Message] = Field(default_factory=list, description="Conversation messages")
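
A small construction example (the ids are placeholders; unspecified fields fall back to their defaults):

# Illustrative only.
conv = ChatbotV2Conversation(featureInstanceId="inst-1", name="Demo chat")
assert conv.status == "extracting" and conv.currentRound == 0 and conv.messages == []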

@@ -1,440 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Interface to Chatbot V2 database.
Manages context-aware conversations with file upload and extraction.
"""
import logging
import math
import uuid
from typing import Dict, Any, List, Optional, Union
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.configuration import APP_CONFIG
from modules.interfaces.interfaceRbac import getRecordsetWithRBAC
from modules.security.rbac import RbacClass
from modules.datamodels.datamodelRbac import AccessRuleContext
from modules.datamodels.datamodelUam import User, AccessLevel
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResult
from modules.shared.timeUtils import getUtcTimestamp, parseTimestamp
from .datamodelFeatureChatbotV2 import (
ChatbotV2Conversation,
ChatbotV2ContextFile,
ChatbotV2ExtractedContext,
ChatbotV2Message,
ChatbotV2Document,
ChatbotV2Log,
)
logger = logging.getLogger(__name__)
_chatbotV2Interfaces: Dict[str, "ChatbotV2Objects"] = {}
FEATURE_CODE = "chatbotv2"
def getInterface(
currentUser: User,
mandateId: Optional[str] = None,
featureInstanceId: Optional[str] = None
) -> "ChatbotV2Objects":
"""Get or create a ChatbotV2Objects instance for the given user context."""
if not currentUser or not currentUser.id:
raise ValueError("Valid user context required")
key = f"{currentUser.id}_{mandateId or ''}_{featureInstanceId or ''}"
if key not in _chatbotV2Interfaces:
_chatbotV2Interfaces[key] = ChatbotV2Objects(
currentUser,
mandateId=mandateId,
featureInstanceId=featureInstanceId
)
else:
_chatbotV2Interfaces[key].setUserContext(
currentUser,
mandateId=mandateId,
featureInstanceId=featureInstanceId
)
return _chatbotV2Interfaces[key]
class ChatbotV2Objects:
"""Interface to Chatbot V2 database."""
def __init__(
self,
currentUser: Optional[User] = None,
mandateId: Optional[str] = None,
featureInstanceId: Optional[str] = None
):
self.currentUser = currentUser
self.userId = currentUser.id if currentUser else None
self.mandateId = mandateId
self.featureInstanceId = featureInstanceId
self.featureCode = FEATURE_CODE
self.rbac = None
self._initializeDatabase()
if currentUser:
self.setUserContext(currentUser, mandateId=mandateId, featureInstanceId=featureInstanceId)
def setUserContext(
self,
currentUser: User,
mandateId: Optional[str] = None,
featureInstanceId: Optional[str] = None
):
"""Set user context for the interface."""
self.currentUser = currentUser
self.userId = currentUser.id
self.mandateId = mandateId
self.featureInstanceId = featureInstanceId
if not self.userId:
raise ValueError("Invalid user context: id is required")
from modules.security.rootAccess import getRootDbAppConnector
dbApp = getRootDbAppConnector()
self.rbac = RbacClass(self.db, dbApp=dbApp)
self.db.updateContext(self.userId)
def __del__(self):
if hasattr(self, "db") and self.db is not None:
try:
self.db.close()
except Exception as e:
logger.error(f"Error closing database connection: {e}")
def _initializeDatabase(self):
try:
dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data")
dbDatabase = "poweron_chatbotv2"
dbUser = APP_CONFIG.get("DB_USER")
dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET")
dbPort = int(APP_CONFIG.get("DB_PORT", 5432))
self.db = DatabaseConnector(
dbHost=dbHost,
dbDatabase=dbDatabase,
dbUser=dbUser,
dbPassword=dbPassword,
dbPort=dbPort,
userId=self.userId
)
logger.info("ChatbotV2 database initialized successfully")
except Exception as e:
logger.error(f"Failed to initialize ChatbotV2 database: {e}")
raise
def checkRbacPermission(
self,
modelClass: type,
operation: str,
recordId: Optional[str] = None
) -> bool:
"""Check RBAC permission for an operation on a table."""
if not self.rbac or not self.currentUser:
return False
from modules.interfaces.interfaceRbac import buildDataObjectKey
objectKey = buildDataObjectKey(modelClass.__name__, featureCode=self.featureCode)
permissions = self.rbac.getUserPermissions(
self.currentUser,
AccessRuleContext.DATA,
objectKey,
mandateId=self.mandateId,
featureInstanceId=self.featureInstanceId
)
if operation == "create":
return permissions.create != AccessLevel.NONE
if operation == "update":
return permissions.update != AccessLevel.NONE
if operation == "delete":
return permissions.delete != AccessLevel.NONE
if operation == "read":
return permissions.read != AccessLevel.NONE
return False
def _separateObjectFields(self, model_class, data: Dict[str, Any]) -> tuple:
"""Separate simple fields from object fields (contextFiles, messages)."""
simple_fields = {}
object_fields = {}
model_fields = model_class.model_fields
for field_name, value in data.items():
if field_name in model_fields:
if field_name in ("contextFiles", "messages"):
object_fields[field_name] = value
continue
if field_name.startswith("_"):
simple_fields[field_name] = value
elif isinstance(value, (str, int, float, bool, type(None))):
simple_fields[field_name] = value
elif field_name in model_fields:
field_info = model_fields[field_name]
if hasattr(field_info, "annotation"):
from typing import get_origin, get_args
origin = get_origin(field_info.annotation)
if origin in (dict, list):
simple_fields[field_name] = value
else:
object_fields[field_name] = value
else:
object_fields[field_name] = value
return simple_fields, object_fields
# ===== Conversation CRUD =====
def getConversations(
self,
pagination: Optional[PaginationParams] = None
) -> Union[List[Dict[str, Any]], PaginatedResult]:
"""Get conversations for current feature instance."""
record_filter = {}
if self.featureInstanceId:
record_filter["featureInstanceId"] = self.featureInstanceId
records = getRecordsetWithRBAC(
self.db,
ChatbotV2Conversation,
self.currentUser,
recordFilter=record_filter if record_filter else None,
orderBy="lastActivity",
mandateId=self.mandateId,
featureInstanceId=self.featureInstanceId,
featureCode=self.featureCode
)
if pagination is None:
return records
total = len(records)
page = pagination.page or 1
page_size = pagination.pageSize or 20
start = (page - 1) * page_size
items = records[start : start + page_size]
total_pages = math.ceil(total / page_size) if total > 0 else 0
return PaginatedResult(items=items, totalItems=total, totalPages=total_pages)
def getConversation(self, conversationId: str) -> Optional[ChatbotV2Conversation]:
"""Get a conversation by ID with hydrated context files and messages."""
records = getRecordsetWithRBAC(
self.db,
ChatbotV2Conversation,
self.currentUser,
recordFilter={"id": conversationId},
mandateId=self.mandateId,
featureInstanceId=self.featureInstanceId,
featureCode=self.featureCode
)
if not records:
return None
r = records[0]
context_files = self.getContextFiles(conversationId)
messages = self.getMessages(conversationId)
max_steps = r.get("maxSteps")
if max_steps is None:
max_steps = 10
return ChatbotV2Conversation(
id=r["id"],
featureInstanceId=r.get("featureInstanceId", "") or self.featureInstanceId or "",
mandateId=r.get("mandateId"),
name=r.get("name"),
status=r.get("status", "extracting"),
currentRound=r.get("currentRound", 0),
lastActivity=r.get("lastActivity", getUtcTimestamp()),
startedAt=r.get("startedAt", getUtcTimestamp()),
extractedContextId=r.get("extractedContextId"),
maxSteps=max_steps,
contextFiles=context_files,
messages=messages
)
def createConversation(self, data: Dict[str, Any]) -> ChatbotV2Conversation:
"""Create a new conversation."""
if not self.checkRbacPermission(ChatbotV2Conversation, "create"):
raise PermissionError("No permission to create conversations")
data["featureInstanceId"] = data.get("featureInstanceId") or self.featureInstanceId or ""
data["mandateId"] = data.get("mandateId") or self.mandateId
simple, obj = self._separateObjectFields(ChatbotV2Conversation, data)
if "maxSteps" not in simple or simple["maxSteps"] is None:
simple["maxSteps"] = 10
created = self.db.recordCreate(ChatbotV2Conversation, simple)
max_steps = created.get("maxSteps")
if max_steps is None:
max_steps = 10
return ChatbotV2Conversation(
id=created["id"],
featureInstanceId=created.get("featureInstanceId", ""),
mandateId=created.get("mandateId"),
name=created.get("name"),
status=created.get("status", "extracting"),
currentRound=created.get("currentRound", 0),
lastActivity=created.get("lastActivity", getUtcTimestamp()),
startedAt=created.get("startedAt", getUtcTimestamp()),
extractedContextId=created.get("extractedContextId"),
maxSteps=max_steps,
contextFiles=[],
messages=[]
)
def updateConversation(self, conversationId: str, data: Dict[str, Any]) -> Optional[ChatbotV2Conversation]:
"""Update a conversation."""
conv = self.getConversation(conversationId)
if not conv:
return None
if not self.checkRbacPermission(ChatbotV2Conversation, "update"):
raise PermissionError("No permission to update conversation")
simple, _ = self._separateObjectFields(ChatbotV2Conversation, data)
simple["lastActivity"] = getUtcTimestamp()
updated = self.db.recordModify(ChatbotV2Conversation, conversationId, simple)
if not updated:
return None
return self.getConversation(conversationId)
def deleteConversation(self, conversationId: str) -> bool:
"""Delete a conversation and all related data."""
conv = self.getConversation(conversationId)
if not conv:
return False
if not self.checkRbacPermission(ChatbotV2Conversation, "delete"):
raise PermissionError("No permission to delete conversation")
for cf in self.getContextFiles(conversationId):
self.db.recordDelete(ChatbotV2ContextFile, cf.id)
ctx = self.getExtractedContextByConversation(conversationId)
if ctx:
self.db.recordDelete(ChatbotV2ExtractedContext, ctx.id)
for msg in self.getMessages(conversationId):
for doc in self.getDocuments(msg.id):
self.db.recordDelete(ChatbotV2Document, doc.id)
self.db.recordDelete(ChatbotV2Message, msg.id)
for log in self.getLogs(conversationId):
self.db.recordDelete(ChatbotV2Log, log.id)
return self.db.recordDelete(ChatbotV2Conversation, conversationId)
# ===== Context File CRUD =====
def getContextFiles(self, conversationId: str) -> List[ChatbotV2ContextFile]:
"""Get context files for a conversation."""
records = getRecordsetWithRBAC(
self.db,
ChatbotV2ContextFile,
self.currentUser,
recordFilter={"conversationId": conversationId},
orderBy="uploadOrder",
mandateId=self.mandateId,
featureInstanceId=self.featureInstanceId,
featureCode=self.featureCode
)
return [ChatbotV2ContextFile(**r) for r in records]
def createContextFile(self, data: Dict[str, Any]) -> ChatbotV2ContextFile:
"""Create a context file record."""
return ChatbotV2ContextFile(**self.db.recordCreate(ChatbotV2ContextFile, data))
# ===== Extracted Context CRUD =====
def getExtractedContext(self, extractedContextId: str) -> Optional[ChatbotV2ExtractedContext]:
"""Get extracted context by ID."""
records = self.db.getRecordset(
ChatbotV2ExtractedContext,
recordFilter={"id": extractedContextId}
)
if not records:
return None
return ChatbotV2ExtractedContext(**records[0])
def getExtractedContextByConversation(self, conversationId: str) -> Optional[ChatbotV2ExtractedContext]:
"""Get extracted context for a conversation."""
records = self.db.getRecordset(
ChatbotV2ExtractedContext,
recordFilter={"conversationId": conversationId}
)
if not records:
return None
return ChatbotV2ExtractedContext(**records[0])
def createExtractedContext(self, data: Dict[str, Any]) -> ChatbotV2ExtractedContext:
"""Create extracted context record."""
created = self.db.recordCreate(ChatbotV2ExtractedContext, data)
return ChatbotV2ExtractedContext(**created)
def updateExtractedContext(self, extractedContextId: str, data: Dict[str, Any]) -> Optional[ChatbotV2ExtractedContext]:
"""Update extracted context."""
updated = self.db.recordModify(ChatbotV2ExtractedContext, extractedContextId, data)
return ChatbotV2ExtractedContext(**updated) if updated else None
# ===== Message CRUD =====
def getMessages(self, conversationId: str) -> List[ChatbotV2Message]:
"""Get messages for a conversation."""
records = getRecordsetWithRBAC(
self.db,
ChatbotV2Message,
self.currentUser,
recordFilter={"conversationId": conversationId},
orderBy="sequenceNr",
mandateId=self.mandateId,
featureInstanceId=self.featureInstanceId,
featureCode=self.featureCode
)
return [ChatbotV2Message(**r) for r in records]
def createMessage(self, data: Dict[str, Any]) -> ChatbotV2Message:
"""Create a message."""
if "id" not in data or not data["id"]:
data["id"] = str(uuid.uuid4())
created = self.db.recordCreate(ChatbotV2Message, data)
return ChatbotV2Message(**created)
# ===== Document CRUD =====
def getDocuments(self, messageId: str) -> List[ChatbotV2Document]:
"""Get documents for a message."""
records = self.db.getRecordset(
ChatbotV2Document,
recordFilter={"messageId": messageId}
)
return [ChatbotV2Document(**r) for r in records]
def createDocument(self, data: Dict[str, Any]) -> ChatbotV2Document:
"""Create a document record."""
created = self.db.recordCreate(ChatbotV2Document, data)
return ChatbotV2Document(**created)
# ===== Log CRUD =====
def getLogs(self, conversationId: str) -> List[ChatbotV2Log]:
"""Get logs for a conversation."""
records = self.db.getRecordset(
ChatbotV2Log,
recordFilter={"conversationId": conversationId}
)
# Sort by timestamp (connector doesn't support orderBy)
logs = [ChatbotV2Log(**r) for r in records]
logs.sort(key=lambda log: parseTimestamp(log.timestamp, default=0))
return logs
def createLog(self, data: Dict[str, Any]) -> ChatbotV2Log:
"""Create a log entry."""
if "timestamp" not in data:
data["timestamp"] = getUtcTimestamp()
created = self.db.recordCreate(ChatbotV2Log, data)
return ChatbotV2Log(**created)
# ===== Unified Chat Data (for streaming/polling) =====
def getUnifiedChatData(
self,
conversationId: str,
afterTimestamp: Optional[float] = None
) -> Dict[str, Any]:
"""Get unified chat data (messages, logs) in chronological order."""
conv = self.getConversation(conversationId)
if not conv:
return {"items": []}
items = []
for msg in conv.messages:
ts = parseTimestamp(msg.publishedAt, default=getUtcTimestamp())
if afterTimestamp is not None and ts <= afterTimestamp:
continue
items.append({"type": "message", "createdAt": ts, "item": msg})
for log in self.getLogs(conversationId):
ts = parseTimestamp(log.timestamp, default=getUtcTimestamp())
if afterTimestamp is not None and ts <= afterTimestamp:
continue
items.append({"type": "log", "createdAt": ts, "item": log})
items.sort(key=lambda x: x.get("createdAt", 0))
return {"items": items}

@@ -1,250 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Chatbot V2 Feature Container - Main Module.
Handles feature initialization and RBAC catalog registration.
Context-aware chat with file upload and extraction.
"""
import logging
from typing import Dict, List, Any
logger = logging.getLogger(__name__)
# Feature metadata
FEATURE_CODE = "chatbotv2"
FEATURE_LABEL = {"en": "Chatbot V2", "de": "Chatbot V2", "fr": "Chatbot V2"}
FEATURE_ICON = "mdi-robot-outline"
# UI Objects for RBAC catalog - single "conversations" view (upload + chat in one page)
UI_OBJECTS = [
{
"objectKey": "ui.feature.chatbotv2.conversations",
"label": {"en": "Conversations", "de": "Konversationen", "fr": "Conversations"},
"meta": {"area": "conversations"}
},
]
# Resource Objects for RBAC catalog
RESOURCE_OBJECTS = [
{
"objectKey": "resource.feature.chatbotv2.upload",
"label": {"en": "Upload Files", "de": "Dateien hochladen", "fr": "Télécharger fichiers"},
"meta": {"endpoint": "/api/chatbotv2/{instanceId}/upload", "method": "POST"}
},
{
"objectKey": "resource.feature.chatbotv2.startStream",
"label": {"en": "Start Chat (Stream)", "de": "Chat starten (Stream)", "fr": "Démarrer chat (Stream)"},
"meta": {"endpoint": "/api/chatbotv2/{instanceId}/start/stream", "method": "POST"}
},
{
"objectKey": "resource.feature.chatbotv2.stop",
"label": {"en": "Stop Chat", "de": "Chat stoppen", "fr": "Arrêter chat"},
"meta": {"endpoint": "/api/chatbotv2/{instanceId}/stop/{workflowId}", "method": "POST"}
},
{
"objectKey": "resource.feature.chatbotv2.threads",
"label": {"en": "Get Threads", "de": "Threads abrufen", "fr": "Récupérer threads"},
"meta": {"endpoint": "/api/chatbotv2/{instanceId}/threads", "method": "GET"}
},
{
"objectKey": "resource.feature.chatbotv2.delete",
"label": {"en": "Delete Chat", "de": "Chat löschen", "fr": "Supprimer chat"},
"meta": {"endpoint": "/api/chatbotv2/{instanceId}/conversations/{workflowId}", "method": "DELETE"}
},
]
# Template roles for this feature
TEMPLATE_ROLES = [
{
"roleLabel": "chatbotv2-viewer",
"description": {
"en": "Chatbot V2 Viewer - View threads (read-only)",
"de": "Chatbot V2 Betrachter - Threads ansehen (nur lesen)",
"fr": "Visualiseur Chatbot V2 - Consulter les threads (lecture seule)"
},
"accessRules": [
{"context": "UI", "item": "ui.feature.chatbotv2.conversations", "view": True},
{"context": "RESOURCE", "item": "resource.feature.chatbotv2.threads", "view": True},
{"context": "DATA", "item": None, "view": True, "read": "m", "create": "n", "update": "n", "delete": "n"},
]
},
{
"roleLabel": "chatbotv2-user",
"description": {
"en": "Chatbot V2 User - Upload, extract, and chat with own threads",
"de": "Chatbot V2 Benutzer - Hochladen, extrahieren und chatten mit eigenen Threads",
"fr": "Utilisateur Chatbot V2 - Upload, extraction et chat avec ses threads"
},
"accessRules": [
{"context": "UI", "item": "ui.feature.chatbotv2.conversations", "view": True},
{"context": "RESOURCE", "item": "resource.feature.chatbotv2.upload", "view": True},
{"context": "RESOURCE", "item": "resource.feature.chatbotv2.startStream", "view": True},
{"context": "RESOURCE", "item": "resource.feature.chatbotv2.stop", "view": True},
{"context": "RESOURCE", "item": "resource.feature.chatbotv2.threads", "view": True},
{"context": "RESOURCE", "item": "resource.feature.chatbotv2.delete", "view": True},
{"context": "DATA", "item": None, "view": True, "read": "m", "create": "m", "update": "m", "delete": "m"},
]
},
{
"roleLabel": "chatbotv2-admin",
"description": {
"en": "Chatbot V2 Admin - Full access to all features",
"de": "Chatbot V2 Admin - Vollzugriff auf alle Funktionen",
"fr": "Administrateur Chatbot V2 - Accès complet"
},
"accessRules": [
{"context": "UI", "item": None, "view": True},
{"context": "RESOURCE", "item": None, "view": True},
{"context": "DATA", "item": None, "view": True, "read": "a", "create": "a", "update": "a", "delete": "a"},
]
},
]
def getFeatureDefinition() -> Dict[str, Any]:
"""Return the feature definition for registration."""
return {
"code": FEATURE_CODE,
"label": FEATURE_LABEL,
"icon": FEATURE_ICON,
}
def getUiObjects() -> List[Dict[str, Any]]:
"""Return UI objects for RBAC catalog registration."""
return UI_OBJECTS
def getResourceObjects() -> List[Dict[str, Any]]:
"""Return resource objects for RBAC catalog registration."""
return RESOURCE_OBJECTS
def getTemplateRoles() -> List[Dict[str, Any]]:
"""Return template roles for this feature."""
return TEMPLATE_ROLES
def registerFeature(catalogService) -> bool:
"""
Register this feature's RBAC objects in the catalog.
"""
try:
for uiObj in UI_OBJECTS:
catalogService.registerUiObject(
featureCode=FEATURE_CODE,
objectKey=uiObj["objectKey"],
label=uiObj["label"],
meta=uiObj.get("meta")
)
for resObj in RESOURCE_OBJECTS:
catalogService.registerResourceObject(
featureCode=FEATURE_CODE,
objectKey=resObj["objectKey"],
label=resObj["label"],
meta=resObj.get("meta")
)
_syncTemplateRolesToDb()
logger.info(f"Feature '{FEATURE_CODE}' registered {len(UI_OBJECTS)} UI objects and {len(RESOURCE_OBJECTS)} resource objects")
return True
except Exception as e:
logger.error(f"Failed to register feature '{FEATURE_CODE}': {e}")
return False
def _syncTemplateRolesToDb() -> int:
"""Sync template roles and their AccessRules to the database."""
try:
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.datamodels.datamodelRbac import Role, AccessRule, AccessRuleContext
rootInterface = getRootInterface()
existingRoles = rootInterface.getRolesByFeatureCode(FEATURE_CODE)
templateRoles = [r for r in existingRoles if r.mandateId is None]
existingRoleLabels = {r.roleLabel: str(r.id) for r in templateRoles}
createdCount = 0
for roleTemplate in TEMPLATE_ROLES:
roleLabel = roleTemplate["roleLabel"]
if roleLabel in existingRoleLabels:
roleId = existingRoleLabels[roleLabel]
_ensureAccessRulesForRole(rootInterface, roleId, roleTemplate.get("accessRules", []))
else:
newRole = Role(
roleLabel=roleLabel,
description=roleTemplate.get("description", {}),
featureCode=FEATURE_CODE,
mandateId=None,
featureInstanceId=None,
isSystemRole=False
)
createdRole = rootInterface.db.recordCreate(Role, newRole.model_dump())
roleId = createdRole.get("id")
_ensureAccessRulesForRole(rootInterface, roleId, roleTemplate.get("accessRules", []))
logger.info(f"Created template role '{roleLabel}' with ID {roleId}")
createdCount += 1
if createdCount > 0:
logger.info(f"Feature '{FEATURE_CODE}': Created {createdCount} template roles")
return createdCount
except Exception as e:
logger.error(f"Error syncing template roles for feature '{FEATURE_CODE}': {e}")
return 0
def _ensureAccessRulesForRole(rootInterface, roleId: str, ruleTemplates: List[Dict[str, Any]]) -> int:
"""Ensure AccessRules exist for a role based on templates."""
from modules.datamodels.datamodelRbac import AccessRule, AccessRuleContext
existingRules = rootInterface.getAccessRulesByRole(roleId)
existingSignatures = set()
for rule in existingRules:
sig = (rule.context.value if rule.context else None, rule.item)
existingSignatures.add(sig)
createdCount = 0
for template in ruleTemplates:
context = template.get("context", "UI")
item = template.get("item")
sig = (context, item)
if sig in existingSignatures:
continue
if context == "UI":
contextEnum = AccessRuleContext.UI
elif context == "DATA":
contextEnum = AccessRuleContext.DATA
elif context == "RESOURCE":
contextEnum = AccessRuleContext.RESOURCE
else:
contextEnum = context
newRule = AccessRule(
roleId=roleId,
context=contextEnum,
item=item,
view=template.get("view", False),
read=template.get("read"),
create=template.get("create"),
update=template.get("update"),
delete=template.get("delete"),
)
rootInterface.db.recordCreate(AccessRule, newRule.model_dump())
createdCount += 1
if createdCount > 0:
logger.debug(f"Created {createdCount} AccessRules for role {roleId}")
return createdCount
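
A registration sketch (catalogService is a placeholder object exposing registerUiObject and
registerResourceObject as used above):

# Illustrative only - catalogService is a hypothetical catalog service instance.
if registerFeature(catalogService):
    logger.info("chatbotv2 catalog objects and template roles are in place")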

@@ -1,350 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Chatbot V2 routes - context-aware chat with file upload and extraction.
"""
import asyncio
import json
import math
import logging
import uuid
from typing import Optional, Any, Dict, Union
from fastapi import APIRouter, HTTPException, Depends, Body, Path, Query, Request, status, UploadFile, File
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from modules.auth import limiter, getRequestContext, RequestContext
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.interfaces.interfaceFeatures import getFeatureInterface
from modules.datamodels.datamodelChat import UserInputRequest
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata
from modules.shared.timeUtils import getUtcTimestamp
from . import interfaceFeatureChatbotV2 as interfaceDbChat
from .interfaceFeatureChatbotV2 import getInterface as getChatbotV2Interface
from .datamodelFeatureChatbotV2 import ChatbotV2Conversation
from .serviceChatbotV2 import uploadAndExtract, chatProcessV2
from modules.features.chatbot.streaming.events import get_event_manager
logger = logging.getLogger(__name__)
router = APIRouter(
prefix="/api/chatbotv2",
tags=["Chatbot V2"],
responses={404: {"description": "Not found"}}
)
class UploadRequest(BaseModel):
"""Request body for file upload - files must be uploaded to central storage first."""
listFileId: list[str] = Field(default_factory=list, description="List of file IDs from central storage")
def _getServiceChat(context: RequestContext, instanceId: Optional[str] = None):
"""Get ChatbotV2 interface with instance context."""
mandateId = str(context.mandateId) if context.mandateId else None
return getChatbotV2Interface(
context.user,
mandateId=mandateId,
featureInstanceId=instanceId
)
def _validateInstanceAccess(instanceId: str, context: RequestContext) -> str:
"""Validate that the user has access to the feature instance."""
rootInterface = getRootInterface()
featureInterface = getFeatureInterface(rootInterface.db)
instance = featureInterface.getFeatureInstance(instanceId)
if not instance:
raise HTTPException(
status_code=404,
detail=f"Feature instance '{instanceId}' not found"
)
if instance.featureCode != "chatbotv2":
raise HTTPException(
status_code=400,
detail=f"Instance '{instanceId}' is not a chatbotv2 instance"
)
if not context.hasSysAdminRole:
featureAccesses = rootInterface.getFeatureAccessesForUser(str(context.user.id))
hasAccess = any(
str(fa.featureInstanceId) == instanceId and fa.enabled
for fa in featureAccesses
)
if not hasAccess:
raise HTTPException(
status_code=403,
detail=f"Access denied to feature instance '{instanceId}'"
)
return str(instance.mandateId)
# =============================================================================
# Upload - start extraction
# =============================================================================
@router.post("/{instanceId}/upload")
@limiter.limit("60/minute")
async def upload_files(
request: Request,
instanceId: str = Path(..., description="Feature Instance ID"),
body: UploadRequest = Body(...),
context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
"""
Upload files as context and start extraction.
Files must be uploaded to central storage first; pass their file IDs in listFileId.
Returns conversationId. Extraction runs in the background; poll threads or use SSE for status.
"""
mandateId = _validateInstanceAccess(instanceId, context)
if not body.listFileId:
raise HTTPException(status_code=400, detail="listFileId is required and must not be empty")
try:
conversation = await uploadAndExtract(
context.user,
mandateId=mandateId,
instanceId=instanceId,
listFileId=body.listFileId
)
return {
"conversationId": conversation.id,
"status": conversation.status,
"message": "Extraction started. Poll GET /threads?workflowId={} for status.".format(conversation.id)
}
except Exception as e:
logger.error(f"Error in upload_files: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
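# Rough client-side sketch of the upload-then-poll flow described above
# (endpoint paths and response keys come from this router; the base URL, auth
# header and IDs are placeholders, and the requests library is an assumption):
#
#   import requests, time
#
#   BASE = "https://example.invalid/api/chatbotv2"
#   HEADERS = {"Authorization": "Bearer <token>"}
#
#   resp = requests.post(f"{BASE}/{instance_id}/upload",
#                        json={"listFileId": [file_id]}, headers=HEADERS)
#   conversation_id = resp.json()["conversationId"]
#
#   while True:
#       detail = requests.get(f"{BASE}/{instance_id}/threads",
#                             params={"workflowId": conversation_id},
#                             headers=HEADERS).json()
#       if detail["workflow"]["status"] != "extracting":
#           break
#       time.sleep(2)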
# =============================================================================
# List threads - MUST be first to avoid /{instanceId}/{workflowId} matching
# =============================================================================
@router.get("/{instanceId}/threads")
@limiter.limit("120/minute")
def get_threads(
request: Request,
instanceId: str = Path(..., description="Feature Instance ID"),
workflowId: Optional[str] = Query(None, description="Optional workflow/conversation ID for details"),
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams"),
context: RequestContext = Depends(getRequestContext)
) -> Union[PaginatedResponse, Dict[str, Any]]:
"""List conversations or get details for a specific one."""
_validateInstanceAccess(instanceId, context)
interfaceDbChat = _getServiceChat(context, instanceId)
if workflowId:
conv = interfaceDbChat.getConversation(workflowId)
if not conv:
raise HTTPException(status_code=404, detail=f"Conversation {workflowId} not found")
workflow_dict = conv.model_dump()
chatData = interfaceDbChat.getUnifiedChatData(workflowId, None)
return {"workflow": workflow_dict, "chatData": chatData}
paginationParams = None
if pagination:
try:
paginationDict = json.loads(pagination)
paginationParams = PaginationParams(**paginationDict) if paginationDict else None
except (json.JSONDecodeError, ValueError) as e:
raise HTTPException(status_code=400, detail=f"Invalid pagination parameter: {str(e)}")
all_convs = interfaceDbChat.getConversations(pagination=None)
# all_convs from getConversations can be a list of dicts (from getRecordsetWithRBAC)
items = [c if isinstance(c, dict) else c.model_dump() for c in all_convs]
if paginationParams:
totalItems = len(items)
totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
startIdx = (paginationParams.page - 1) * paginationParams.pageSize
endIdx = startIdx + paginationParams.pageSize
workflows = items[startIdx:endIdx]
else:
workflows = items
totalItems = len(items)
totalPages = 1
metadata = PaginationMetadata(
currentPage=paginationParams.page if paginationParams else 1,
pageSize=paginationParams.pageSize if paginationParams else len(workflows),
totalItems=totalItems,
totalPages=totalPages,
sort=paginationParams.sort if paginationParams else [],
filters=paginationParams.filters if paginationParams else None
)
return PaginatedResponse(items=workflows, pagination=metadata)
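# Illustrative listing call with pagination (field names mirror the
# PaginationParams attributes used in this handler; the values are examples):
#
#   GET /api/chatbotv2/{instanceId}/threads?pagination={"page": 1, "pageSize": 20}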
# =============================================================================
# Start/continue chat (SSE stream)
# =============================================================================
@router.post("/{instanceId}/start/stream")
@limiter.limit("120/minute")
async def stream_chat_start(
request: Request,
instanceId: str = Path(..., description="Feature Instance ID"),
workflowId: Optional[str] = Query(None, description="Optional conversation ID to continue"),
userInput: UserInputRequest = Body(...),
context: RequestContext = Depends(getRequestContext)
) -> StreamingResponse:
"""Start or continue a chat with SSE streaming."""
mandateId = _validateInstanceAccess(instanceId, context)
event_manager = get_event_manager()
final_workflow_id = workflowId or userInput.workflowId
try:
workflow = await chatProcessV2(
context.user,
mandateId=mandateId,
userInput=userInput,
conversationId=final_workflow_id,
instanceId=instanceId
)
if not workflow:
raise HTTPException(status_code=500, detail="Failed to create or load workflow")
queue = event_manager.get_queue(workflow.id)
if not queue:
queue = event_manager.create_queue(workflow.id)
async def event_stream():
try:
interfaceDbChat = _getServiceChat(context, instanceId)
chatData = interfaceDbChat.getUnifiedChatData(workflow.id, None)
if chatData.get("items"):
for item in chatData["items"]:
ser = {
"type": item.get("type"),
"createdAt": item.get("createdAt"),
"item": item.get("item").model_dump() if hasattr(item.get("item"), "model_dump") else item.get("item")
}
yield f"data: {json.dumps(ser)}\n\n"
keepalive_interval = 30.0
last_keepalive = asyncio.get_event_loop().time()
status_check_interval = 5.0
last_status_check = asyncio.get_event_loop().time()
timeout = 300.0
start_time = asyncio.get_event_loop().time()
while True:
elapsed = asyncio.get_event_loop().time() - start_time
if elapsed > timeout:
break
if await request.is_disconnected():
break
current_time = asyncio.get_event_loop().time()
if current_time - last_status_check >= status_check_interval:
try:
cw = interfaceDbChat.getConversation(workflow.id)
if cw and cw.status == "stopped":
break
except Exception:
pass
last_status_check = current_time
try:
event = await asyncio.wait_for(queue.get(), timeout=1.0)
event_type = event.get("type")
event_data = event.get("data", {})
if event_type == "chatdata" and event_data:
if event_data.get("type") == "status":
yield f"data: {json.dumps({'type': 'status', 'label': event_data.get('label', '')})}\n\n"
else:
item = event_data
if isinstance(item, dict) and "item" in item:
obj = item.get("item")
if hasattr(obj, "model_dump"):
item = {**item, "item": obj.model_dump()}
yield f"data: {json.dumps(item)}\n\n"
elif event_type in ("complete", "stopped"):
break
elif event_type == "error" and event.get("step") == "error":
break
last_keepalive = current_time
except asyncio.TimeoutError:
if current_time - last_keepalive >= keepalive_interval:
yield ": keepalive\n\n"
last_keepalive = current_time
except Exception as e:
logger.error(f"Error in event stream: {e}")
break
except Exception as e:
logger.error(f"Error in event stream generator: {e}", exc_info=True)
return StreamingResponse(
event_stream(),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no"
}
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error in stream_chat_start: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
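# Rough sketch of consuming the SSE stream above (payload keys such as 'type',
# 'label' and 'item' come from this handler; the httpx client, base URL and
# auth header are assumptions):
#
#   import json, httpx
#
#   with httpx.stream("POST", f"{BASE}/{instance_id}/start/stream",
#                     params={"workflowId": conversation_id},
#                     json={"prompt": "Summarize the uploaded documents"},
#                     headers=HEADERS, timeout=None) as r:
#       for line in r.iter_lines():
#           if not line or line.startswith(":"):
#               continue  # keepalive or blank separator
#           event = json.loads(line[len("data: "):])
#           print(event.get("type"), event)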
# =============================================================================
# Stop chat
# =============================================================================
@router.post("/{instanceId}/stop/{workflowId}")
@limiter.limit("120/minute")
async def stop_chat(
request: Request,
instanceId: str = Path(..., description="Feature Instance ID"),
workflowId: str = Path(..., description="Conversation ID to stop"),
context: RequestContext = Depends(getRequestContext)
) -> ChatbotV2Conversation:
"""Stop a running chat."""
_validateInstanceAccess(instanceId, context)
interfaceDbChat = _getServiceChat(context, instanceId)
conv = interfaceDbChat.getConversation(workflowId)
if not conv:
raise HTTPException(status_code=404, detail=f"Conversation {workflowId} not found")
interfaceDbChat.updateConversation(workflowId, {"status": "stopped", "lastActivity": getUtcTimestamp()})
interfaceDbChat.createLog({
"conversationId": workflowId,
"message": "Workflow stopped by user",
"type": "warning",
"status": "stopped",
"timestamp": getUtcTimestamp()
})
event_manager = get_event_manager()
await event_manager.emit_event(
context_id=workflowId,
event_type="stopped",
data={"workflowId": workflowId},
event_category="workflow",
message="Workflow stopped by user",
step="stopped"
)
return interfaceDbChat.getConversation(workflowId)
# =============================================================================
# Delete conversation - use /conversations/{workflowId} to avoid
# /{instanceId}/{workflowId} matching GET /threads (workflowId="threads")
# =============================================================================
@router.delete("/{instanceId}/conversations/{workflowId}")
@limiter.limit("120/minute")
def delete_conversation(
request: Request,
instanceId: str = Path(..., description="Feature Instance ID"),
workflowId: str = Path(..., description="Conversation ID to delete"),
context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
"""Delete a conversation and its data."""
_validateInstanceAccess(instanceId, context)
interfaceDbChat = _getServiceChat(context, instanceId)
conv = interfaceDbChat.getConversation(workflowId)
if not conv:
raise HTTPException(status_code=404, detail=f"Conversation {workflowId} not found")
success = interfaceDbChat.deleteConversation(workflowId)
if not success:
raise HTTPException(status_code=500, detail="Failed to delete conversation")
return {"id": workflowId, "message": "Conversation deleted successfully"}

View file

@ -1,258 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Chatbot V2 service - orchestration for upload/extraction and chat.
"""
import logging
import uuid
from typing import Optional, List
from modules.datamodels.datamodelUam import User
from modules.datamodels.datamodelChat import UserInputRequest
from modules.datamodels.datamodelAi import OperationTypeEnum, ProcessingModeEnum
from modules.shared.timeUtils import getUtcTimestamp
from modules.services import getInterface as getServices
from .interfaceFeatureChatbotV2 import getInterface as getChatbotV2Interface
from .datamodelFeatureChatbotV2 import ChatbotV2Conversation
from .contextExtractionLangGraph import run_extraction
from .chatbotV2 import create_chat_graph
from .config import load_chatbotv2_config_from_instance
from .bridges import AICenterChatModel, clear_workflow_allowed_providers, ChatbotV2Checkpointer
from modules.features.chatbot.streaming.events import get_event_manager
logger = logging.getLogger(__name__)
async def _load_config(instance_id: Optional[str]):
"""Load ChatbotV2 config from feature instance."""
if not instance_id:
return None
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.interfaces.interfaceFeatures import getFeatureInterface
root = getRootInterface()
feat = getFeatureInterface(root.db)
instance = feat.getFeatureInstance(instance_id)
if not instance:
return None
return load_chatbotv2_config_from_instance(instance)
async def uploadAndExtract(
currentUser: User,
mandateId: Optional[str],
instanceId: str,
listFileId: List[str]
) -> ChatbotV2Conversation:
"""
Create conversation, store context files, run extraction, save result.
"""
interface = getChatbotV2Interface(currentUser, mandateId=mandateId, featureInstanceId=instanceId)
services = getServices(currentUser, mandateId=mandateId, featureInstanceId=instanceId)
conversation_id = str(uuid.uuid4())
conv_data = {
"id": conversation_id,
"featureInstanceId": instanceId,
"mandateId": mandateId,
"status": "extracting",
"name": "Context Chat",
"currentRound": 0,
"maxSteps": 10,
"startedAt": getUtcTimestamp(),
"lastActivity": getUtcTimestamp()
}
conv = interface.createConversation(conv_data)
files_for_extraction = []
for idx, file_id in enumerate(listFileId):
try:
file_info = services.chat.getFileInfo(file_id)
if not file_info:
logger.warning(f"File {file_id} not found")
continue
file_bytes = services.chat.getFileData(file_id)
if not file_bytes:
logger.warning(f"No data for file {file_id}")
continue
interface.createContextFile({
"conversationId": conversation_id,
"fileId": file_id,
"fileName": file_info.get("fileName", "document"),
"mimeType": file_info.get("mimeType", "application/octet-stream"),
"fileSize": file_info.get("size", 0),
"uploadOrder": idx
})
files_for_extraction.append({
"fileId": file_id,
"bytes": file_bytes,
"mimeType": file_info.get("mimeType", "application/octet-stream"),
"fileName": file_info.get("fileName", "document")
})
except Exception as e:
logger.error(f"Error loading file {file_id}: {e}", exc_info=True)
if not files_for_extraction:
interface.updateConversation(conversation_id, {"status": "ready"})
interface.createExtractedContext({
"conversationId": conversation_id,
"textBlocks": [],
"summaries": [],
"extractionStatus": "failed",
"errors": ["No files could be loaded"]
})
return interface.getConversation(conversation_id)
result = run_extraction(files_for_extraction)
# Store sections in summaries for chat context (build_context_system_prompt uses sections)
summaries = result.get("summaries", []) or result.get("sections", [])
extracted = interface.createExtractedContext({
"conversationId": conversation_id,
"textBlocks": result.get("textBlocks", []),
"summaries": summaries,
"extractionStatus": "completed",
"errors": result.get("errors", []),
"createdAt": getUtcTimestamp()
})
interface.updateConversation(conversation_id, {
"status": "ready",
"extractedContextId": extracted.id,
"lastActivity": getUtcTimestamp()
})
return interface.getConversation(conversation_id)
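# For orientation, the result shape this function assumes run_extraction
# returns (keys taken from the .get() calls above and from the sections
# handling in chatProcessV2 below; the concrete values are illustrative):
#
#   {
#       "textBlocks": [{"fileId": "...", "fileName": "a.pdf",
#                       "blocks": [{"text": "..."}]}],
#       "sections":   [{"fileId": "...", "fileName": "a.pdf",
#                       "text": "...", "blockCount": 3}],
#       "summaries":  [],
#       "errors":     [],
#   }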
async def chatProcessV2(
currentUser: User,
mandateId: Optional[str],
userInput: UserInputRequest,
conversationId: Optional[str],
instanceId: str
) -> Optional[ChatbotV2Conversation]:
"""
Run chat with extracted context. Creates or resumes conversation.
"""
interface = getChatbotV2Interface(currentUser, mandateId=mandateId, featureInstanceId=instanceId)
event_manager = get_event_manager()
config = await _load_config(instanceId)
base_prompt = config.systemPrompt if config else "You are a helpful assistant. Answer based on the provided context."
if conversationId:
conv = interface.getConversation(conversationId)
if not conv:
raise ValueError(f"Conversation {conversationId} not found")
if conv.status == "extracting":
raise ValueError("Conversation not ready for chat (status: extracting). Wait for extraction to complete.")
# Reset stale "running" from previous failed/interrupted request
if conv.status == "running":
logger.info("Resetting stale conversation status from 'running' to 'ready'")
interface.updateConversation(conversationId, {"status": "ready"})
new_round = conv.currentRound + 1
interface.updateConversation(conversationId, {
"status": "running",
"currentRound": new_round,
"lastActivity": getUtcTimestamp()
})
conv = interface.getConversation(conversationId)
if not event_manager.has_queue(conversationId):
event_manager.create_queue(conversationId)
else:
raise ValueError("conversationId is required for Chatbot V2 chat")
extracted_ctx = interface.getExtractedContextByConversation(conversationId)
ctx_dict = {"textBlocks": [], "sections": []}
if extracted_ctx:
tb = extracted_ctx.textBlocks or []
ctx_dict["textBlocks"] = tb
# summaries hold sections from extraction (fileName, text, blockCount)
ctx_dict["sections"] = extracted_ctx.summaries if isinstance(extracted_ctx.summaries, list) else []
if not ctx_dict["sections"] and tb:
# Build sections from textBlocks if summaries empty
for doc in tb:
blocks = doc.get("blocks", [])
text_parts = [b.get("text", "") for b in blocks]
ctx_dict["sections"].append({
"fileId": doc.get("fileId"),
"fileName": doc.get("fileName", "document"),
"text": "\n".join(text_parts),
"blockCount": len(blocks)
})
user_msg = userInput.prompt or ""
if not user_msg.strip():
raise ValueError("Prompt is required")
max_context_chars = config.maxContextChars if config else None
chunk_size = config.chunkSize if config else None
chunk_overlap = config.chunkOverlap if config else None
# Resolve to concrete values (chat node reads from configurable)
max_ctx = max_context_chars if max_context_chars and max_context_chars > 0 else 60_000
cs = chunk_size if chunk_size and chunk_size > 0 else 15_000
co = chunk_overlap if chunk_overlap is not None and chunk_overlap >= 0 else 500
allowed_providers = config.model.allowedProviders if config else []
services = getServices(currentUser, mandateId=mandateId, featureInstanceId=instanceId)
if allowed_providers:
services.allowedProviders = allowed_providers # type: ignore[attr-defined]
model = AICenterChatModel(
user=currentUser,
operation_type=OperationTypeEnum.DATA_ANALYSE,
processing_mode=ProcessingModeEnum.BASIC,
workflow_id=conversationId,
allowed_providers=allowed_providers if allowed_providers else None
)
memory = ChatbotV2Checkpointer(
user=currentUser,
workflow_id=conversationId,
mandateId=mandateId,
featureInstanceId=instanceId
)
app = create_chat_graph(model, memory)
chatbotv2_context = {
"ctx_dict": ctx_dict,
"user_question": user_msg,
"base_prompt": base_prompt,
"max_context_chars": max_ctx,
"chunk_size": cs,
"chunk_overlap": co,
}
interface.createMessage({
"id": str(uuid.uuid4()),
"conversationId": conversationId,
"message": user_msg,
"role": "user",
"status": "first",
"sequenceNr": len(interface.getMessages(conversationId)) + 1,
"publishedAt": getUtcTimestamp(),
"roundNumber": conv.currentRound
})
try:
result = await app.ainvoke(
{
"messages": [{"role": "user", "content": user_msg}],
"chatbotv2_context": chatbotv2_context,
},
config={"configurable": {"thread_id": conversationId}}
)
# Assistant message is stored by ChatbotV2Checkpointer.put() - do NOT create again here.
# The stream sends chatData at start (from DB), so no need to emit chatdata event.
await event_manager.emit_event(
context_id=conversationId,
event_type="complete",
data={},
event_category="workflow"
)
finally:
clear_workflow_allowed_providers(conversationId)
interface.updateConversation(conversationId, {
"status": "ready",
"lastActivity": getUtcTimestamp()
})
return interface.getConversation(conversationId)

View file

@ -119,9 +119,6 @@ def _getFeatureUiObjects(featureCode: str) -> List[Dict[str, Any]]:
     elif featureCode == "chatbot":
         from modules.features.chatbot.mainChatbot import UI_OBJECTS
         return UI_OBJECTS
-    elif featureCode == "chatbotv2":
-        from modules.features.chatbotV2.mainChatbotV2 import UI_OBJECTS
-        return UI_OBJECTS
     else:
         logger.warning(f"Unknown feature code: {featureCode}")
         return []

View file

@ -38,11 +38,10 @@ def discoverFeatureContainers() -> List[str]:
 def _router_load_order_key(filepath: str) -> tuple:
     """
-    Sort key for router loading. Longer/ Lexicographically later prefixes first
-    so that /api/chatbotv2 is registered before /api/chatbot (avoids prefix collision).
+    Sort key for router loading. Longer/lexicographically later prefixes first
+    so that more specific routes (e.g. /api/chatplayground) register before generic ones.
     """
     featureDir = os.path.basename(os.path.dirname(filepath))
-    # chatbotV2 before chatbot - use negative length then name so longer names sort first
     return (-len(featureDir), featureDir)
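A quick illustrative check of this ordering (the directory names are hypothetical):

sorted(["chat", "chatbot", "chatplayground"], key=lambda d: (-len(d), d))
# -> ["chatplayground", "chatbot", "chat"], so the longest, most specific prefix registers first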

View file

@ -1,102 +0,0 @@
#!/usr/bin/env python3
"""
Initialize poweron_chatbotv2 database for the Chatbot V2 feature.
Creates the poweron_chatbotv2 database if it does not exist.
Uses DB_CHATBOTV2_* config (falls back to DB_*).
Tables (ChatbotV2Conversation, ChatbotV2ContextFile, ChatbotV2ExtractedContext,
ChatbotV2Message, ChatbotV2Document, ChatbotV2Log) are auto-created by the
connector on first use.
Usage:
python script_db_init_chatbotv2.py [--dry-run]
"""
import os
import sys
import argparse
import logging
from pathlib import Path
scriptPath = Path(__file__).resolve()
gatewayPath = scriptPath.parent.parent
sys.path.insert(0, str(gatewayPath))
os.chdir(str(gatewayPath))
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
import psycopg2
from modules.shared.configuration import APP_CONFIG
DB_NAME = "poweron_chatbotv2"
CONFIG_PREFIX = "DB_CHATBOTV2"
def _get_config():
"""Get DB config: DB_CHATBOTV2_* with fallback to DB_*."""
host = APP_CONFIG.get(f"{CONFIG_PREFIX}_HOST") or APP_CONFIG.get("DB_HOST", "localhost")
port = int(APP_CONFIG.get(f"{CONFIG_PREFIX}_PORT") or APP_CONFIG.get("DB_PORT", "5432"))
user = APP_CONFIG.get(f"{CONFIG_PREFIX}_USER") or APP_CONFIG.get("DB_USER")
password = (
APP_CONFIG.get(f"{CONFIG_PREFIX}_PASSWORD_SECRET")
or APP_CONFIG.get(f"{CONFIG_PREFIX}_PASSWORD")
or APP_CONFIG.get("DB_PASSWORD_SECRET")
or APP_CONFIG.get("DB_PASSWORD")
)
return {"host": host, "port": port, "user": user, "password": password}
def init_chatbotv2_db(dry_run: bool = False) -> bool:
"""Create poweron_chatbotv2 database if it does not exist."""
config = _get_config()
if not config["user"] or not config["password"]:
logger.error("DB_USER and DB_PASSWORD (or DB_CHATBOTV2_*) required")
return False
try:
conn = psycopg2.connect(
host=config["host"],
port=config["port"],
database="postgres",
user=config["user"],
password=config["password"],
)
conn.autocommit = True
with conn.cursor() as cur:
cur.execute(
"SELECT 1 FROM pg_database WHERE datname = %s",
(DB_NAME,),
)
exists = cur.fetchone() is not None
if exists:
logger.info(f"Database {DB_NAME} already exists")
else:
if dry_run:
logger.info(f"[DRY-RUN] Would create database {DB_NAME}")
else:
cur.execute(f'CREATE DATABASE "{DB_NAME}"')
logger.info(f"Created database {DB_NAME}")
conn.close()
return True
except Exception as e:
logger.error(f"Failed to init {DB_NAME}: {e}")
return False
def main():
parser = argparse.ArgumentParser(description="Initialize poweron_chatbotv2 database")
parser.add_argument("--dry-run", action="store_true", help="Do not create, only report")
args = parser.parse_args()
ok = init_chatbotv2_db(dry_run=args.dry_run)
sys.exit(0 if ok else 1)
if __name__ == "__main__":
main()