# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
Chatbot V2 service - orchestration for upload/extraction and chat.
"""

import logging
import uuid
from typing import Optional, List

from modules.datamodels.datamodelUam import User
from modules.datamodels.datamodelChat import UserInputRequest
from modules.datamodels.datamodelAi import OperationTypeEnum, ProcessingModeEnum
from modules.shared.timeUtils import getUtcTimestamp
from modules.services import getInterface as getServices
from .interfaceFeatureChatbotV2 import getInterface as getChatbotV2Interface
from .datamodelFeatureChatbotV2 import ChatbotV2Conversation
from .contextExtractionLangGraph import run_extraction
from .chatbotV2 import create_chat_graph
from .config import load_chatbotv2_config_from_instance
from .bridges import AICenterChatModel, clear_workflow_allowed_providers, ChatbotV2Checkpointer
from modules.features.chatbot.streaming.events import get_event_manager

logger = logging.getLogger(__name__)


async def _load_config(instance_id: Optional[str]):
    """Load ChatbotV2 config from a feature instance.

    Args:
        instance_id: Feature instance id; falsy values short-circuit to None.

    Returns:
        The parsed ChatbotV2 config, or None when no instance id is given
        or the instance cannot be found.
    """
    if not instance_id:
        return None
    # NOTE(review): imports deferred to call time — presumably to avoid an
    # import cycle at module load; confirm before moving them to the top.
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.interfaces.interfaceFeatures import getFeatureInterface
    root = getRootInterface()
    feat = getFeatureInterface(root.db)
    instance = feat.getFeatureInstance(instance_id)
    if not instance:
        return None
    return load_chatbotv2_config_from_instance(instance)


async def uploadAndExtract(
    currentUser: User,
    mandateId: Optional[str],
    instanceId: str,
    listFileId: List[str]
) -> ChatbotV2Conversation:
    """
    Create a conversation, store its context files, run extraction, save result.

    Files that cannot be loaded are skipped with a warning. If no file loads
    at all, a failed extracted-context record is stored and the conversation
    is still returned with status "ready" (chat proceeds without context).

    Args:
        currentUser: Acting user, passed through to interfaces/services.
        mandateId: Optional mandate scope.
        instanceId: Feature instance id (also used to scope the interfaces).
        listFileId: Ids of previously uploaded files to extract context from.

    Returns:
        The conversation record, re-read from storage after all updates.

    Raises:
        Exception: re-raised from extraction/persistence failures; the
            conversation status is reset to "ready" first so it is never
            left stuck in "extracting".
    """
    interface = getChatbotV2Interface(currentUser, mandateId=mandateId, featureInstanceId=instanceId)
    services = getServices(currentUser, mandateId=mandateId, featureInstanceId=instanceId)

    conversation_id = str(uuid.uuid4())
    interface.createConversation({
        "id": conversation_id,
        "featureInstanceId": instanceId,
        "mandateId": mandateId,
        "status": "extracting",
        "name": "Context Chat",
        "currentRound": 0,
        "maxSteps": 10,
        "startedAt": getUtcTimestamp(),
        "lastActivity": getUtcTimestamp()
    })

    # Best-effort load: a single bad file must not abort the whole upload.
    files_for_extraction = []
    for idx, file_id in enumerate(listFileId):
        try:
            file_info = services.chat.getFileInfo(file_id)
            if not file_info:
                logger.warning("File %s not found", file_id)
                continue
            file_bytes = services.chat.getFileData(file_id)
            if not file_bytes:
                logger.warning("No data for file %s", file_id)
                continue
            interface.createContextFile({
                "conversationId": conversation_id,
                "fileId": file_id,
                "fileName": file_info.get("fileName", "document"),
                "mimeType": file_info.get("mimeType", "application/octet-stream"),
                "fileSize": file_info.get("size", 0),
                "uploadOrder": idx
            })
            files_for_extraction.append({
                "fileId": file_id,
                "bytes": file_bytes,
                "mimeType": file_info.get("mimeType", "application/octet-stream"),
                "fileName": file_info.get("fileName", "document")
            })
        except Exception:
            logger.exception("Error loading file %s", file_id)

    if not files_for_extraction:
        # Nothing to extract: mark ready anyway and record the failure so the
        # UI can show why there is no context.
        interface.updateConversation(conversation_id, {"status": "ready"})
        interface.createExtractedContext({
            "conversationId": conversation_id,
            "textBlocks": [],
            "summaries": [],
            "extractionStatus": "failed",
            "errors": ["No files could be loaded"]
        })
        return interface.getConversation(conversation_id)

    try:
        result = run_extraction(files_for_extraction)
        # Store sections in summaries for chat context
        # (build_context_system_prompt uses sections).
        summaries = result.get("summaries", []) or result.get("sections", [])
        extracted = interface.createExtractedContext({
            "conversationId": conversation_id,
            "textBlocks": result.get("textBlocks", []),
            "summaries": summaries,
            "extractionStatus": "completed",
            "errors": result.get("errors", []),
            "createdAt": getUtcTimestamp()
        })
        interface.updateConversation(conversation_id, {
            "status": "ready",
            "extractedContextId": extracted.id,
            "lastActivity": getUtcTimestamp()
        })
    except Exception as e:
        # Never leave the conversation stuck in "extracting": record the
        # failure, reset status, then propagate the original error.
        logger.exception("Extraction failed for conversation %s", conversation_id)
        interface.createExtractedContext({
            "conversationId": conversation_id,
            "textBlocks": [],
            "summaries": [],
            "extractionStatus": "failed",
            "errors": [str(e)]
        })
        interface.updateConversation(conversation_id, {
            "status": "ready",
            "lastActivity": getUtcTimestamp()
        })
        raise

    return interface.getConversation(conversation_id)


def _build_ctx_dict(extracted_ctx) -> dict:
    """Build the chat context dict ("textBlocks"/"sections") from storage.

    Args:
        extracted_ctx: Stored extracted-context record (or None).

    Returns:
        Dict with "textBlocks" and "sections" lists; both empty when no
        extracted context exists. Sections are taken from ``summaries``
        (which hold fileName/text/blockCount sections from extraction) or,
        when summaries are empty, rebuilt from the text blocks.
    """
    ctx_dict = {"textBlocks": [], "sections": []}
    if not extracted_ctx:
        return ctx_dict
    tb = extracted_ctx.textBlocks or []
    ctx_dict["textBlocks"] = tb
    # summaries hold sections from extraction (fileName, text, blockCount)
    ctx_dict["sections"] = extracted_ctx.summaries if isinstance(extracted_ctx.summaries, list) else []
    if not ctx_dict["sections"] and tb:
        # Build sections from textBlocks if summaries empty
        for doc in tb:
            blocks = doc.get("blocks", [])
            text_parts = [b.get("text", "") for b in blocks]
            ctx_dict["sections"].append({
                "fileId": doc.get("fileId"),
                "fileName": doc.get("fileName", "document"),
                "text": "\n".join(text_parts),
                "blockCount": len(blocks)
            })
    return ctx_dict


async def chatProcessV2(
    currentUser: User,
    mandateId: Optional[str],
    userInput: UserInputRequest,
    conversationId: Optional[str],
    instanceId: str
) -> Optional[ChatbotV2Conversation]:
    """
    Run one chat round against the conversation's extracted context.

    Validates input, flips the conversation to "running", builds the chat
    graph and invokes it, then always resets the status to "ready" — even on
    failure — so no conversation is left stuck in "running".

    Args:
        currentUser: Acting user.
        mandateId: Optional mandate scope.
        userInput: Request carrying the user's prompt.
        conversationId: Required id of an existing conversation.
        instanceId: Feature instance id (source of the ChatbotV2 config).

    Returns:
        The updated conversation record.

    Raises:
        ValueError: missing/empty prompt, missing conversationId, unknown
            conversation, or conversation still extracting.
    """
    interface = getChatbotV2Interface(currentUser, mandateId=mandateId, featureInstanceId=instanceId)
    event_manager = get_event_manager()

    config = await _load_config(instanceId)
    base_prompt = config.systemPrompt if config else "You are a helpful assistant. Answer based on the provided context."

    # Validate everything we can BEFORE flipping the status to "running",
    # so a rejected request never leaves a stale "running" conversation.
    user_msg = userInput.prompt or ""
    if not user_msg.strip():
        raise ValueError("Prompt is required")
    if not conversationId:
        raise ValueError("conversationId is required for Chatbot V2 chat")

    conv = interface.getConversation(conversationId)
    if not conv:
        raise ValueError(f"Conversation {conversationId} not found")
    if conv.status == "extracting":
        raise ValueError("Conversation not ready for chat (status: extracting). Wait for extraction to complete.")

    # Reset stale "running" from previous failed/interrupted request
    if conv.status == "running":
        logger.info("Resetting stale conversation status from 'running' to 'ready'")
        interface.updateConversation(conversationId, {"status": "ready"})

    new_round = conv.currentRound + 1
    interface.updateConversation(conversationId, {
        "status": "running",
        "currentRound": new_round,
        "lastActivity": getUtcTimestamp()
    })
    conv = interface.getConversation(conversationId)
    if not event_manager.has_queue(conversationId):
        event_manager.create_queue(conversationId)

    # Everything from here on runs under try/finally: whatever fails, the
    # conversation status is reset and provider restrictions are cleared.
    try:
        extracted_ctx = interface.getExtractedContextByConversation(conversationId)
        ctx_dict = _build_ctx_dict(extracted_ctx)

        # Resolve config to concrete values (chat node reads from configurable).
        max_context_chars = config.maxContextChars if config else None
        chunk_size = config.chunkSize if config else None
        chunk_overlap = config.chunkOverlap if config else None
        max_ctx = max_context_chars if max_context_chars and max_context_chars > 0 else 60_000
        cs = chunk_size if chunk_size and chunk_size > 0 else 15_000
        co = chunk_overlap if chunk_overlap is not None and chunk_overlap >= 0 else 500

        allowed_providers = config.model.allowedProviders if config else []
        services = getServices(currentUser, mandateId=mandateId, featureInstanceId=instanceId)
        if allowed_providers:
            services.allowedProviders = allowed_providers  # type: ignore[attr-defined]

        model = AICenterChatModel(
            user=currentUser,
            operation_type=OperationTypeEnum.DATA_ANALYSE,
            processing_mode=ProcessingModeEnum.BASIC,
            workflow_id=conversationId,
            allowed_providers=allowed_providers if allowed_providers else None
        )
        memory = ChatbotV2Checkpointer(
            user=currentUser,
            workflow_id=conversationId,
            mandateId=mandateId,
            featureInstanceId=instanceId
        )
        app = create_chat_graph(model, memory)

        chatbotv2_context = {
            "ctx_dict": ctx_dict,
            "user_question": user_msg,
            "base_prompt": base_prompt,
            "max_context_chars": max_ctx,
            "chunk_size": cs,
            "chunk_overlap": co,
        }

        interface.createMessage({
            "id": str(uuid.uuid4()),
            "conversationId": conversationId,
            "message": user_msg,
            "role": "user",
            "status": "first",
            "sequenceNr": len(interface.getMessages(conversationId)) + 1,
            "publishedAt": getUtcTimestamp(),
            "roundNumber": conv.currentRound
        })

        await app.ainvoke(
            {
                "messages": [{"role": "user", "content": user_msg}],
                "chatbotv2_context": chatbotv2_context,
            },
            config={"configurable": {"thread_id": conversationId}}
        )
        # Assistant message is stored by ChatbotV2Checkpointer.put() - do NOT create again here.
        # The stream sends chatData at start (from DB), so no need to emit chatdata event.
        await event_manager.emit_event(
            context_id=conversationId,
            event_type="complete",
            data={},
            event_category="workflow"
        )
    finally:
        # NOTE(review): assumed safe to call even when the model was never
        # constructed this round — confirm clear_workflow_allowed_providers
        # tolerates an unknown workflow id.
        clear_workflow_allowed_providers(conversationId)
        interface.updateConversation(conversationId, {
            "status": "ready",
            "lastActivity": getUtcTimestamp()
        })

    return interface.getConversation(conversationId)