enhanced stt/tts functions

2026-03-16 00:47:42 +01:00 · 2026-03-16 00:47:42 +01:00 · 3d49bd9d03
commit 3d49bd9d03
parent 7fe6f9bc97
7 changed files with 510 additions and 39 deletions
--- a/modules/connectors/providerMsft/connectorMsft.py
+++ b/modules/connectors/providerMsft/connectorMsft.py
@ -229,15 +229,15 @@ class OutlookAdapter(_GraphApiMixin, ServiceAdapter):
            return [
                ExternalEntry(
                    name=f.get("displayName", ""),
-                    path=f"/{f.get('displayName', '')}",
+                    path=f"/{f.get('id', '')}",
                    isFolder=True,
                    metadata={"id": f.get("id"), "totalItemCount": f.get("totalItemCount")},
                )
                for f in result.get("value", [])
            ]
-        folderName = path.strip("/")
+        folderId = path.strip("/")
-        endpoint = f"me/mailFolders/{folderName}/messages?$top=25&$orderby=receivedDateTime desc"
+        endpoint = f"me/mailFolders/{folderId}/messages?$top=25&$orderby=receivedDateTime desc"
        result = await self._graphGet(endpoint)
        if "error" in result:
            return []
--- a/modules/datamodels/datamodelVoice.py
+++ b/modules/datamodels/datamodelVoice.py
@ -2,6 +2,7 @@
 # All rights reserved.
 """Voice settings datamodel."""
 from typing import Dict, Any, Optional
 from pydantic import BaseModel, Field
 from modules.shared.attributeUtils import registerModelLabels
 from modules.shared.timeUtils import getUtcTimestamp
@ -16,6 +17,7 @@ class VoiceSettings(BaseModel):
    sttLanguage: str = Field(default="de-DE", description="Speech-to-Text language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
    ttsLanguage: str = Field(default="de-DE", description="Text-to-Speech language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
    ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
    ttsVoiceMap: Dict[str, Any] = Field(default_factory=dict, description="Per-language voice mapping, e.g. {'de-DE': {'voiceName': 'de-DE-Wavenet-A'}, 'en-US': {'voiceName': 'en-US-Wavenet-C'}}", json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False})
    translationEnabled: bool = Field(default=True, description="Whether translation is enabled", json_schema_extra={"frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False})
    targetLanguage: str = Field(default="en-US", description="Target language for translation", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False})
    creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False})
@ -33,6 +35,7 @@ registerModelLabels(
        "sttLanguage": {"en": "STT Language", "fr": "Langue STT"},
        "ttsLanguage": {"en": "TTS Language", "fr": "Langue TTS"},
        "ttsVoice": {"en": "TTS Voice", "fr": "Voix TTS"},
        "ttsVoiceMap": {"en": "TTS Voice Map", "fr": "Carte des voix TTS"},
        "translationEnabled": {"en": "Translation Enabled", "fr": "Traduction activée"},
        "targetLanguage": {"en": "Target Language", "fr": "Langue cible"},
        "creationDate": {"en": "Creation Date", "fr": "Date de création"},
--- a/modules/features/workspace/routeFeatureWorkspace.py
+++ b/modules/features/workspace/routeFeatureWorkspace.py
@ -718,3 +718,120 @@ async def synthesizeVoice(
    if not text:
        raise HTTPException(status_code=400, detail="text is required")
    return JSONResponse({"audio": None, "note": "TTS via browser Speech Synthesis API recommended"})
 # =========================================================================
 # Voice Settings Endpoints
 # =========================================================================
@router.get("/{instanceId}/settings/voice")
@limiter.limit("30/minute")
async def getVoiceSettings(
    request: Request,
    instanceId: str = Path(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Return the calling user's voice settings as JSON.

    Runs the instance access check first, then looks the settings up by the
    user id taken from the request context. When the lookup yields nothing,
    ``getOrCreateVoiceSettings`` is asked for a record instead. An empty JSON
    object is returned if that also yields nothing.
    """
    _validateInstanceAccess(instanceId, context)
    settingsDb = _getDbManagement(context, instanceId)
    currentUserId = str(context.user.id)

    # Plain lookup first; fall back to get-or-create only on a falsy result.
    settings = settingsDb.getVoiceSettings(currentUserId) or settingsDb.getOrCreateVoiceSettings(currentUserId)
    if not settings:
        return JSONResponse({})
    return JSONResponse(settings.model_dump())
@router.put("/{instanceId}/settings/voice")
@limiter.limit("30/minute")
async def updateVoiceSettings(
    request: Request,
    instanceId: str = Path(...),
    body: dict = Body(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Create or update the calling user's voice settings.

    The identity and bookkeeping fields (``id``, ``userId``, ``mandateId``,
    ``featureInstanceId``, ``creationDate``) are never taken from the request
    body. They are stripped on both the create and the update path, so a
    client cannot create a settings record on behalf of another user,
    mandate or feature instance.

    Returns:
        JSONResponse wrapping whatever ``createVoiceSettings`` or
        ``updateVoiceSettings`` returned.
    """
    _validateInstanceAccess(instanceId, context)
    dbMgmt = _getDbManagement(context, instanceId)
    userId = str(context.user.id)

    # Server-controlled fields: always derived from the request context.
    protectedFields = ("id", "userId", "mandateId", "featureInstanceId", "creationDate")
    clientData = {k: v for k, v in body.items() if k not in protectedFields}

    vs = dbMgmt.getVoiceSettings(userId)
    if not vs:
        createData = {
            "userId": userId,
            "mandateId": str(context.mandateId) if context.mandateId else "",
            "featureInstanceId": instanceId,
        }
        # clientData holds no protected keys, so it cannot override the above.
        createData.update(clientData)
        created = dbMgmt.createVoiceSettings(createData)
        return JSONResponse(created)

    updated = dbMgmt.updateVoiceSettings(userId, clientData)
    return JSONResponse(updated)
@router.get("/{instanceId}/voice/languages")
@limiter.limit("30/minute")
async def getVoiceLanguages(
    request: Request,
    instanceId: str = Path(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Return the languages reported by the voice interface.

    The interface result may be a dict carrying a ``languages`` key or a bare
    value; either way the response body is ``{"languages": ...}``.
    """
    mandateId = _validateInstanceAccess(instanceId, context)

    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface

    voiceApi = getVoiceInterface(context.user, mandateId)
    rawLanguages = await voiceApi.getAvailableLanguages()

    if isinstance(rawLanguages, dict):
        # Dict-shaped result: unwrap the list, defaulting to empty.
        payload = rawLanguages.get("languages", [])
    else:
        payload = rawLanguages
    return JSONResponse({"languages": payload})
@router.get("/{instanceId}/voice/voices")
@limiter.limit("30/minute")
async def getVoiceVoices(
    request: Request,
    instanceId: str = Path(...),
    language: str = Query("de-DE"),
    context: RequestContext = Depends(getRequestContext),
):
    """Return the voices reported by the voice interface for ``language``.

    ``language`` is a query parameter defaulting to ``de-DE``. The interface
    result may be a dict carrying a ``voices`` key or a bare value; either way
    the response body is ``{"voices": ...}``.
    """
    mandateId = _validateInstanceAccess(instanceId, context)

    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface

    voiceApi = getVoiceInterface(context.user, mandateId)
    rawVoices = await voiceApi.getAvailableVoices(language)

    if isinstance(rawVoices, dict):
        # Dict-shaped result: unwrap the list, defaulting to empty.
        payload = rawVoices.get("voices", [])
    else:
        payload = rawVoices
    return JSONResponse({"voices": payload})
@router.post("/{instanceId}/voice/test")
@limiter.limit("10/minute")
async def testVoice(
    request: Request,
    instanceId: str = Path(...),
    body: dict = Body(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Synthesize a short sample with a specific voice and return it as base64.

    Body keys (all optional): ``text`` (defaults to a German sample sentence),
    ``language`` (defaults to ``de-DE``) and ``voiceId``.

    Returns:
        ``{"success": True, "audio": <base64>, "format": "mp3", "text": ...}``
        when the voice interface produced audio, otherwise
        ``{"success": False, "error": "TTS returned no audio"}``.

    Raises:
        HTTPException: 500 when the voice interface call itself fails.
    """
    import base64

    mandateId = _validateInstanceAccess(instanceId, context)
    text = body.get("text", "Hallo, das ist ein Stimmtest.")
    language = body.get("language", "de-DE")
    voiceId = body.get("voiceId")

    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
    voiceInterface = getVoiceInterface(context.user, mandateId)

    try:
        result = await voiceInterface.textToSpeech(text=text, languageCode=language, voiceName=voiceId)
    except Exception as e:
        logger.error(f"Voice test failed: {e}")
        raise HTTPException(status_code=500, detail=f"TTS test failed: {str(e)}") from e

    audioContent = result.get("audioContent") if isinstance(result, dict) else None
    if not audioContent:
        return JSONResponse({"success": False, "error": "TTS returned no audio"})

    if isinstance(audioContent, (bytes, bytearray)):
        audioB64 = base64.b64encode(bytes(audioContent)).decode("ascii")
    else:
        # Same convention as the agent's textToSpeech tool: a string payload is
        # treated as already base64-encoded and must not be encoded again.
        # NOTE(review): confirm the voice interface never returns raw text here.
        audioB64 = str(audioContent)

    return JSONResponse({"success": True, "audio": audioB64, "format": "mp3", "text": text})
--- a/modules/routes/routeSecurityGoogle.py
+++ b/modules/routes/routeSecurityGoogle.py
@ -87,9 +87,10 @@ CLIENT_SECRET = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET")
 REDIRECT_URI = APP_CONFIG.get("Service_GOOGLE_REDIRECT_URI")
 SCOPES = [
    "https://www.googleapis.com/auth/gmail.readonly",
    "https://www.googleapis.com/auth/drive.readonly",
    "https://www.googleapis.com/auth/userinfo.profile",
    "https://www.googleapis.com/auth/userinfo.email",
-    "openid"
+    "openid",
 ]
@router.get("/config")
--- a/modules/routes/routeSecurityMsft.py
+++ b/modules/routes/routeSecurityMsft.py
@ -59,6 +59,7 @@ SCOPES = [
    "Mail.Send",                # Send mail
    "Files.ReadWrite.All",      # Read and write files (SharePoint/OneDrive)
    "Sites.ReadWrite.All",      # Read and write SharePoint sites
    "Team.ReadBasic.All",       # List joined teams and channels
    # Teams Bot: Meeting and chat access (requires admin consent)
    "OnlineMeetings.Read",      # Read user's Teams meeting details (delegated scope)
    "Chat.ReadWrite",           # Read and write Teams chat messages
--- a/modules/serviceCenter/services/serviceAgent/agentLoop.py
+++ b/modules/serviceCenter/services/serviceAgent/agentLoop.py
@ -208,7 +208,8 @@ async def runAgentLoop(
        results = await _executeToolCalls(toolCalls, toolRegistry, {
            "workflowId": workflowId,
            "userId": userId,
-            "featureInstanceId": featureInstanceId
+            "featureInstanceId": featureInstanceId,
            "mandateId": mandateId,
        })
        state.totalToolCalls += len(results)
--- a/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py
+++ b/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py
@ -209,7 +209,8 @@ class AgentService:
                    "## Attached Files\n"
                    "These files have been uploaded and processed through the extraction pipeline.\n"
                    "Use `readFile(fileId)` to read text content, `readContentObjects(fileId)` for structured access, "
-                    "or `describeImage(fileId)` for image analysis.\n\n"
+                    "or `describeImage(fileId)` for image analysis.\n"
                    "When generating documents with `renderDocument`, embed images using `![alt text](file:fileId)` in the markdown content.\n\n"
                )
                header += "\n\n".join(fileDescriptions)
                return f"{header}\n\n---\n\nUser request: {prompt}"
@ -1226,68 +1227,415 @@ def _registerCoreTools(registry: ToolRegistry, services):
        readOnly=True,
    )
-    # ---- Document generation tool ----
+    # ---- Document rendering tool ----
-    async def _generateDocument(args: Dict[str, Any], context: Dict[str, Any]):
+    def _markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
-        """Generate a document in any format using the existing GenerationService + RendererRegistry."""
+        """Convert markdown content to the standard document JSON format expected by renderers."""
-        prompt = args.get("prompt", "")
+        import re as _re
        sections = []
        order = 0
        lines = markdown.split("\n")
        i = 0
        def _nextId():
            nonlocal order
            order += 1
            return f"s_{order}"
        while i < len(lines):
            line = lines[i]
            # --- Headings ---
            headingMatch = _re.match(r'^(#{1,6})\s+(.+)', line)
            if headingMatch:
                level = len(headingMatch.group(1))
                text = headingMatch.group(2).strip()
                sections.append({
                    "id": _nextId(), "content_type": "heading", "order": order,
                    "elements": [{"content": {"text": text, "level": level}}],
                })
                i += 1
                continue
            # --- Fenced code blocks ---
            codeMatch = _re.match(r'^```(\w*)', line)
            if codeMatch:
                lang = codeMatch.group(1) or "text"
                codeLines = []
                i += 1
                while i < len(lines) and not lines[i].startswith("```"):
                    codeLines.append(lines[i])
                    i += 1
                i += 1
                sections.append({
                    "id": _nextId(), "content_type": "code_block", "order": order,
                    "elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}],
                })
                continue
            # --- Tables ---
            tableMatch = _re.match(r'^\|(.+)\|$', line)
            if tableMatch and (i + 1) < len(lines) and _re.match(r'^\|[\s\-:|]+\|$', lines[i + 1]):
                headerCells = [c.strip() for c in tableMatch.group(1).split("|")]
                i += 2
                rows = []
                while i < len(lines) and _re.match(r'^\|(.+)\|$', lines[i]):
                    rowCells = [c.strip() for c in lines[i][1:-1].split("|")]
                    rows.append(rowCells)
                    i += 1
                sections.append({
                    "id": _nextId(), "content_type": "table", "order": order,
                    "elements": [{"content": {"headers": headerCells, "rows": rows}}],
                })
                continue
            # --- Bullet / numbered lists ---
            listMatch = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', line)
            if listMatch:
                isNumbered = bool(_re.match(r'\d+[.)]', listMatch.group(2)))
                items = []
                while i < len(lines) and _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]):
                    m = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i])
                    items.append({"text": m.group(3).strip()})
                    i += 1
                sections.append({
                    "id": _nextId(), "content_type": "bullet_list", "order": order,
                    "elements": [{"content": {"items": items, "list_type": "numbered" if isNumbered else "bullet"}}],
                })
                continue
            # --- Empty lines (skip) ---
            if not line.strip():
                i += 1
                continue
            # --- Images: ![alt](file:fileId) or ![alt](url) ---
            imgMatch = _re.match(r'^!\[([^\]]*)\]\(([^)]+)\)', line)
            if imgMatch:
                altText = imgMatch.group(1).strip() or "Image"
                src = imgMatch.group(2).strip()
                fileId = ""
                if src.startswith("file:"):
                    fileId = src[5:]
                sections.append({
                    "id": _nextId(), "content_type": "image", "order": order,
                    "elements": [{
                        "content": {
                            "altText": altText,
                            "base64Data": "",
                            "_fileRef": fileId,
                            "_srcUrl": src if not fileId else "",
                        }
                    }],
                })
                i += 1
                continue
            # --- Paragraph (collect consecutive non-empty lines) ---
            paraLines = []
            while i < len(lines) and lines[i].strip() and not _re.match(r'^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )', lines[i]):
                paraLines.append(lines[i])
                i += 1
            if paraLines:
                sections.append({
                    "id": _nextId(), "content_type": "paragraph", "order": order,
                    "elements": [{"content": {"text": " ".join(paraLines)}}],
                })
                continue
            i += 1
        if not sections:
            sections.append({
                "id": _nextId(), "content_type": "paragraph", "order": order,
                "elements": [{"content": {"text": markdown.strip() or "(empty)"}}],
            })
        return {
            "metadata": {
                "split_strategy": "single_document",
                "source_documents": [],
                "extraction_method": "agent_rendering",
                "title": title,
                "language": language,
            },
            "documents": [{
                "id": "doc_1",
                "title": title,
                "sections": sections,
            }],
        }
    async def _renderDocument(args: Dict[str, Any], context: Dict[str, Any]):
        """Render agent-produced markdown content into a document file.

        Steps:
          1. Parse ``content`` (markdown) with ``_markdownToDocumentJson``.
          2. Resolve ``file:<fileId>`` image references to base64 data, first
             from the knowledge service's content chunks, then from the raw
             file bytes of the chat service.
          3. Render through the generation service's ``renderReport``.
          4. Persist each rendered document and emit a ``fileCreated`` side
             event for every saved file.

        The parsed markdown is rendered as-is; no AI generation step replaces
        it, and ``prompt`` (which does not exist in this tool) is never read.

        Args:
            args: Tool arguments: ``content`` (required), ``outputFormat``
                (default ``pdf``), ``title`` (default ``Document``),
                ``language`` (default ``de``).
            context: Tool call context (not read by this tool).

        Returns:
            ToolResult with a summary of the saved files, or a failed
            ToolResult carrying the error message.
        """
        import base64 as _b64
        import re as _re

        content = args.get("content", "")
        outputFormat = args.get("outputFormat", "pdf")
        title = args.get("title", "Document")
        language = args.get("language", "de")

        if not content:
            return ToolResult(toolCallId="", toolName="renderDocument", success=False, error="content is required")

        try:
            structuredContent = _markdownToDocumentJson(content, title, language)

            # Resolve image file references (file:fileId) to base64 data from Knowledge Store
            knowledgeService = None
            try:
                knowledgeService = services.getService("knowledge")
            except Exception as knowledgeErr:
                # Best effort: without the knowledge service we fall back to raw file bytes.
                logger.debug(f"renderDocument: knowledge service unavailable: {knowledgeErr}")

            resolvedImages = 0
            for doc in structuredContent.get("documents", []):
                for section in doc.get("sections", []):
                    if section.get("content_type") != "image":
                        continue
                    for element in section.get("elements", []):
                        contentObj = element.get("content", {})
                        fileRef = contentObj.get("_fileRef", "")
                        if not fileRef or contentObj.get("base64Data"):
                            continue

                        if knowledgeService:
                            chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef)
                            imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
                            if imageChunks:
                                contentObj["base64Data"] = imageChunks[0].get("data", "")
                                chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png")
                                contentObj["mimeType"] = chunkMime
                                resolvedImages += 1

                        if not contentObj.get("base64Data"):
                            # Fallback: raw file bytes. The MIME type is not known
                            # on this path and is set to image/png.
                            try:
                                rawBytes = services.chat.getFileData(fileRef)
                                if rawBytes:
                                    contentObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii")
                                    contentObj["mimeType"] = "image/png"
                                    resolvedImages += 1
                            except Exception as fileErr:
                                logger.warning(f"renderDocument: could not load image file '{fileRef}': {fileErr}")

                        # Internal markers are only removed for file-backed images.
                        contentObj.pop("_fileRef", None)
                        contentObj.pop("_srcUrl", None)

            sectionCount = len(structuredContent.get("documents", [{}])[0].get("sections", []))
            logger.info(f"renderDocument: parsed {sectionCount} sections from markdown ({len(content)} chars), resolved {resolvedImages} image(s), format={outputFormat}")

            generationService = services.getService("generation")
            aiService = services.ai

            documents = await generationService.renderReport(
                extractedContent=structuredContent,
                outputFormat=outputFormat,
                language=language,
                title=title,
                userPrompt=content,
                aiService=aiService,
            )

            if not documents:
                return ToolResult(toolCallId="", toolName="renderDocument", success=False, error="Rendering produced no output")

            savedFiles = []
            sideEvents = []
            chatService = services.chat
            sanitizedTitle = _re.sub(r'[^a-zA-Z0-9._-]', '_', title).strip('_') or "document"

            for doc in documents:
                docData = getattr(doc, "documentData", b"")
                docName = getattr(doc, "filename", f"{sanitizedTitle}.{outputFormat}")
                docMime = getattr(doc, "mimeType", "application/octet-stream")

                # Force the requested extension if the renderer named the file differently.
                if not docName.lower().endswith(f".{outputFormat}"):
                    docName = f"{sanitizedTitle}.{outputFormat}"

                fileItem = None
                if hasattr(chatService.interfaceDbComponent, "saveGeneratedFile"):
                    fileItem = chatService.interfaceDbComponent.saveGeneratedFile(docData, docName, docMime)
                else:
                    fileItem, _ = chatService.interfaceDbComponent.saveUploadedFile(docData, docName)

                if fileItem:
                    fid = fileItem.id if hasattr(fileItem, "id") else fileItem.get("id", "?")
                    savedFiles.append(f"- {docName} (id: {fid})")
                    sideEvents.append({
                        "type": "fileCreated",
                        "data": {
                            "fileId": fid,
                            "fileName": docName,
                            "mimeType": docMime,
                            "fileSize": len(docData),
                        },
                    })

            result = f"Rendered {len(documents)} document(s):\n" + "\n".join(savedFiles)
            return ToolResult(toolCallId="", toolName="renderDocument", success=True, data=result, sideEvents=sideEvents)

        except Exception as e:
            logger.error(f"renderDocument failed: {e}")
            return ToolResult(toolCallId="", toolName="renderDocument", success=False, error=str(e))
    registry.register(
-        "generateDocument", _generateDocument,
+        "renderDocument", _renderDocument,
-        description="Generate a document in any format (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT).",
+        description=(
            "Render markdown content into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). "
            "You write the full document content as markdown, then this tool converts and renders it. "
            "To embed images from uploaded files, use markdown image syntax with the file ID: ![alt text](file:fileId). "
            "The images will be resolved from the Knowledge Store and embedded in the output document."
        ),
        parameters={
            "type": "object",
            "properties": {
-                "prompt": {"type": "string", "description": "What the document should contain and how it should look"},
+                "content": {"type": "string", "description": "Full document content as markdown (headings, tables, lists, code blocks, paragraphs, images via ![alt](file:fileId))"},
                "outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
-                "title": {"type": "string", "description": "Document title", "default": "Generated Document"},
+                "title": {"type": "string", "description": "Document title", "default": "Document"},
                "language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"},
            },
-            "required": ["prompt"],
+            "required": ["content"],
        },
        readOnly=False,
    )
    # ── textToSpeech tool ──────────────────────────────────────────────
    def _stripMarkdownForTts(text: str) -> str:
        """Strip markdown formatting so TTS reads clean speech text.

        Rule order matters:
          * Images are removed before links are unwrapped; otherwise the link
            rule turns ``![alt](src)`` into ``!alt`` and the image rule never
            fires.
          * Heading and list markers are removed before emphasis so a leading
            ``* `` bullet is not mistaken for an opening ``*`` of emphasis.
          * Underscore emphasis only applies at word boundaries, so
            identifiers such as ``my_func_name`` are left intact.

        Args:
            text: Markdown text.

        Returns:
            The text with the handled markdown syntax removed, runs of three
            or more newlines collapsed to two, and surrounding whitespace
            trimmed.
        """
        import re as _re
        t = text
        # Images first (dropped entirely), then links (keep the link text).
        t = _re.sub(r'!\[.*?\]\(.*?\)', '', t)
        t = _re.sub(r'\[(.+?)\]\(.+?\)', r'\1', t)
        # Line-leading markers: headings, bullets, numbered items.
        t = _re.sub(r'^#{1,6}\s*', '', t, flags=_re.MULTILINE)
        t = _re.sub(r'^\s*[-*+]\s+', '', t, flags=_re.MULTILINE)
        t = _re.sub(r'^\s*\d+\.\s+', '', t, flags=_re.MULTILINE)
        # Emphasis: bold before italic so '**x**' is not half-consumed.
        t = _re.sub(r'\*\*(.+?)\*\*', r'\1', t)
        t = _re.sub(r'\*(.+?)\*', r'\1', t)
        t = _re.sub(r'(?<!\w)__(.+?)__(?!\w)', r'\1', t)
        t = _re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'\1', t)
        # Inline code: keep the content, drop the backticks.
        t = _re.sub(r'`[^`]+`', lambda m: m.group(0)[1:-1], t)
        t = _re.sub(r'\n{3,}', '\n\n', t)
        return t.strip()
    async def _textToSpeech(args: Dict[str, Any], context: Dict[str, Any]):
        """Convert text to speech via the voice interface and deliver the audio as a side event.

        Flow:
          1. Strip markdown from ``text``.
          2. Resolve the language: auto-detect (falling back to ``de-DE``) or
             normalise an explicitly passed code. Bare ISO 639-1 codes such as
             ``de`` are expanded to BCP-47 (``de-DE``) on both paths, so the
             voice-map lookup and the TTS call see the same form.
          3. If no ``voiceName`` was passed, try the user's stored voice
             settings (per-language ``ttsVoiceMap`` first, then ``ttsVoice``
             when ``ttsLanguage`` matches). Failures here are non-fatal.
          4. Call ``textToSpeech`` and return the audio as base64 in a
             ``voiceResponse`` side event.

        Args:
            args: Tool arguments: ``text`` (required), ``language`` (BCP-47,
                ISO 639-1 or ``auto``; null/empty is treated as ``auto``),
                ``voiceName`` (optional).
            context: Tool call context; ``mandateId``, ``featureInstanceId``
                and ``userId`` are read from it.

        Returns:
            ToolResult describing the generated audio, or a failed ToolResult
            carrying the error message.
        """
        import base64 as _b64

        _ISO_TO_BCP47 = {
            "de": "de-DE", "en": "en-US", "fr": "fr-FR", "it": "it-IT",
            "es": "es-ES", "pt": "pt-BR", "nl": "nl-NL", "pl": "pl-PL",
            "ru": "ru-RU", "ja": "ja-JP", "zh": "zh-CN", "ko": "ko-KR",
            "ar": "ar-XA", "hi": "hi-IN", "tr": "tr-TR", "sv": "sv-SE",
        }

        def _toBcp47(code: str) -> str:
            # Codes that already carry a region are passed through untouched;
            # unknown bare codes become '<code>-<CODE>'.
            if "-" in code:
                return code
            return _ISO_TO_BCP47.get(code, f"{code}-{code.upper()}")

        text = args.get("text", "")
        language = args.get("language") or "auto"
        voiceName = args.get("voiceName")

        if not text:
            return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="text is required")

        cleanText = _stripMarkdownForTts(text)
        if not cleanText:
            return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="text is empty after stripping markdown")

        try:
            from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
            mandateId = context.get("mandateId", "")
            voiceInterface = getVoiceInterface(currentUser=None, mandateId=mandateId)

            if language == "auto":
                try:
                    # Only the first 500 characters are sent for detection.
                    snippet = cleanText[:500]
                    detectResult = await voiceInterface.detectLanguage(snippet)
                    if detectResult and detectResult.get("success"):
                        detected = detectResult.get("language", "de")
                        language = _toBcp47(detected)
                        logger.info(f"textToSpeech: auto-detected language '{detected}' -> '{language}'")
                    else:
                        language = "de-DE"
                except Exception as detectErr:
                    logger.warning(f"textToSpeech: language detection failed: {detectErr}, defaulting to de-DE")
                    language = "de-DE"
            else:
                # Explicit codes get the same normalisation as detected ones.
                language = _toBcp47(str(language))

            if not voiceName:
                try:
                    featureInstanceId = context.get("featureInstanceId", "")
                    userId = context.get("userId", "")
                    if featureInstanceId and userId:
                        dbMgmt = services.chat.interfaceDbApp if hasattr(services.chat, "interfaceDbApp") else None
                        if dbMgmt and hasattr(dbMgmt, "getVoiceSettings"):
                            vs = dbMgmt.getVoiceSettings(userId)
                            if vs:
                                voiceMap = {}
                                if hasattr(vs, "ttsVoiceMap") and vs.ttsVoiceMap:
                                    voiceMap = vs.ttsVoiceMap if isinstance(vs.ttsVoiceMap, dict) else {}
                                if language in voiceMap:
                                    # Map entries may be {'voiceName': ...} or a bare voice name.
                                    voiceName = voiceMap[language].get("voiceName") if isinstance(voiceMap[language], dict) else voiceMap[language]
                                    logger.info(f"textToSpeech: using configured voice '{voiceName}' for {language}")
                                elif hasattr(vs, "ttsVoice") and vs.ttsVoice and hasattr(vs, "ttsLanguage") and vs.ttsLanguage == language:
                                    voiceName = vs.ttsVoice
                except Exception as prefErr:
                    # Preferences are optional; carry on without a configured voice.
                    logger.debug(f"textToSpeech: could not load voice preferences: {prefErr}")

            ttsResult = await voiceInterface.textToSpeech(
                text=cleanText,
                languageCode=language,
                voiceName=voiceName,
            )

            if not ttsResult or not ttsResult.get("success"):
                errMsg = ttsResult.get("error", "TTS call failed") if ttsResult else "TTS returned None"
                return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error=errMsg)

            audioContent = ttsResult.get("audioContent", "")
            if not audioContent:
                return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="TTS returned no audio")

            if isinstance(audioContent, bytes):
                audioB64 = _b64.b64encode(audioContent).decode("ascii")
            elif isinstance(audioContent, str):
                # A string payload is passed through unchanged (treated as already base64).
                audioB64 = audioContent
            else:
                audioB64 = str(audioContent)

            audioFormat = ttsResult.get("audioFormat", "mp3")
            charCount = len(cleanText)
            usedVoice = voiceName or "default"

            logger.info(f"textToSpeech: generated {audioFormat} audio for {charCount} chars, language={language}, voice={usedVoice}")

            return ToolResult(
                toolCallId="", toolName="textToSpeech", success=True,
                data=f"Audio generated ({charCount} characters, language={language}, voice={usedVoice}). Playing in chat.",
                sideEvents=[{
                    "type": "voiceResponse",
                    "data": {
                        "audio": audioB64,
                        "format": audioFormat,
                        "language": language,
                        "charCount": charCount,
                    },
                }],
            )
        except ImportError:
            return ToolResult(toolCallId="", toolName="textToSpeech", success=False,
                              error="Voice interface not available (missing dependency)")
        except Exception as e:
            logger.error(f"textToSpeech failed: {e}")
            return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error=str(e))
    # Register the textToSpeech tool with the registry: only `text` is
    # required, `language` defaults to 'auto', and the tool is registered
    # with readOnly=False.
    registry.register(
        "textToSpeech", _textToSpeech,
        description=(
            "Convert text to speech audio. The audio is played directly in the chat. "
            "Use this when the user asks you to read something aloud, narrate, or speak. "
            "Language is auto-detected from the text content. You do NOT need to specify a language."
        ),
        parameters={
            "type": "object",
            "properties": {
                "text": {"type": "string", "description": "The text to convert to speech. Can include markdown (will be stripped automatically)."},
                "language": {"type": "string", "description": "BCP-47 language code (e.g. de-DE, en-US) or 'auto' for automatic detection", "default": "auto"},
                "voiceName": {"type": "string", "description": "Optional specific voice name. If omitted, uses the configured voice for the detected language."},
            },
            "required": ["text"],
        },
        readOnly=False,
    )