Enhanced STT/TTS functions

This commit is contained in:
ValueOn AG 2026-03-16 00:47:42 +01:00
parent 7fe6f9bc97
commit 3d49bd9d03
7 changed files with 510 additions and 39 deletions

View file

@ -229,15 +229,15 @@ class OutlookAdapter(_GraphApiMixin, ServiceAdapter):
return [
ExternalEntry(
name=f.get("displayName", ""),
path=f"/{f.get('displayName', '')}",
path=f"/{f.get('id', '')}",
isFolder=True,
metadata={"id": f.get("id"), "totalItemCount": f.get("totalItemCount")},
)
for f in result.get("value", [])
]
folderName = path.strip("/")
endpoint = f"me/mailFolders/{folderName}/messages?$top=25&$orderby=receivedDateTime desc"
folderId = path.strip("/")
endpoint = f"me/mailFolders/{folderId}/messages?$top=25&$orderby=receivedDateTime desc"
result = await self._graphGet(endpoint)
if "error" in result:
return []

View file

@ -2,6 +2,7 @@
# All rights reserved.
"""Voice settings datamodel."""
from typing import Dict, Any, Optional
from pydantic import BaseModel, Field
from modules.shared.attributeUtils import registerModelLabels
from modules.shared.timeUtils import getUtcTimestamp
@ -16,6 +17,7 @@ class VoiceSettings(BaseModel):
sttLanguage: str = Field(default="de-DE", description="Speech-to-Text language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
ttsLanguage: str = Field(default="de-DE", description="Text-to-Speech language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
ttsVoiceMap: Dict[str, Any] = Field(default_factory=dict, description="Per-language voice mapping, e.g. {'de-DE': {'voiceName': 'de-DE-Wavenet-A'}, 'en-US': {'voiceName': 'en-US-Wavenet-C'}}", json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False})
translationEnabled: bool = Field(default=True, description="Whether translation is enabled", json_schema_extra={"frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False})
targetLanguage: str = Field(default="en-US", description="Target language for translation", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False})
creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False})
@ -33,6 +35,7 @@ registerModelLabels(
"sttLanguage": {"en": "STT Language", "fr": "Langue STT"},
"ttsLanguage": {"en": "TTS Language", "fr": "Langue TTS"},
"ttsVoice": {"en": "TTS Voice", "fr": "Voix TTS"},
"ttsVoiceMap": {"en": "TTS Voice Map", "fr": "Carte des voix TTS"},
"translationEnabled": {"en": "Translation Enabled", "fr": "Traduction activée"},
"targetLanguage": {"en": "Target Language", "fr": "Langue cible"},
"creationDate": {"en": "Creation Date", "fr": "Date de création"},

View file

@ -718,3 +718,120 @@ async def synthesizeVoice(
if not text:
raise HTTPException(status_code=400, detail="text is required")
return JSONResponse({"audio": None, "note": "TTS via browser Speech Synthesis API recommended"})
# =========================================================================
# Voice Settings Endpoints
# =========================================================================
@router.get("/{instanceId}/settings/voice")
@limiter.limit("30/minute")
async def getVoiceSettings(
    request: Request,
    instanceId: str = Path(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Return the stored voice settings of the requesting user, creating defaults when none exist yet."""
    _validateInstanceAccess(instanceId, context)
    dbMgmt = _getDbManagement(context, instanceId)
    userId = str(context.user.id)
    # Fall back to creation only when no settings record is present.
    settings = dbMgmt.getVoiceSettings(userId) or dbMgmt.getOrCreateVoiceSettings(userId)
    payload = settings.model_dump() if settings else {}
    return JSONResponse(payload)
@router.put("/{instanceId}/settings/voice")
@limiter.limit("30/minute")
async def updateVoiceSettings(
    request: Request,
    instanceId: str = Path(...),
    body: dict = Body(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Create or patch the per-user voice settings for this instance."""
    _validateInstanceAccess(instanceId, context)
    dbMgmt = _getDbManagement(context, instanceId)
    userId = str(context.user.id)
    existing = dbMgmt.getVoiceSettings(userId)
    if existing:
        # Never let the client overwrite identity or audit fields.
        protectedFields = ("id", "userId", "mandateId", "featureInstanceId", "creationDate")
        patch = {key: value for key, value in body.items() if key not in protectedFields}
        return JSONResponse(dbMgmt.updateVoiceSettings(userId, patch))
    # First write for this user: seed the record, client fields win on overlap.
    newSettings = {
        "userId": userId,
        "mandateId": str(context.mandateId) if context.mandateId else "",
        "featureInstanceId": instanceId,
    }
    newSettings.update(body)
    return JSONResponse(dbMgmt.createVoiceSettings(newSettings))
@router.get("/{instanceId}/voice/languages")
@limiter.limit("30/minute")
async def getVoiceLanguages(
    request: Request,
    instanceId: str = Path(...),
    context: RequestContext = Depends(getRequestContext),
):
    """List the languages supported by the TTS backend."""
    mandateId = _validateInstanceAccess(instanceId, context)
    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
    voiceInterface = getVoiceInterface(context.user, mandateId)
    raw = await voiceInterface.getAvailableLanguages()
    # The interface may answer with either a plain list or a wrapping dict.
    if isinstance(raw, dict):
        languages = raw.get("languages", [])
    else:
        languages = raw
    return JSONResponse({"languages": languages})
@router.get("/{instanceId}/voice/voices")
@limiter.limit("30/minute")
async def getVoiceVoices(
    request: Request,
    instanceId: str = Path(...),
    language: str = Query("de-DE"),
    context: RequestContext = Depends(getRequestContext),
):
    """List the voices the TTS backend offers for one language."""
    mandateId = _validateInstanceAccess(instanceId, context)
    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
    voiceInterface = getVoiceInterface(context.user, mandateId)
    raw = await voiceInterface.getAvailableVoices(language)
    # Accept both the dict-wrapped and the bare-list response shape.
    voices = raw.get("voices", []) if isinstance(raw, dict) else raw
    return JSONResponse({"voices": voices})
@router.post("/{instanceId}/voice/test")
@limiter.limit("10/minute")
async def testVoice(
    request: Request,
    instanceId: str = Path(...),
    body: dict = Body(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Synthesize a short sample so the user can preview a specific voice."""
    import base64
    mandateId = _validateInstanceAccess(instanceId, context)
    sampleText = body.get("text", "Hallo, das ist ein Stimmtest.")
    language = body.get("language", "de-DE")
    voiceId = body.get("voiceId")
    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
    voiceInterface = getVoiceInterface(context.user, mandateId)
    try:
        ttsResult = await voiceInterface.textToSpeech(text=sampleText, languageCode=language, voiceName=voiceId)
        audio = ttsResult.get("audioContent") if ttsResult and isinstance(ttsResult, dict) else None
        if audio:
            # Normalize to bytes before base64-encoding for the JSON payload.
            rawBytes = audio if isinstance(audio, bytes) else audio.encode()
            encoded = base64.b64encode(rawBytes).decode()
            return JSONResponse({"success": True, "audio": encoded, "format": "mp3", "text": sampleText})
        return JSONResponse({"success": False, "error": "TTS returned no audio"})
    except Exception as e:
        logger.error(f"Voice test failed: {e}")
        raise HTTPException(status_code=500, detail=f"TTS test failed: {str(e)}")

View file

@ -87,9 +87,10 @@ CLIENT_SECRET = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET")
REDIRECT_URI = APP_CONFIG.get("Service_GOOGLE_REDIRECT_URI")
SCOPES = [
"https://www.googleapis.com/auth/gmail.readonly",
"https://www.googleapis.com/auth/drive.readonly",
"https://www.googleapis.com/auth/userinfo.profile",
"https://www.googleapis.com/auth/userinfo.email",
"openid"
"openid",
]
@router.get("/config")

View file

@ -59,6 +59,7 @@ SCOPES = [
"Mail.Send", # Send mail
"Files.ReadWrite.All", # Read and write files (SharePoint/OneDrive)
"Sites.ReadWrite.All", # Read and write SharePoint sites
"Team.ReadBasic.All", # List joined teams and channels
# Teams Bot: Meeting and chat access (requires admin consent)
"OnlineMeetings.Read", # Read user's Teams meeting details (delegated scope)
"Chat.ReadWrite", # Read and write Teams chat messages

View file

@ -208,7 +208,8 @@ async def runAgentLoop(
results = await _executeToolCalls(toolCalls, toolRegistry, {
"workflowId": workflowId,
"userId": userId,
"featureInstanceId": featureInstanceId
"featureInstanceId": featureInstanceId,
"mandateId": mandateId,
})
state.totalToolCalls += len(results)

View file

@ -209,7 +209,8 @@ class AgentService:
"## Attached Files\n"
"These files have been uploaded and processed through the extraction pipeline.\n"
"Use `readFile(fileId)` to read text content, `readContentObjects(fileId)` for structured access, "
"or `describeImage(fileId)` for image analysis.\n\n"
"or `describeImage(fileId)` for image analysis.\n"
"When generating documents with `renderDocument`, embed images using `![alt text](file:fileId)` in the markdown content.\n\n"
)
header += "\n\n".join(fileDescriptions)
return f"{header}\n\n---\n\nUser request: {prompt}"
@ -1226,68 +1227,415 @@ def _registerCoreTools(registry: ToolRegistry, services):
readOnly=True,
)
# ---- Document generation tool ----
# ---- Document rendering tool ----
async def _generateDocument(args: Dict[str, Any], context: Dict[str, Any]):
"""Generate a document in any format using the existing GenerationService + RendererRegistry."""
prompt = args.get("prompt", "")
def _markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
"""Convert markdown content to the standard document JSON format expected by renderers."""
import re as _re
sections = []
order = 0
lines = markdown.split("\n")
i = 0
def _nextId():
nonlocal order
order += 1
return f"s_{order}"
while i < len(lines):
line = lines[i]
# --- Headings ---
headingMatch = _re.match(r'^(#{1,6})\s+(.+)', line)
if headingMatch:
level = len(headingMatch.group(1))
text = headingMatch.group(2).strip()
sections.append({
"id": _nextId(), "content_type": "heading", "order": order,
"elements": [{"content": {"text": text, "level": level}}],
})
i += 1
continue
# --- Fenced code blocks ---
codeMatch = _re.match(r'^```(\w*)', line)
if codeMatch:
lang = codeMatch.group(1) or "text"
codeLines = []
i += 1
while i < len(lines) and not lines[i].startswith("```"):
codeLines.append(lines[i])
i += 1
i += 1
sections.append({
"id": _nextId(), "content_type": "code_block", "order": order,
"elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}],
})
continue
# --- Tables ---
tableMatch = _re.match(r'^\|(.+)\|$', line)
if tableMatch and (i + 1) < len(lines) and _re.match(r'^\|[\s\-:|]+\|$', lines[i + 1]):
headerCells = [c.strip() for c in tableMatch.group(1).split("|")]
i += 2
rows = []
while i < len(lines) and _re.match(r'^\|(.+)\|$', lines[i]):
rowCells = [c.strip() for c in lines[i][1:-1].split("|")]
rows.append(rowCells)
i += 1
sections.append({
"id": _nextId(), "content_type": "table", "order": order,
"elements": [{"content": {"headers": headerCells, "rows": rows}}],
})
continue
# --- Bullet / numbered lists ---
listMatch = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', line)
if listMatch:
isNumbered = bool(_re.match(r'\d+[.)]', listMatch.group(2)))
items = []
while i < len(lines) and _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]):
m = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i])
items.append({"text": m.group(3).strip()})
i += 1
sections.append({
"id": _nextId(), "content_type": "bullet_list", "order": order,
"elements": [{"content": {"items": items, "list_type": "numbered" if isNumbered else "bullet"}}],
})
continue
# --- Empty lines (skip) ---
if not line.strip():
i += 1
continue
# --- Images: ![alt](file:fileId) or ![alt](url) ---
imgMatch = _re.match(r'^!\[([^\]]*)\]\(([^)]+)\)', line)
if imgMatch:
altText = imgMatch.group(1).strip() or "Image"
src = imgMatch.group(2).strip()
fileId = ""
if src.startswith("file:"):
fileId = src[5:]
sections.append({
"id": _nextId(), "content_type": "image", "order": order,
"elements": [{
"content": {
"altText": altText,
"base64Data": "",
"_fileRef": fileId,
"_srcUrl": src if not fileId else "",
}
}],
})
i += 1
continue
# --- Paragraph (collect consecutive non-empty lines) ---
paraLines = []
while i < len(lines) and lines[i].strip() and not _re.match(r'^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )', lines[i]):
paraLines.append(lines[i])
i += 1
if paraLines:
sections.append({
"id": _nextId(), "content_type": "paragraph", "order": order,
"elements": [{"content": {"text": " ".join(paraLines)}}],
})
continue
i += 1
if not sections:
sections.append({
"id": _nextId(), "content_type": "paragraph", "order": order,
"elements": [{"content": {"text": markdown.strip() or "(empty)"}}],
})
return {
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "agent_rendering",
"title": title,
"language": language,
},
"documents": [{
"id": "doc_1",
"title": title,
"sections": sections,
}],
}
async def _renderDocument(args: Dict[str, Any], context: Dict[str, Any]):
"""Render agent-produced markdown content into any document format via the RendererRegistry."""
import re as _re
content = args.get("content", "")
outputFormat = args.get("outputFormat", "pdf")
title = args.get("title", "Generated Document")
title = args.get("title", "Document")
language = args.get("language", "de")
if not prompt:
return ToolResult(toolCallId="", toolName="generateDocument", success=False, error="prompt is required")
if not content:
return ToolResult(toolCallId="", toolName="renderDocument", success=False, error="content is required")
try:
structuredContent = _markdownToDocumentJson(content, title, language)
# Resolve image file references (file:fileId) to base64 data from Knowledge Store
knowledgeService = None
try:
knowledgeService = services.getService("knowledge")
except Exception:
pass
resolvedImages = 0
for doc in structuredContent.get("documents", []):
for section in doc.get("sections", []):
if section.get("content_type") != "image":
continue
for element in section.get("elements", []):
contentObj = element.get("content", {})
fileRef = contentObj.get("_fileRef", "")
if not fileRef or contentObj.get("base64Data"):
continue
if knowledgeService:
chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef)
imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
if imageChunks:
contentObj["base64Data"] = imageChunks[0].get("data", "")
chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png")
contentObj["mimeType"] = chunkMime
resolvedImages += 1
if not contentObj.get("base64Data"):
try:
rawBytes = services.chat.getFileData(fileRef)
if rawBytes:
import base64 as _b64
contentObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii")
contentObj["mimeType"] = "image/png"
resolvedImages += 1
except Exception:
pass
contentObj.pop("_fileRef", None)
contentObj.pop("_srcUrl", None)
sectionCount = len(structuredContent.get("documents", [{}])[0].get("sections", []))
logger.info(f"renderDocument: parsed {sectionCount} sections from markdown ({len(content)} chars), resolved {resolvedImages} image(s), format={outputFormat}")
generationService = services.getService("generation")
aiService = services.ai
structuredContent = await generationService.generateDocumentWithTwoPhases(userPrompt=prompt)
documents = await generationService.renderReport(
extractedContent=structuredContent,
outputFormat=outputFormat,
language="de",
language=language,
title=title,
userPrompt=prompt,
aiService=aiService,
userPrompt=content,
)
if not documents:
return ToolResult(toolCallId="", toolName="generateDocument", success=False, error="Rendering produced no documents")
return ToolResult(toolCallId="", toolName="renderDocument", success=False, error="Rendering produced no output")
savedFiles = []
sideEvents = []
chatService = services.chat
for doc in documents:
docData = doc.data if hasattr(doc, "data") else doc.get("data", b"")
docName = doc.fileName if hasattr(doc, "fileName") else doc.get("fileName", f"{title}.{outputFormat}")
docMime = doc.mimeType if hasattr(doc, "mimeType") else doc.get("mimeType", "application/octet-stream")
fileItem = chatService.interfaceDbComponent.saveGeneratedFile(
docData, docName, docMime,
) if hasattr(chatService.interfaceDbComponent, "saveGeneratedFile") else None
sanitizedTitle = _re.sub(r'[^a-zA-Z0-9._-]', '_', title).strip('_') or "document"
for doc in documents:
docData = doc.documentData if hasattr(doc, "documentData") else b""
docName = doc.filename if hasattr(doc, "filename") else f"{sanitizedTitle}.{outputFormat}"
docMime = doc.mimeType if hasattr(doc, "mimeType") else "application/octet-stream"
if not docName.lower().endswith(f".{outputFormat}"):
docName = f"{sanitizedTitle}.{outputFormat}"
fileItem = None
if hasattr(chatService.interfaceDbComponent, "saveGeneratedFile"):
fileItem = chatService.interfaceDbComponent.saveGeneratedFile(docData, docName, docMime)
else:
fileItem, _ = chatService.interfaceDbComponent.saveUploadedFile(docData, docName)
if fileItem:
savedFiles.append(f"- {docName} (id: {fileItem.id if hasattr(fileItem, 'id') else fileItem.get('id', '?')})")
else:
savedFiles.append(f"- {docName} (generated, not saved)")
fid = fileItem.id if hasattr(fileItem, "id") else fileItem.get("id", "?")
savedFiles.append(f"- {docName} (id: {fid})")
sideEvents.append({
"type": "fileCreated",
"data": {
"fileId": fid,
"fileName": docName,
"mimeType": docMime,
"fileSize": len(docData),
},
})
result = f"Generated {len(documents)} document(s):\n" + "\n".join(savedFiles)
return ToolResult(toolCallId="", toolName="generateDocument", success=True, data=result)
result = f"Rendered {len(documents)} document(s):\n" + "\n".join(savedFiles)
return ToolResult(toolCallId="", toolName="renderDocument", success=True, data=result, sideEvents=sideEvents)
except Exception as e:
return ToolResult(toolCallId="", toolName="generateDocument", success=False, error=str(e))
logger.error(f"renderDocument failed: {e}")
return ToolResult(toolCallId="", toolName="renderDocument", success=False, error=str(e))
registry.register(
"generateDocument", _generateDocument,
description="Generate a document in any format (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT).",
"renderDocument", _renderDocument,
description=(
"Render markdown content into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). "
"You write the full document content as markdown, then this tool converts and renders it. "
"To embed images from uploaded files, use markdown image syntax with the file ID: ![alt text](file:fileId). "
"The images will be resolved from the Knowledge Store and embedded in the output document."
),
parameters={
"type": "object",
"properties": {
"prompt": {"type": "string", "description": "What the document should contain and how it should look"},
"content": {"type": "string", "description": "Full document content as markdown (headings, tables, lists, code blocks, paragraphs, images via ![alt](file:fileId))"},
"outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
"title": {"type": "string", "description": "Document title", "default": "Generated Document"},
"title": {"type": "string", "description": "Document title", "default": "Document"},
"language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"},
},
"required": ["prompt"],
"required": ["content"],
},
readOnly=False,
)
# ── textToSpeech tool ──────────────────────────────────────────────
def _stripMarkdownForTts(text: str) -> str:
"""Strip markdown formatting so TTS reads clean speech text."""
import re as _re
t = text
t = _re.sub(r'\*\*(.+?)\*\*', r'\1', t)
t = _re.sub(r'\*(.+?)\*', r'\1', t)
t = _re.sub(r'__(.+?)__', r'\1', t)
t = _re.sub(r'_(.+?)_', r'\1', t)
t = _re.sub(r'`[^`]+`', lambda m: m.group(0)[1:-1], t)
t = _re.sub(r'^#{1,6}\s*', '', t, flags=_re.MULTILINE)
t = _re.sub(r'^\s*[-*+]\s+', '', t, flags=_re.MULTILINE)
t = _re.sub(r'^\s*\d+\.\s+', '', t, flags=_re.MULTILINE)
t = _re.sub(r'\[(.+?)\]\(.+?\)', r'\1', t)
t = _re.sub(r'!\[.*?\]\(.*?\)', '', t)
t = _re.sub(r'\n{3,}', '\n\n', t)
return t.strip()
async def _textToSpeech(args: Dict[str, Any], context: Dict[str, Any]):
    """Convert text to speech using Google Cloud TTS, deliver audio via SSE.

    Args:
        args: Tool-call arguments: ``text`` (required), ``language``
            (BCP-47 tag or "auto"; default "auto"), optional ``voiceName``.
        context: Execution context; reads ``mandateId``, ``featureInstanceId``
            and ``userId`` when present.

    Returns:
        ToolResult whose ``sideEvents`` carry a ``voiceResponse`` event with
        base64-encoded audio on success, or a failed ToolResult on error.
    """
    import base64 as _b64
    text = args.get("text", "")
    language = args.get("language", "auto")
    voiceName = args.get("voiceName")
    if not text:
        return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="text is required")
    # Markdown markers would be read out loud; strip them before synthesis.
    cleanText = _stripMarkdownForTts(text)
    if not cleanText:
        return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="text is empty after stripping markdown")
    try:
        from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
        mandateId = context.get("mandateId", "")
        # NOTE(review): currentUser=None — presumably the interface resolves
        # credentials from the mandate alone; confirm against getVoiceInterface.
        voiceInterface = getVoiceInterface(currentUser=None, mandateId=mandateId)
        # Maps ISO 639-1 codes (as produced by detection) to BCP-47 tags.
        _ISO_TO_BCP47 = {
            "de": "de-DE", "en": "en-US", "fr": "fr-FR", "it": "it-IT",
            "es": "es-ES", "pt": "pt-BR", "nl": "nl-NL", "pl": "pl-PL",
            "ru": "ru-RU", "ja": "ja-JP", "zh": "zh-CN", "ko": "ko-KR",
            "ar": "ar-XA", "hi": "hi-IN", "tr": "tr-TR", "sv": "sv-SE",
        }
        if language == "auto":
            # Detect the language from a short prefix; any failure falls
            # back to de-DE rather than aborting the synthesis.
            try:
                snippet = cleanText[:500]
                detectResult = await voiceInterface.detectLanguage(snippet)
                if detectResult and detectResult.get("success"):
                    detected = detectResult.get("language", "de")
                    language = _ISO_TO_BCP47.get(detected, detected)
                    if "-" not in language:
                        # Unknown two-letter code: synthesize an xx-XX style tag.
                        language = _ISO_TO_BCP47.get(language, f"{language}-{language.upper()}")
                    logger.info(f"textToSpeech: auto-detected language '{detected}' -> '{language}'")
                else:
                    language = "de-DE"
            except Exception as detectErr:
                logger.warning(f"textToSpeech: language detection failed: {detectErr}, defaulting to de-DE")
                language = "de-DE"
        if not voiceName:
            # Best effort: look up the user's configured voice for the
            # resolved language (ttsVoiceMap first, then the single ttsVoice).
            try:
                featureInstanceId = context.get("featureInstanceId", "")
                userId = context.get("userId", "")
                if featureInstanceId and userId:
                    dbMgmt = services.chat.interfaceDbApp if hasattr(services.chat, "interfaceDbApp") else None
                    if dbMgmt and hasattr(dbMgmt, "getVoiceSettings"):
                        vs = dbMgmt.getVoiceSettings(userId)
                        if vs:
                            voiceMap = {}
                            if hasattr(vs, "ttsVoiceMap") and vs.ttsVoiceMap:
                                voiceMap = vs.ttsVoiceMap if isinstance(vs.ttsVoiceMap, dict) else {}
                            if language in voiceMap:
                                # Map entries may be {"voiceName": ...} dicts or plain strings.
                                voiceName = voiceMap[language].get("voiceName") if isinstance(voiceMap[language], dict) else voiceMap[language]
                                logger.info(f"textToSpeech: using configured voice '{voiceName}' for {language}")
                            elif hasattr(vs, "ttsVoice") and vs.ttsVoice and hasattr(vs, "ttsLanguage") and vs.ttsLanguage == language:
                                voiceName = vs.ttsVoice
            except Exception as prefErr:
                # Preferences are optional; synthesis proceeds with the default voice.
                logger.debug(f"textToSpeech: could not load voice preferences: {prefErr}")
        ttsResult = await voiceInterface.textToSpeech(
            text=cleanText,
            languageCode=language,
            voiceName=voiceName,
        )
        if not ttsResult or not ttsResult.get("success"):
            errMsg = ttsResult.get("error", "TTS call failed") if ttsResult else "TTS returned None"
            return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error=errMsg)
        audioContent = ttsResult.get("audioContent", "")
        if not audioContent:
            return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="TTS returned no audio")
        # Normalize the payload to a base64 string; str values are assumed
        # to be base64 already — TODO confirm against the voice interface.
        if isinstance(audioContent, bytes):
            audioB64 = _b64.b64encode(audioContent).decode("ascii")
        elif isinstance(audioContent, str):
            audioB64 = audioContent
        else:
            audioB64 = str(audioContent)
        audioFormat = ttsResult.get("audioFormat", "mp3")
        charCount = len(cleanText)
        usedVoice = voiceName or "default"
        logger.info(f"textToSpeech: generated {audioFormat} audio for {charCount} chars, language={language}, voice={usedVoice}")
        return ToolResult(
            toolCallId="", toolName="textToSpeech", success=True,
            data=f"Audio generated ({charCount} characters, language={language}, voice={usedVoice}). Playing in chat.",
            sideEvents=[{
                "type": "voiceResponse",
                "data": {
                    "audio": audioB64,
                    "format": audioFormat,
                    "language": language,
                    "charCount": charCount,
                },
            }],
        )
    except ImportError:
        # Voice stack not installed in this deployment.
        return ToolResult(toolCallId="", toolName="textToSpeech", success=False,
                          error="Voice interface not available (missing dependency)")
    except Exception as e:
        logger.error(f"textToSpeech failed: {e}")
        return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error=str(e))
# Describe the TTS tool once, then register it.
ttsDescription = (
    "Convert text to speech audio. The audio is played directly in the chat. "
    "Use this when the user asks you to read something aloud, narrate, or speak. "
    "Language is auto-detected from the text content. You do NOT need to specify a language."
)
ttsParameters = {
    "type": "object",
    "properties": {
        "text": {"type": "string", "description": "The text to convert to speech. Can include markdown (will be stripped automatically)."},
        "language": {"type": "string", "description": "BCP-47 language code (e.g. de-DE, en-US) or 'auto' for automatic detection", "default": "auto"},
        "voiceName": {"type": "string", "description": "Optional specific voice name. If omitted, uses the configured voice for the detected language."},
    },
    "required": ["text"],
}
# Not read-only: the tool emits an audio side event into the chat stream.
registry.register(
    "textToSpeech",
    _textToSpeech,
    description=ttsDescription,
    parameters=ttsParameters,
    readOnly=False,
)