diff --git a/modules/connectors/providerMsft/connectorMsft.py b/modules/connectors/providerMsft/connectorMsft.py index 105ae8fc..26aa3790 100644 --- a/modules/connectors/providerMsft/connectorMsft.py +++ b/modules/connectors/providerMsft/connectorMsft.py @@ -229,15 +229,15 @@ class OutlookAdapter(_GraphApiMixin, ServiceAdapter): return [ ExternalEntry( name=f.get("displayName", ""), - path=f"/{f.get('displayName', '')}", + path=f"/{f.get('id', '')}", isFolder=True, metadata={"id": f.get("id"), "totalItemCount": f.get("totalItemCount")}, ) for f in result.get("value", []) ] - folderName = path.strip("/") - endpoint = f"me/mailFolders/{folderName}/messages?$top=25&$orderby=receivedDateTime desc" + folderId = path.strip("/") + endpoint = f"me/mailFolders/{folderId}/messages?$top=25&$orderby=receivedDateTime desc" result = await self._graphGet(endpoint) if "error" in result: return [] diff --git a/modules/datamodels/datamodelVoice.py b/modules/datamodels/datamodelVoice.py index 86f4bb1d..2223a3e6 100644 --- a/modules/datamodels/datamodelVoice.py +++ b/modules/datamodels/datamodelVoice.py @@ -2,6 +2,7 @@ # All rights reserved. """Voice settings datamodel.""" +from typing import Dict, Any, Optional from pydantic import BaseModel, Field from modules.shared.attributeUtils import registerModelLabels from modules.shared.timeUtils import getUtcTimestamp @@ -16,6 +17,7 @@ class VoiceSettings(BaseModel): sttLanguage: str = Field(default="de-DE", description="Speech-to-Text language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True}) ttsLanguage: str = Field(default="de-DE", description="Text-to-Speech language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True}) ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True}) + ttsVoiceMap: Dict[str, Any] = Field(default_factory=dict, description="Per-language voice mapping, e.g. {'de-DE': {'voiceName': 'de-DE-Wavenet-A'}, 'en-US': {'voiceName': 'en-US-Wavenet-C'}}", json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False}) translationEnabled: bool = Field(default=True, description="Whether translation is enabled", json_schema_extra={"frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False}) targetLanguage: str = Field(default="en-US", description="Target language for translation", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False}) creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False}) @@ -33,6 +35,7 @@ registerModelLabels( "sttLanguage": {"en": "STT Language", "fr": "Langue STT"}, "ttsLanguage": {"en": "TTS Language", "fr": "Langue TTS"}, "ttsVoice": {"en": "TTS Voice", "fr": "Voix TTS"}, + "ttsVoiceMap": {"en": "TTS Voice Map", "fr": "Carte des voix TTS"}, "translationEnabled": {"en": "Translation Enabled", "fr": "Traduction activée"}, "targetLanguage": {"en": "Target Language", "fr": "Langue cible"}, "creationDate": {"en": "Creation Date", "fr": "Date de création"}, diff --git a/modules/features/workspace/routeFeatureWorkspace.py b/modules/features/workspace/routeFeatureWorkspace.py index 2188f8d5..5b1dc679 100644 --- a/modules/features/workspace/routeFeatureWorkspace.py +++ b/modules/features/workspace/routeFeatureWorkspace.py @@ -718,3 +718,120 @@ async def synthesizeVoice( if not text: raise HTTPException(status_code=400, detail="text is required") return JSONResponse({"audio": None, "note": "TTS via browser Speech Synthesis API recommended"}) + + +# ========================================================================= +# Voice Settings Endpoints +# ========================================================================= + +@router.get("/{instanceId}/settings/voice") +@limiter.limit("30/minute") +async def getVoiceSettings( + request: Request, + instanceId: str = Path(...), + context: RequestContext = Depends(getRequestContext), +): + """Load voice settings for the current user and instance.""" + _validateInstanceAccess(instanceId, context) + dbMgmt = _getDbManagement(context, instanceId) + userId = str(context.user.id) + vs = dbMgmt.getVoiceSettings(userId) + if not vs: + vs = dbMgmt.getOrCreateVoiceSettings(userId) + result = vs.model_dump() if vs else {} + return JSONResponse(result) + + +@router.put("/{instanceId}/settings/voice") +@limiter.limit("30/minute") +async def updateVoiceSettings( + request: Request, + instanceId: str = Path(...), + body: dict = Body(...), + context: RequestContext = Depends(getRequestContext), +): + """Update voice settings for the current user and instance.""" + _validateInstanceAccess(instanceId, context) + dbMgmt = _getDbManagement(context, instanceId) + userId = str(context.user.id) + + vs = dbMgmt.getVoiceSettings(userId) + if not vs: + createData = { + "userId": userId, + "mandateId": str(context.mandateId) if context.mandateId else "", + "featureInstanceId": instanceId, + } + createData.update(body) + created = dbMgmt.createVoiceSettings(createData) + return JSONResponse(created) + + updateData = {k: v for k, v in body.items() if k not in ("id", "userId", "mandateId", "featureInstanceId", "creationDate")} + updated = dbMgmt.updateVoiceSettings(userId, updateData) + return JSONResponse(updated) + + +@router.get("/{instanceId}/voice/languages") +@limiter.limit("30/minute") +async def getVoiceLanguages( + request: Request, + instanceId: str = Path(...), + context: RequestContext = Depends(getRequestContext), +): + """Return available TTS languages.""" + mandateId = _validateInstanceAccess(instanceId, context) + from modules.interfaces.interfaceVoiceObjects import getVoiceInterface + voiceInterface = getVoiceInterface(context.user, mandateId) + languagesResult = await voiceInterface.getAvailableLanguages() + languageList = languagesResult.get("languages", []) if isinstance(languagesResult, dict) else languagesResult + return JSONResponse({"languages": languageList}) + + +@router.get("/{instanceId}/voice/voices") +@limiter.limit("30/minute") +async def getVoiceVoices( + request: Request, + instanceId: str = Path(...), + language: str = Query("de-DE"), + context: RequestContext = Depends(getRequestContext), +): + """Return available TTS voices for a given language.""" + mandateId = _validateInstanceAccess(instanceId, context) + from modules.interfaces.interfaceVoiceObjects import getVoiceInterface + voiceInterface = getVoiceInterface(context.user, mandateId) + voicesResult = await voiceInterface.getAvailableVoices(language) + voiceList = voicesResult.get("voices", []) if isinstance(voicesResult, dict) else voicesResult + return JSONResponse({"voices": voiceList}) + + +@router.post("/{instanceId}/voice/test") +@limiter.limit("10/minute") +async def testVoice( + request: Request, + instanceId: str = Path(...), + body: dict = Body(...), + context: RequestContext = Depends(getRequestContext), +): + """Test a specific voice with a sample text.""" + import base64 + mandateId = _validateInstanceAccess(instanceId, context) + text = body.get("text", "Hallo, das ist ein Stimmtest.") + language = body.get("language", "de-DE") + voiceId = body.get("voiceId") + + from modules.interfaces.interfaceVoiceObjects import getVoiceInterface + voiceInterface = getVoiceInterface(context.user, mandateId) + + try: + result = await voiceInterface.textToSpeech(text=text, languageCode=language, voiceName=voiceId) + if result and isinstance(result, dict): + audioContent = result.get("audioContent") + if audioContent: + audioB64 = base64.b64encode( + audioContent if isinstance(audioContent, bytes) else audioContent.encode() + ).decode() + return JSONResponse({"success": True, "audio": audioB64, "format": "mp3", "text": text}) + return JSONResponse({"success": False, "error": "TTS returned no audio"}) + except Exception as e: + logger.error(f"Voice test failed: {e}") + raise HTTPException(status_code=500, detail=f"TTS test failed: {str(e)}") diff --git a/modules/routes/routeSecurityGoogle.py b/modules/routes/routeSecurityGoogle.py index 82e7cccd..ad0dcd52 100644 --- a/modules/routes/routeSecurityGoogle.py +++ b/modules/routes/routeSecurityGoogle.py @@ -87,9 +87,10 @@ CLIENT_SECRET = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET") REDIRECT_URI = APP_CONFIG.get("Service_GOOGLE_REDIRECT_URI") SCOPES = [ "https://www.googleapis.com/auth/gmail.readonly", + "https://www.googleapis.com/auth/drive.readonly", "https://www.googleapis.com/auth/userinfo.profile", "https://www.googleapis.com/auth/userinfo.email", - "openid" + "openid", ] @router.get("/config") diff --git a/modules/routes/routeSecurityMsft.py b/modules/routes/routeSecurityMsft.py index 11d35915..97604e67 100644 --- a/modules/routes/routeSecurityMsft.py +++ b/modules/routes/routeSecurityMsft.py @@ -59,6 +59,7 @@ SCOPES = [ "Mail.Send", # Send mail "Files.ReadWrite.All", # Read and write files (SharePoint/OneDrive) "Sites.ReadWrite.All", # Read and write SharePoint sites + "Team.ReadBasic.All", # List joined teams and channels # Teams Bot: Meeting and chat access (requires admin consent) "OnlineMeetings.Read", # Read user's Teams meeting details (delegated scope) "Chat.ReadWrite", # Read and write Teams chat messages diff --git a/modules/serviceCenter/services/serviceAgent/agentLoop.py b/modules/serviceCenter/services/serviceAgent/agentLoop.py index 02d072be..1636db07 100644 --- a/modules/serviceCenter/services/serviceAgent/agentLoop.py +++ b/modules/serviceCenter/services/serviceAgent/agentLoop.py @@ -208,7 +208,8 @@ async def runAgentLoop( results = await _executeToolCalls(toolCalls, toolRegistry, { "workflowId": workflowId, "userId": userId, - "featureInstanceId": featureInstanceId + "featureInstanceId": featureInstanceId, + "mandateId": mandateId, }) state.totalToolCalls += len(results) diff --git a/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py b/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py index 05ce4da9..59655442 100644 --- a/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py +++ b/modules/serviceCenter/services/serviceAgent/mainServiceAgent.py @@ -209,7 +209,8 @@ class AgentService: "## Attached Files\n" "These files have been uploaded and processed through the extraction pipeline.\n" "Use `readFile(fileId)` to read text content, `readContentObjects(fileId)` for structured access, " - "or `describeImage(fileId)` for image analysis.\n\n" + "or `describeImage(fileId)` for image analysis.\n" + "When generating documents with `renderDocument`, embed images using `![alt text](file:fileId)` in the markdown content.\n\n" ) header += "\n\n".join(fileDescriptions) return f"{header}\n\n---\n\nUser request: {prompt}" @@ -1226,68 +1227,415 @@ def _registerCoreTools(registry: ToolRegistry, services): readOnly=True, ) - # ---- Document generation tool ---- + # ---- Document rendering tool ---- - async def _generateDocument(args: Dict[str, Any], context: Dict[str, Any]): - """Generate a document in any format using the existing GenerationService + RendererRegistry.""" - prompt = args.get("prompt", "") + def _markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]: + """Convert markdown content to the standard document JSON format expected by renderers.""" + import re as _re + + sections = [] + order = 0 + lines = markdown.split("\n") + i = 0 + + def _nextId(): + nonlocal order + order += 1 + return f"s_{order}" + + while i < len(lines): + line = lines[i] + + # --- Headings --- + headingMatch = _re.match(r'^(#{1,6})\s+(.+)', line) + if headingMatch: + level = len(headingMatch.group(1)) + text = headingMatch.group(2).strip() + sections.append({ + "id": _nextId(), "content_type": "heading", "order": order, + "elements": [{"content": {"text": text, "level": level}}], + }) + i += 1 + continue + + # --- Fenced code blocks --- + codeMatch = _re.match(r'^```(\w*)', line) + if codeMatch: + lang = codeMatch.group(1) or "text" + codeLines = [] + i += 1 + while i < len(lines) and not lines[i].startswith("```"): + codeLines.append(lines[i]) + i += 1 + i += 1 + sections.append({ + "id": _nextId(), "content_type": "code_block", "order": order, + "elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}], + }) + continue + + # --- Tables --- + tableMatch = _re.match(r'^\|(.+)\|$', line) + if tableMatch and (i + 1) < len(lines) and _re.match(r'^\|[\s\-:|]+\|$', lines[i + 1]): + headerCells = [c.strip() for c in tableMatch.group(1).split("|")] + i += 2 + rows = [] + while i < len(lines) and _re.match(r'^\|(.+)\|$', lines[i]): + rowCells = [c.strip() for c in lines[i][1:-1].split("|")] + rows.append(rowCells) + i += 1 + sections.append({ + "id": _nextId(), "content_type": "table", "order": order, + "elements": [{"content": {"headers": headerCells, "rows": rows}}], + }) + continue + + # --- Bullet / numbered lists --- + listMatch = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', line) + if listMatch: + isNumbered = bool(_re.match(r'\d+[.)]', listMatch.group(2))) + items = [] + while i < len(lines) and _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]): + m = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]) + items.append({"text": m.group(3).strip()}) + i += 1 + sections.append({ + "id": _nextId(), "content_type": "bullet_list", "order": order, + "elements": [{"content": {"items": items, "list_type": "numbered" if isNumbered else "bullet"}}], + }) + continue + + # --- Empty lines (skip) --- + if not line.strip(): + i += 1 + continue + + # --- Images: ![alt](file:fileId) or ![alt](url) --- + imgMatch = _re.match(r'^!\[([^\]]*)\]\(([^)]+)\)', line) + if imgMatch: + altText = imgMatch.group(1).strip() or "Image" + src = imgMatch.group(2).strip() + fileId = "" + if src.startswith("file:"): + fileId = src[5:] + sections.append({ + "id": _nextId(), "content_type": "image", "order": order, + "elements": [{ + "content": { + "altText": altText, + "base64Data": "", + "_fileRef": fileId, + "_srcUrl": src if not fileId else "", + } + }], + }) + i += 1 + continue + + # --- Paragraph (collect consecutive non-empty lines) --- + paraLines = [] + while i < len(lines) and lines[i].strip() and not _re.match(r'^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )', lines[i]): + paraLines.append(lines[i]) + i += 1 + if paraLines: + sections.append({ + "id": _nextId(), "content_type": "paragraph", "order": order, + "elements": [{"content": {"text": " ".join(paraLines)}}], + }) + continue + + i += 1 + + if not sections: + sections.append({ + "id": _nextId(), "content_type": "paragraph", "order": order, + "elements": [{"content": {"text": markdown.strip() or "(empty)"}}], + }) + + return { + "metadata": { + "split_strategy": "single_document", + "source_documents": [], + "extraction_method": "agent_rendering", + "title": title, + "language": language, + }, + "documents": [{ + "id": "doc_1", + "title": title, + "sections": sections, + }], + } + + async def _renderDocument(args: Dict[str, Any], context: Dict[str, Any]): + """Render agent-produced markdown content into any document format via the RendererRegistry.""" + import re as _re + content = args.get("content", "") outputFormat = args.get("outputFormat", "pdf") - title = args.get("title", "Generated Document") + title = args.get("title", "Document") + language = args.get("language", "de") - if not prompt: - return ToolResult(toolCallId="", toolName="generateDocument", success=False, error="prompt is required") + if not content: + return ToolResult(toolCallId="", toolName="renderDocument", success=False, error="content is required") try: + structuredContent = _markdownToDocumentJson(content, title, language) + + # Resolve image file references (file:fileId) to base64 data from Knowledge Store + knowledgeService = None + try: + knowledgeService = services.getService("knowledge") + except Exception: + pass + resolvedImages = 0 + for doc in structuredContent.get("documents", []): + for section in doc.get("sections", []): + if section.get("content_type") != "image": + continue + for element in section.get("elements", []): + contentObj = element.get("content", {}) + fileRef = contentObj.get("_fileRef", "") + if not fileRef or contentObj.get("base64Data"): + continue + if knowledgeService: + chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef) + imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"] + if imageChunks: + contentObj["base64Data"] = imageChunks[0].get("data", "") + chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png") + contentObj["mimeType"] = chunkMime + resolvedImages += 1 + if not contentObj.get("base64Data"): + try: + rawBytes = services.chat.getFileData(fileRef) + if rawBytes: + import base64 as _b64 + contentObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii") + contentObj["mimeType"] = "image/png" + resolvedImages += 1 + except Exception: + pass + contentObj.pop("_fileRef", None) + contentObj.pop("_srcUrl", None) + + sectionCount = len(structuredContent.get("documents", [{}])[0].get("sections", [])) + logger.info(f"renderDocument: parsed {sectionCount} sections from markdown ({len(content)} chars), resolved {resolvedImages} image(s), format={outputFormat}") + generationService = services.getService("generation") - aiService = services.ai - - structuredContent = await generationService.generateDocumentWithTwoPhases(userPrompt=prompt) - documents = await generationService.renderReport( extractedContent=structuredContent, outputFormat=outputFormat, - language="de", + language=language, title=title, - userPrompt=prompt, - aiService=aiService, + userPrompt=content, ) if not documents: - return ToolResult(toolCallId="", toolName="generateDocument", success=False, error="Rendering produced no documents") + return ToolResult(toolCallId="", toolName="renderDocument", success=False, error="Rendering produced no output") savedFiles = [] + sideEvents = [] chatService = services.chat - for doc in documents: - docData = doc.data if hasattr(doc, "data") else doc.get("data", b"") - docName = doc.fileName if hasattr(doc, "fileName") else doc.get("fileName", f"{title}.{outputFormat}") - docMime = doc.mimeType if hasattr(doc, "mimeType") else doc.get("mimeType", "application/octet-stream") - fileItem = chatService.interfaceDbComponent.saveGeneratedFile( - docData, docName, docMime, - ) if hasattr(chatService.interfaceDbComponent, "saveGeneratedFile") else None + sanitizedTitle = _re.sub(r'[^a-zA-Z0-9._-]', '_', title).strip('_') or "document" + + for doc in documents: + docData = doc.documentData if hasattr(doc, "documentData") else b"" + docName = doc.filename if hasattr(doc, "filename") else f"{sanitizedTitle}.{outputFormat}" + docMime = doc.mimeType if hasattr(doc, "mimeType") else "application/octet-stream" + + if not docName.lower().endswith(f".{outputFormat}"): + docName = f"{sanitizedTitle}.{outputFormat}" + + fileItem = None + if hasattr(chatService.interfaceDbComponent, "saveGeneratedFile"): + fileItem = chatService.interfaceDbComponent.saveGeneratedFile(docData, docName, docMime) + else: + fileItem, _ = chatService.interfaceDbComponent.saveUploadedFile(docData, docName) if fileItem: - savedFiles.append(f"- {docName} (id: {fileItem.id if hasattr(fileItem, 'id') else fileItem.get('id', '?')})") - else: - savedFiles.append(f"- {docName} (generated, not saved)") + fid = fileItem.id if hasattr(fileItem, "id") else fileItem.get("id", "?") + savedFiles.append(f"- {docName} (id: {fid})") + sideEvents.append({ + "type": "fileCreated", + "data": { + "fileId": fid, + "fileName": docName, + "mimeType": docMime, + "fileSize": len(docData), + }, + }) - result = f"Generated {len(documents)} document(s):\n" + "\n".join(savedFiles) - return ToolResult(toolCallId="", toolName="generateDocument", success=True, data=result) + result = f"Rendered {len(documents)} document(s):\n" + "\n".join(savedFiles) + return ToolResult(toolCallId="", toolName="renderDocument", success=True, data=result, sideEvents=sideEvents) except Exception as e: - return ToolResult(toolCallId="", toolName="generateDocument", success=False, error=str(e)) + logger.error(f"renderDocument failed: {e}") + return ToolResult(toolCallId="", toolName="renderDocument", success=False, error=str(e)) registry.register( - "generateDocument", _generateDocument, - description="Generate a document in any format (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT).", + "renderDocument", _renderDocument, + description=( + "Render markdown content into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). " + "You write the full document content as markdown, then this tool converts and renders it. " + "To embed images from uploaded files, use markdown image syntax with the file ID: ![alt text](file:fileId). " + "The images will be resolved from the Knowledge Store and embedded in the output document." + ), parameters={ "type": "object", "properties": { - "prompt": {"type": "string", "description": "What the document should contain and how it should look"}, + "content": {"type": "string", "description": "Full document content as markdown (headings, tables, lists, code blocks, paragraphs, images via ![alt](file:fileId))"}, "outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"}, - "title": {"type": "string", "description": "Document title", "default": "Generated Document"}, + "title": {"type": "string", "description": "Document title", "default": "Document"}, + "language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"}, }, - "required": ["prompt"], + "required": ["content"], + }, + readOnly=False, + ) + + # ── textToSpeech tool ────────────────────────────────────────────── + def _stripMarkdownForTts(text: str) -> str: + """Strip markdown formatting so TTS reads clean speech text.""" + import re as _re + t = text + t = _re.sub(r'\*\*(.+?)\*\*', r'\1', t) + t = _re.sub(r'\*(.+?)\*', r'\1', t) + t = _re.sub(r'__(.+?)__', r'\1', t) + t = _re.sub(r'_(.+?)_', r'\1', t) + t = _re.sub(r'`[^`]+`', lambda m: m.group(0)[1:-1], t) + t = _re.sub(r'^#{1,6}\s*', '', t, flags=_re.MULTILINE) + t = _re.sub(r'^\s*[-*+]\s+', '', t, flags=_re.MULTILINE) + t = _re.sub(r'^\s*\d+\.\s+', '', t, flags=_re.MULTILINE) + t = _re.sub(r'\[(.+?)\]\(.+?\)', r'\1', t) + t = _re.sub(r'!\[.*?\]\(.*?\)', '', t) + t = _re.sub(r'\n{3,}', '\n\n', t) + return t.strip() + + async def _textToSpeech(args: Dict[str, Any], context: Dict[str, Any]): + """Convert text to speech using Google Cloud TTS, deliver audio via SSE.""" + import base64 as _b64 + text = args.get("text", "") + language = args.get("language", "auto") + voiceName = args.get("voiceName") + + if not text: + return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="text is required") + + cleanText = _stripMarkdownForTts(text) + if not cleanText: + return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="text is empty after stripping markdown") + + try: + from modules.interfaces.interfaceVoiceObjects import getVoiceInterface + mandateId = context.get("mandateId", "") + voiceInterface = getVoiceInterface(currentUser=None, mandateId=mandateId) + + _ISO_TO_BCP47 = { + "de": "de-DE", "en": "en-US", "fr": "fr-FR", "it": "it-IT", + "es": "es-ES", "pt": "pt-BR", "nl": "nl-NL", "pl": "pl-PL", + "ru": "ru-RU", "ja": "ja-JP", "zh": "zh-CN", "ko": "ko-KR", + "ar": "ar-XA", "hi": "hi-IN", "tr": "tr-TR", "sv": "sv-SE", + } + + if language == "auto": + try: + snippet = cleanText[:500] + detectResult = await voiceInterface.detectLanguage(snippet) + if detectResult and detectResult.get("success"): + detected = detectResult.get("language", "de") + language = _ISO_TO_BCP47.get(detected, detected) + if "-" not in language: + language = _ISO_TO_BCP47.get(language, f"{language}-{language.upper()}") + logger.info(f"textToSpeech: auto-detected language '{detected}' -> '{language}'") + else: + language = "de-DE" + except Exception as detectErr: + logger.warning(f"textToSpeech: language detection failed: {detectErr}, defaulting to de-DE") + language = "de-DE" + + if not voiceName: + try: + featureInstanceId = context.get("featureInstanceId", "") + userId = context.get("userId", "") + if featureInstanceId and userId: + dbMgmt = services.chat.interfaceDbApp if hasattr(services.chat, "interfaceDbApp") else None + if dbMgmt and hasattr(dbMgmt, "getVoiceSettings"): + vs = dbMgmt.getVoiceSettings(userId) + if vs: + voiceMap = {} + if hasattr(vs, "ttsVoiceMap") and vs.ttsVoiceMap: + voiceMap = vs.ttsVoiceMap if isinstance(vs.ttsVoiceMap, dict) else {} + if language in voiceMap: + voiceName = voiceMap[language].get("voiceName") if isinstance(voiceMap[language], dict) else voiceMap[language] + logger.info(f"textToSpeech: using configured voice '{voiceName}' for {language}") + elif hasattr(vs, "ttsVoice") and vs.ttsVoice and hasattr(vs, "ttsLanguage") and vs.ttsLanguage == language: + voiceName = vs.ttsVoice + except Exception as prefErr: + logger.debug(f"textToSpeech: could not load voice preferences: {prefErr}") + + ttsResult = await voiceInterface.textToSpeech( + text=cleanText, + languageCode=language, + voiceName=voiceName, + ) + + if not ttsResult or not ttsResult.get("success"): + errMsg = ttsResult.get("error", "TTS call failed") if ttsResult else "TTS returned None" + return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error=errMsg) + + audioContent = ttsResult.get("audioContent", "") + if not audioContent: + return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="TTS returned no audio") + + if isinstance(audioContent, bytes): + audioB64 = _b64.b64encode(audioContent).decode("ascii") + elif isinstance(audioContent, str): + audioB64 = audioContent + else: + audioB64 = str(audioContent) + + audioFormat = ttsResult.get("audioFormat", "mp3") + charCount = len(cleanText) + usedVoice = voiceName or "default" + logger.info(f"textToSpeech: generated {audioFormat} audio for {charCount} chars, language={language}, voice={usedVoice}") + + return ToolResult( + toolCallId="", toolName="textToSpeech", success=True, + data=f"Audio generated ({charCount} characters, language={language}, voice={usedVoice}). Playing in chat.", + sideEvents=[{ + "type": "voiceResponse", + "data": { + "audio": audioB64, + "format": audioFormat, + "language": language, + "charCount": charCount, + }, + }], + ) + + except ImportError: + return ToolResult(toolCallId="", toolName="textToSpeech", success=False, + error="Voice interface not available (missing dependency)") + except Exception as e: + logger.error(f"textToSpeech failed: {e}") + return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error=str(e)) + + registry.register( + "textToSpeech", _textToSpeech, + description=( + "Convert text to speech audio. The audio is played directly in the chat. " + "Use this when the user asks you to read something aloud, narrate, or speak. " + "Language is auto-detected from the text content. You do NOT need to specify a language." + ), + parameters={ + "type": "object", + "properties": { + "text": {"type": "string", "description": "The text to convert to speech. Can include markdown (will be stripped automatically)."}, + "language": {"type": "string", "description": "BCP-47 language code (e.g. de-DE, en-US) or 'auto' for automatic detection", "default": "auto"}, + "voiceName": {"type": "string", "description": "Optional specific voice name. If omitted, uses the configured voice for the detected language."}, + }, + "required": ["text"], }, readOnly=False, )