enhanced stt/tts functions
This commit is contained in:
parent
7fe6f9bc97
commit
3d49bd9d03
7 changed files with 510 additions and 39 deletions
|
|
@ -229,15 +229,15 @@ class OutlookAdapter(_GraphApiMixin, ServiceAdapter):
|
|||
return [
|
||||
ExternalEntry(
|
||||
name=f.get("displayName", ""),
|
||||
path=f"/{f.get('displayName', '')}",
|
||||
path=f"/{f.get('id', '')}",
|
||||
isFolder=True,
|
||||
metadata={"id": f.get("id"), "totalItemCount": f.get("totalItemCount")},
|
||||
)
|
||||
for f in result.get("value", [])
|
||||
]
|
||||
|
||||
folderName = path.strip("/")
|
||||
endpoint = f"me/mailFolders/{folderName}/messages?$top=25&$orderby=receivedDateTime desc"
|
||||
folderId = path.strip("/")
|
||||
endpoint = f"me/mailFolders/{folderId}/messages?$top=25&$orderby=receivedDateTime desc"
|
||||
result = await self._graphGet(endpoint)
|
||||
if "error" in result:
|
||||
return []
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
# All rights reserved.
|
||||
"""Voice settings datamodel."""
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from modules.shared.attributeUtils import registerModelLabels
|
||||
from modules.shared.timeUtils import getUtcTimestamp
|
||||
|
|
@ -16,6 +17,7 @@ class VoiceSettings(BaseModel):
|
|||
sttLanguage: str = Field(default="de-DE", description="Speech-to-Text language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
|
||||
ttsLanguage: str = Field(default="de-DE", description="Text-to-Speech language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
|
||||
ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
|
||||
ttsVoiceMap: Dict[str, Any] = Field(default_factory=dict, description="Per-language voice mapping, e.g. {'de-DE': {'voiceName': 'de-DE-Wavenet-A'}, 'en-US': {'voiceName': 'en-US-Wavenet-C'}}", json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False})
|
||||
translationEnabled: bool = Field(default=True, description="Whether translation is enabled", json_schema_extra={"frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False})
|
||||
targetLanguage: str = Field(default="en-US", description="Target language for translation", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False})
|
||||
creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False})
|
||||
|
|
@ -33,6 +35,7 @@ registerModelLabels(
|
|||
"sttLanguage": {"en": "STT Language", "fr": "Langue STT"},
|
||||
"ttsLanguage": {"en": "TTS Language", "fr": "Langue TTS"},
|
||||
"ttsVoice": {"en": "TTS Voice", "fr": "Voix TTS"},
|
||||
"ttsVoiceMap": {"en": "TTS Voice Map", "fr": "Carte des voix TTS"},
|
||||
"translationEnabled": {"en": "Translation Enabled", "fr": "Traduction activée"},
|
||||
"targetLanguage": {"en": "Target Language", "fr": "Langue cible"},
|
||||
"creationDate": {"en": "Creation Date", "fr": "Date de création"},
|
||||
|
|
|
|||
|
|
@ -718,3 +718,120 @@ async def synthesizeVoice(
|
|||
if not text:
|
||||
raise HTTPException(status_code=400, detail="text is required")
|
||||
return JSONResponse({"audio": None, "note": "TTS via browser Speech Synthesis API recommended"})
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Voice Settings Endpoints
|
||||
# =========================================================================
|
||||
|
||||
@router.get("/{instanceId}/settings/voice")
@limiter.limit("30/minute")
async def getVoiceSettings(
    request: Request,
    instanceId: str = Path(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Return the calling user's voice settings for this feature instance.

    Access to the instance is validated first. If no settings row exists yet,
    one is requested via ``getOrCreateVoiceSettings``. The response body is the
    model dump of the settings, or an empty object if none could be obtained.
    """
    _validateInstanceAccess(instanceId, context)
    store = _getDbManagement(context, instanceId)
    currentUserId = str(context.user.id)

    # Prefer the existing record; fall back to get-or-create only when missing.
    settings = store.getVoiceSettings(currentUserId) or store.getOrCreateVoiceSettings(currentUserId)
    if not settings:
        return JSONResponse({})
    return JSONResponse(settings.model_dump())
|
||||
|
||||
|
||||
@router.put("/{instanceId}/settings/voice")
@limiter.limit("30/minute")
async def updateVoiceSettings(
    request: Request,
    instanceId: str = Path(...),
    body: dict = Body(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Create or update the calling user's voice settings for this instance.

    Identity and bookkeeping fields (``id``, ``userId``, ``mandateId``,
    ``featureInstanceId``, ``creationDate``) are always taken from the request
    context, never from the request body. This holds on the create path as
    well as on the update path, so a caller cannot create settings on behalf
    of another user or mandate by putting those keys into the payload.

    Returns:
        JSONResponse wrapping whatever ``createVoiceSettings`` /
        ``updateVoiceSettings`` of the DB layer returns.
    """
    _validateInstanceAccess(instanceId, context)
    dbMgmt = _getDbManagement(context, instanceId)
    userId = str(context.user.id)

    # Strip server-controlled keys once so both branches share the same rule.
    protectedFields = ("id", "userId", "mandateId", "featureInstanceId", "creationDate")
    clientFields = {k: v for k, v in body.items() if k not in protectedFields}

    if not dbMgmt.getVoiceSettings(userId):
        createData = {
            "userId": userId,
            "mandateId": str(context.mandateId) if context.mandateId else "",
            "featureInstanceId": instanceId,
        }
        # Safe: clientFields can no longer contain any of the identity keys.
        createData.update(clientFields)
        created = dbMgmt.createVoiceSettings(createData)
        return JSONResponse(created)

    updated = dbMgmt.updateVoiceSettings(userId, clientFields)
    return JSONResponse(updated)
|
||||
|
||||
|
||||
@router.get("/{instanceId}/voice/languages")
@limiter.limit("30/minute")
async def getVoiceLanguages(
    request: Request,
    instanceId: str = Path(...),
    context: RequestContext = Depends(getRequestContext),
):
    """List the TTS languages offered by the voice interface.

    The voice interface may answer with either a dict carrying a
    ``"languages"`` entry or with the list itself; both are normalized into
    ``{"languages": [...]}``.
    """
    mandateId = _validateInstanceAccess(instanceId, context)
    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface

    provider = getVoiceInterface(context.user, mandateId)
    payload = await provider.getAvailableLanguages()
    if isinstance(payload, dict):
        # Unwrap the envelope; a missing key yields an empty list.
        payload = payload.get("languages", [])
    return JSONResponse({"languages": payload})
|
||||
|
||||
|
||||
@router.get("/{instanceId}/voice/voices")
@limiter.limit("30/minute")
async def getVoiceVoices(
    request: Request,
    instanceId: str = Path(...),
    language: str = Query("de-DE"),
    context: RequestContext = Depends(getRequestContext),
):
    """List the TTS voices the voice interface offers for ``language``.

    The voice interface may answer with either a dict carrying a ``"voices"``
    entry or with the list itself; both are normalized into
    ``{"voices": [...]}``.
    """
    mandateId = _validateInstanceAccess(instanceId, context)
    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface

    provider = getVoiceInterface(context.user, mandateId)
    payload = await provider.getAvailableVoices(language)
    if isinstance(payload, dict):
        # Unwrap the envelope; a missing key yields an empty list.
        payload = payload.get("voices", [])
    return JSONResponse({"voices": payload})
|
||||
|
||||
|
||||
@router.post("/{instanceId}/voice/test")
@limiter.limit("10/minute")
async def testVoice(
    request: Request,
    instanceId: str = Path(...),
    body: dict = Body(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Synthesize a short sample so the user can audition a voice.

    Body keys (all optional): ``text``, ``language``, ``voiceId``. Missing or
    empty ``text`` / ``language`` fall back to a German sample sentence and
    ``de-DE``.

    Returns:
        ``{"success": True, "audio": <base64>, "format": ..., "text": ...}``
        on success, ``{"success": False, "error": ...}`` when the TTS call
        yields no audio.

    Raises:
        HTTPException: 500 if the TTS call itself raises.
    """
    import base64

    mandateId = _validateInstanceAccess(instanceId, context)
    # `or` instead of a .get() default so that empty strings also fall back.
    text = body.get("text") or "Hallo, das ist ein Stimmtest."
    language = body.get("language") or "de-DE"
    voiceId = body.get("voiceId")

    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
    voiceInterface = getVoiceInterface(context.user, mandateId)

    try:
        result = await voiceInterface.textToSpeech(text=text, languageCode=language, voiceName=voiceId)
    except Exception as e:
        logger.exception(f"Voice test failed: {e}")
        raise HTTPException(status_code=500, detail=f"TTS test failed: {str(e)}") from e

    audioContent = result.get("audioContent") if isinstance(result, dict) else None
    if not audioContent:
        return JSONResponse({"success": False, "error": "TTS returned no audio"})

    if isinstance(audioContent, bytes):
        audioB64 = base64.b64encode(audioContent).decode("ascii")
    else:
        # NOTE(review): a str payload is assumed to be base64 already, which is
        # how the textToSpeech agent tool treats it; encoding it a second time
        # would hand the client undecodable audio. Confirm against the voice
        # interface's return contract.
        audioB64 = str(audioContent)

    return JSONResponse({
        "success": True,
        "audio": audioB64,
        # Report the format the interface states, defaulting to mp3.
        "format": result.get("audioFormat", "mp3"),
        "text": text,
    })
|
||||
|
|
|
|||
|
|
@ -87,9 +87,10 @@ CLIENT_SECRET = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET")
|
|||
REDIRECT_URI = APP_CONFIG.get("Service_GOOGLE_REDIRECT_URI")
|
||||
SCOPES = [
|
||||
"https://www.googleapis.com/auth/gmail.readonly",
|
||||
"https://www.googleapis.com/auth/drive.readonly",
|
||||
"https://www.googleapis.com/auth/userinfo.profile",
|
||||
"https://www.googleapis.com/auth/userinfo.email",
|
||||
"openid"
|
||||
"openid",
|
||||
]
|
||||
|
||||
@router.get("/config")
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ SCOPES = [
|
|||
"Mail.Send", # Send mail
|
||||
"Files.ReadWrite.All", # Read and write files (SharePoint/OneDrive)
|
||||
"Sites.ReadWrite.All", # Read and write SharePoint sites
|
||||
"Team.ReadBasic.All", # List joined teams and channels
|
||||
# Teams Bot: Meeting and chat access (requires admin consent)
|
||||
"OnlineMeetings.Read", # Read user's Teams meeting details (delegated scope)
|
||||
"Chat.ReadWrite", # Read and write Teams chat messages
|
||||
|
|
|
|||
|
|
@ -208,7 +208,8 @@ async def runAgentLoop(
|
|||
results = await _executeToolCalls(toolCalls, toolRegistry, {
|
||||
"workflowId": workflowId,
|
||||
"userId": userId,
|
||||
"featureInstanceId": featureInstanceId
|
||||
"featureInstanceId": featureInstanceId,
|
||||
"mandateId": mandateId,
|
||||
})
|
||||
state.totalToolCalls += len(results)
|
||||
|
||||
|
|
|
|||
|
|
@ -209,7 +209,8 @@ class AgentService:
|
|||
"## Attached Files\n"
|
||||
"These files have been uploaded and processed through the extraction pipeline.\n"
|
||||
"Use `readFile(fileId)` to read text content, `readContentObjects(fileId)` for structured access, "
|
||||
"or `describeImage(fileId)` for image analysis.\n\n"
|
||||
"or `describeImage(fileId)` for image analysis.\n"
|
||||
"When generating documents with `renderDocument`, embed images using `` in the markdown content.\n\n"
|
||||
)
|
||||
header += "\n\n".join(fileDescriptions)
|
||||
return f"{header}\n\n---\n\nUser request: {prompt}"
|
||||
|
|
@ -1226,68 +1227,415 @@ def _registerCoreTools(registry: ToolRegistry, services):
|
|||
readOnly=True,
|
||||
)
|
||||
|
||||
# ---- Document generation tool ----
|
||||
# ---- Document rendering tool ----
|
||||
|
||||
async def _generateDocument(args: Dict[str, Any], context: Dict[str, Any]):
|
||||
"""Generate a document in any format using the existing GenerationService + RendererRegistry."""
|
||||
prompt = args.get("prompt", "")
|
||||
def _markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
|
||||
"""Convert markdown content to the standard document JSON format expected by renderers."""
|
||||
import re as _re
|
||||
|
||||
sections = []
|
||||
order = 0
|
||||
lines = markdown.split("\n")
|
||||
i = 0
|
||||
|
||||
def _nextId():
|
||||
nonlocal order
|
||||
order += 1
|
||||
return f"s_{order}"
|
||||
|
||||
while i < len(lines):
|
||||
line = lines[i]
|
||||
|
||||
# --- Headings ---
|
||||
headingMatch = _re.match(r'^(#{1,6})\s+(.+)', line)
|
||||
if headingMatch:
|
||||
level = len(headingMatch.group(1))
|
||||
text = headingMatch.group(2).strip()
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "heading", "order": order,
|
||||
"elements": [{"content": {"text": text, "level": level}}],
|
||||
})
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# --- Fenced code blocks ---
|
||||
codeMatch = _re.match(r'^```(\w*)', line)
|
||||
if codeMatch:
|
||||
lang = codeMatch.group(1) or "text"
|
||||
codeLines = []
|
||||
i += 1
|
||||
while i < len(lines) and not lines[i].startswith("```"):
|
||||
codeLines.append(lines[i])
|
||||
i += 1
|
||||
i += 1
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "code_block", "order": order,
|
||||
"elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}],
|
||||
})
|
||||
continue
|
||||
|
||||
# --- Tables ---
|
||||
tableMatch = _re.match(r'^\|(.+)\|$', line)
|
||||
if tableMatch and (i + 1) < len(lines) and _re.match(r'^\|[\s\-:|]+\|$', lines[i + 1]):
|
||||
headerCells = [c.strip() for c in tableMatch.group(1).split("|")]
|
||||
i += 2
|
||||
rows = []
|
||||
while i < len(lines) and _re.match(r'^\|(.+)\|$', lines[i]):
|
||||
rowCells = [c.strip() for c in lines[i][1:-1].split("|")]
|
||||
rows.append(rowCells)
|
||||
i += 1
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "table", "order": order,
|
||||
"elements": [{"content": {"headers": headerCells, "rows": rows}}],
|
||||
})
|
||||
continue
|
||||
|
||||
# --- Bullet / numbered lists ---
|
||||
listMatch = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', line)
|
||||
if listMatch:
|
||||
isNumbered = bool(_re.match(r'\d+[.)]', listMatch.group(2)))
|
||||
items = []
|
||||
while i < len(lines) and _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]):
|
||||
m = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i])
|
||||
items.append({"text": m.group(3).strip()})
|
||||
i += 1
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "bullet_list", "order": order,
|
||||
"elements": [{"content": {"items": items, "list_type": "numbered" if isNumbered else "bullet"}}],
|
||||
})
|
||||
continue
|
||||
|
||||
# --- Empty lines (skip) ---
|
||||
if not line.strip():
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# --- Images:  or  ---
|
||||
imgMatch = _re.match(r'^!\[([^\]]*)\]\(([^)]+)\)', line)
|
||||
if imgMatch:
|
||||
altText = imgMatch.group(1).strip() or "Image"
|
||||
src = imgMatch.group(2).strip()
|
||||
fileId = ""
|
||||
if src.startswith("file:"):
|
||||
fileId = src[5:]
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "image", "order": order,
|
||||
"elements": [{
|
||||
"content": {
|
||||
"altText": altText,
|
||||
"base64Data": "",
|
||||
"_fileRef": fileId,
|
||||
"_srcUrl": src if not fileId else "",
|
||||
}
|
||||
}],
|
||||
})
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# --- Paragraph (collect consecutive non-empty lines) ---
|
||||
paraLines = []
|
||||
while i < len(lines) and lines[i].strip() and not _re.match(r'^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )', lines[i]):
|
||||
paraLines.append(lines[i])
|
||||
i += 1
|
||||
if paraLines:
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "paragraph", "order": order,
|
||||
"elements": [{"content": {"text": " ".join(paraLines)}}],
|
||||
})
|
||||
continue
|
||||
|
||||
i += 1
|
||||
|
||||
if not sections:
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "paragraph", "order": order,
|
||||
"elements": [{"content": {"text": markdown.strip() or "(empty)"}}],
|
||||
})
|
||||
|
||||
return {
|
||||
"metadata": {
|
||||
"split_strategy": "single_document",
|
||||
"source_documents": [],
|
||||
"extraction_method": "agent_rendering",
|
||||
"title": title,
|
||||
"language": language,
|
||||
},
|
||||
"documents": [{
|
||||
"id": "doc_1",
|
||||
"title": title,
|
||||
"sections": sections,
|
||||
}],
|
||||
}
|
||||
|
||||
async def _renderDocument(args: Dict[str, Any], context: Dict[str, Any]):
|
||||
"""Render agent-produced markdown content into any document format via the RendererRegistry."""
|
||||
import re as _re
|
||||
content = args.get("content", "")
|
||||
outputFormat = args.get("outputFormat", "pdf")
|
||||
title = args.get("title", "Generated Document")
|
||||
title = args.get("title", "Document")
|
||||
language = args.get("language", "de")
|
||||
|
||||
if not prompt:
|
||||
return ToolResult(toolCallId="", toolName="generateDocument", success=False, error="prompt is required")
|
||||
if not content:
|
||||
return ToolResult(toolCallId="", toolName="renderDocument", success=False, error="content is required")
|
||||
|
||||
try:
|
||||
structuredContent = _markdownToDocumentJson(content, title, language)
|
||||
|
||||
# Resolve image file references (file:fileId) to base64 data from Knowledge Store
|
||||
knowledgeService = None
|
||||
try:
|
||||
knowledgeService = services.getService("knowledge")
|
||||
except Exception:
|
||||
pass
|
||||
resolvedImages = 0
|
||||
for doc in structuredContent.get("documents", []):
|
||||
for section in doc.get("sections", []):
|
||||
if section.get("content_type") != "image":
|
||||
continue
|
||||
for element in section.get("elements", []):
|
||||
contentObj = element.get("content", {})
|
||||
fileRef = contentObj.get("_fileRef", "")
|
||||
if not fileRef or contentObj.get("base64Data"):
|
||||
continue
|
||||
if knowledgeService:
|
||||
chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef)
|
||||
imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
|
||||
if imageChunks:
|
||||
contentObj["base64Data"] = imageChunks[0].get("data", "")
|
||||
chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png")
|
||||
contentObj["mimeType"] = chunkMime
|
||||
resolvedImages += 1
|
||||
if not contentObj.get("base64Data"):
|
||||
try:
|
||||
rawBytes = services.chat.getFileData(fileRef)
|
||||
if rawBytes:
|
||||
import base64 as _b64
|
||||
contentObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii")
|
||||
contentObj["mimeType"] = "image/png"
|
||||
resolvedImages += 1
|
||||
except Exception:
|
||||
pass
|
||||
contentObj.pop("_fileRef", None)
|
||||
contentObj.pop("_srcUrl", None)
|
||||
|
||||
sectionCount = len(structuredContent.get("documents", [{}])[0].get("sections", []))
|
||||
logger.info(f"renderDocument: parsed {sectionCount} sections from markdown ({len(content)} chars), resolved {resolvedImages} image(s), format={outputFormat}")
|
||||
|
||||
generationService = services.getService("generation")
|
||||
aiService = services.ai
|
||||
|
||||
structuredContent = await generationService.generateDocumentWithTwoPhases(userPrompt=prompt)
|
||||
|
||||
documents = await generationService.renderReport(
|
||||
extractedContent=structuredContent,
|
||||
outputFormat=outputFormat,
|
||||
language="de",
|
||||
language=language,
|
||||
title=title,
|
||||
userPrompt=prompt,
|
||||
aiService=aiService,
|
||||
userPrompt=content,
|
||||
)
|
||||
|
||||
if not documents:
|
||||
return ToolResult(toolCallId="", toolName="generateDocument", success=False, error="Rendering produced no documents")
|
||||
return ToolResult(toolCallId="", toolName="renderDocument", success=False, error="Rendering produced no output")
|
||||
|
||||
savedFiles = []
|
||||
sideEvents = []
|
||||
chatService = services.chat
|
||||
for doc in documents:
|
||||
docData = doc.data if hasattr(doc, "data") else doc.get("data", b"")
|
||||
docName = doc.fileName if hasattr(doc, "fileName") else doc.get("fileName", f"{title}.{outputFormat}")
|
||||
docMime = doc.mimeType if hasattr(doc, "mimeType") else doc.get("mimeType", "application/octet-stream")
|
||||
|
||||
fileItem = chatService.interfaceDbComponent.saveGeneratedFile(
|
||||
docData, docName, docMime,
|
||||
) if hasattr(chatService.interfaceDbComponent, "saveGeneratedFile") else None
|
||||
sanitizedTitle = _re.sub(r'[^a-zA-Z0-9._-]', '_', title).strip('_') or "document"
|
||||
|
||||
for doc in documents:
|
||||
docData = doc.documentData if hasattr(doc, "documentData") else b""
|
||||
docName = doc.filename if hasattr(doc, "filename") else f"{sanitizedTitle}.{outputFormat}"
|
||||
docMime = doc.mimeType if hasattr(doc, "mimeType") else "application/octet-stream"
|
||||
|
||||
if not docName.lower().endswith(f".{outputFormat}"):
|
||||
docName = f"{sanitizedTitle}.{outputFormat}"
|
||||
|
||||
fileItem = None
|
||||
if hasattr(chatService.interfaceDbComponent, "saveGeneratedFile"):
|
||||
fileItem = chatService.interfaceDbComponent.saveGeneratedFile(docData, docName, docMime)
|
||||
else:
|
||||
fileItem, _ = chatService.interfaceDbComponent.saveUploadedFile(docData, docName)
|
||||
|
||||
if fileItem:
|
||||
savedFiles.append(f"- {docName} (id: {fileItem.id if hasattr(fileItem, 'id') else fileItem.get('id', '?')})")
|
||||
else:
|
||||
savedFiles.append(f"- {docName} (generated, not saved)")
|
||||
fid = fileItem.id if hasattr(fileItem, "id") else fileItem.get("id", "?")
|
||||
savedFiles.append(f"- {docName} (id: {fid})")
|
||||
sideEvents.append({
|
||||
"type": "fileCreated",
|
||||
"data": {
|
||||
"fileId": fid,
|
||||
"fileName": docName,
|
||||
"mimeType": docMime,
|
||||
"fileSize": len(docData),
|
||||
},
|
||||
})
|
||||
|
||||
result = f"Generated {len(documents)} document(s):\n" + "\n".join(savedFiles)
|
||||
return ToolResult(toolCallId="", toolName="generateDocument", success=True, data=result)
|
||||
result = f"Rendered {len(documents)} document(s):\n" + "\n".join(savedFiles)
|
||||
return ToolResult(toolCallId="", toolName="renderDocument", success=True, data=result, sideEvents=sideEvents)
|
||||
|
||||
except Exception as e:
|
||||
return ToolResult(toolCallId="", toolName="generateDocument", success=False, error=str(e))
|
||||
logger.error(f"renderDocument failed: {e}")
|
||||
return ToolResult(toolCallId="", toolName="renderDocument", success=False, error=str(e))
|
||||
|
||||
registry.register(
|
||||
"generateDocument", _generateDocument,
|
||||
description="Generate a document in any format (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT).",
|
||||
"renderDocument", _renderDocument,
|
||||
description=(
|
||||
"Render markdown content into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). "
|
||||
"You write the full document content as markdown, then this tool converts and renders it. "
|
||||
"To embed images from uploaded files, use markdown image syntax with the file ID: . "
|
||||
"The images will be resolved from the Knowledge Store and embedded in the output document."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"prompt": {"type": "string", "description": "What the document should contain and how it should look"},
|
||||
"content": {"type": "string", "description": "Full document content as markdown (headings, tables, lists, code blocks, paragraphs, images via )"},
|
||||
"outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
|
||||
"title": {"type": "string", "description": "Document title", "default": "Generated Document"},
|
||||
"title": {"type": "string", "description": "Document title", "default": "Document"},
|
||||
"language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"},
|
||||
},
|
||||
"required": ["prompt"],
|
||||
"required": ["content"],
|
||||
},
|
||||
readOnly=False,
|
||||
)
|
||||
|
||||
# ── textToSpeech tool ──────────────────────────────────────────────
|
||||
def _stripMarkdownForTts(text: str) -> str:
|
||||
"""Strip markdown formatting so TTS reads clean speech text."""
|
||||
import re as _re
|
||||
t = text
|
||||
t = _re.sub(r'\*\*(.+?)\*\*', r'\1', t)
|
||||
t = _re.sub(r'\*(.+?)\*', r'\1', t)
|
||||
t = _re.sub(r'__(.+?)__', r'\1', t)
|
||||
t = _re.sub(r'_(.+?)_', r'\1', t)
|
||||
t = _re.sub(r'`[^`]+`', lambda m: m.group(0)[1:-1], t)
|
||||
t = _re.sub(r'^#{1,6}\s*', '', t, flags=_re.MULTILINE)
|
||||
t = _re.sub(r'^\s*[-*+]\s+', '', t, flags=_re.MULTILINE)
|
||||
t = _re.sub(r'^\s*\d+\.\s+', '', t, flags=_re.MULTILINE)
|
||||
t = _re.sub(r'\[(.+?)\]\(.+?\)', r'\1', t)
|
||||
t = _re.sub(r'!\[.*?\]\(.*?\)', '', t)
|
||||
t = _re.sub(r'\n{3,}', '\n\n', t)
|
||||
return t.strip()
|
||||
|
||||
def _normalizeTtsLanguage(code) -> str:
    """Turn a bare ISO 639-1 code into a BCP-47 tag; pass full tags through.

    Empty or None input yields ``"de-DE"``, the default used throughout the
    textToSpeech tool.
    """
    isoToBcp47 = {
        "de": "de-DE", "en": "en-US", "fr": "fr-FR", "it": "it-IT",
        "es": "es-ES", "pt": "pt-BR", "nl": "nl-NL", "pl": "pl-PL",
        "ru": "ru-RU", "ja": "ja-JP", "zh": "zh-CN", "ko": "ko-KR",
        "ar": "ar-XA", "hi": "hi-IN", "tr": "tr-TR", "sv": "sv-SE",
        # Languages whose region is not simply the upper-cased language code.
        "cs": "cs-CZ", "da": "da-DK", "el": "el-GR", "uk": "uk-UA",
        "he": "he-IL", "nb": "nb-NO", "vi": "vi-VN", "ca": "ca-ES",
    }
    tag = str(code or "").strip().replace("_", "-")
    if not tag:
        return "de-DE"
    if "-" in tag:
        return tag
    short = tag.lower()
    # Last resort for unknown codes: xx -> xx-XX.
    return isoToBcp47.get(short, f"{short}-{short.upper()}")


def _lookupConfiguredTtsVoice(context: Dict[str, Any], language: str):
    """Return the voice the user configured for ``language``, or None.

    Looks at ``ttsVoiceMap[language]`` first (either ``{"voiceName": ...}`` or
    a plain value), then falls back to ``ttsVoice`` when the stored
    ``ttsLanguage`` equals ``language``. Returns None when the context lacks
    ``featureInstanceId`` / ``userId`` or no settings are available.
    """
    if not (context.get("featureInstanceId", "") and context.get("userId", "")):
        return None
    dbMgmt = getattr(services.chat, "interfaceDbApp", None)
    if not dbMgmt or not hasattr(dbMgmt, "getVoiceSettings"):
        return None
    vs = dbMgmt.getVoiceSettings(context.get("userId", ""))
    if not vs:
        return None

    voiceMap = getattr(vs, "ttsVoiceMap", None)
    if not isinstance(voiceMap, dict):
        voiceMap = {}
    if language in voiceMap:
        entry = voiceMap[language]
        return entry.get("voiceName") if isinstance(entry, dict) else entry
    if getattr(vs, "ttsVoice", None) and getattr(vs, "ttsLanguage", None) == language:
        return vs.ttsVoice
    return None


async def _textToSpeech(args: Dict[str, Any], context: Dict[str, Any]):
    """Convert text to speech via the voice interface and deliver the audio as a side event.

    Args:
        args: Tool arguments. ``text`` (required; markdown is stripped),
            ``language`` (BCP-47 tag, bare ISO 639-1 code, or ``"auto"`` /
            empty for detection), ``voiceName`` (optional explicit voice).
        context: Tool execution context; ``mandateId``, ``featureInstanceId``
            and ``userId`` are read from it.

    Returns:
        A ToolResult. On success its ``sideEvents`` hold one ``voiceResponse``
        event with the base64 audio, format, language and character count.
        Failures are reported as ``success=False`` results, never raised.
    """
    import base64 as _b64

    text = args.get("text", "")
    # Empty / None is treated like "auto" rather than being sent to the TTS call.
    language = args.get("language") or "auto"
    voiceName = args.get("voiceName")

    if not text:
        return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="text is required")

    cleanText = _stripMarkdownForTts(text)
    if not cleanText:
        return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="text is empty after stripping markdown")

    try:
        from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
        mandateId = context.get("mandateId", "")
        voiceInterface = getVoiceInterface(currentUser=None, mandateId=mandateId)

        if str(language).strip().lower() == "auto":
            language = "de-DE"  # used whenever detection fails or is inconclusive
            try:
                # Only a prefix is sent for detection.
                detectResult = await voiceInterface.detectLanguage(cleanText[:500])
                if detectResult and detectResult.get("success"):
                    detected = detectResult.get("language", "de")
                    language = _normalizeTtsLanguage(detected)
                    logger.info(f"textToSpeech: auto-detected language '{detected}' -> '{language}'")
            except Exception as detectErr:
                logger.warning(f"textToSpeech: language detection failed: {detectErr}, defaulting to de-DE")
        else:
            # Callers sometimes pass "de" instead of "de-DE"; normalize so the
            # per-language voice map lookup below still hits.
            language = _normalizeTtsLanguage(language)

        if not voiceName:
            # Voice preferences are best-effort: any failure falls back to the
            # interface's default voice.
            try:
                voiceName = _lookupConfiguredTtsVoice(context, language)
                if voiceName:
                    logger.info(f"textToSpeech: using configured voice '{voiceName}' for {language}")
            except Exception as prefErr:
                logger.debug(f"textToSpeech: could not load voice preferences: {prefErr}")

        ttsResult = await voiceInterface.textToSpeech(
            text=cleanText,
            languageCode=language,
            voiceName=voiceName,
        )

        if not ttsResult or not ttsResult.get("success"):
            errMsg = ttsResult.get("error", "TTS call failed") if ttsResult else "TTS returned None"
            return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error=errMsg)

        audioContent = ttsResult.get("audioContent", "")
        if not audioContent:
            return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="TTS returned no audio")

        if isinstance(audioContent, bytes):
            audioB64 = _b64.b64encode(audioContent).decode("ascii")
        else:
            # NOTE(review): a str payload is passed through on the assumption
            # that it is already base64 - confirm against the voice interface.
            audioB64 = str(audioContent)

        audioFormat = ttsResult.get("audioFormat", "mp3")
        charCount = len(cleanText)
        usedVoice = voiceName or "default"
        logger.info(f"textToSpeech: generated {audioFormat} audio for {charCount} chars, language={language}, voice={usedVoice}")

        return ToolResult(
            toolCallId="", toolName="textToSpeech", success=True,
            data=f"Audio generated ({charCount} characters, language={language}, voice={usedVoice}). Playing in chat.",
            sideEvents=[{
                "type": "voiceResponse",
                "data": {
                    "audio": audioB64,
                    "format": audioFormat,
                    "language": language,
                    "charCount": charCount,
                },
            }],
        )

    except ImportError:
        return ToolResult(toolCallId="", toolName="textToSpeech", success=False,
                          error="Voice interface not available (missing dependency)")
    except Exception as e:
        logger.exception(f"textToSpeech failed: {e}")
        return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error=str(e))
|
||||
|
||||
registry.register(
|
||||
"textToSpeech", _textToSpeech,
|
||||
description=(
|
||||
"Convert text to speech audio. The audio is played directly in the chat. "
|
||||
"Use this when the user asks you to read something aloud, narrate, or speak. "
|
||||
"Language is auto-detected from the text content. You do NOT need to specify a language."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"text": {"type": "string", "description": "The text to convert to speech. Can include markdown (will be stripped automatically)."},
|
||||
"language": {"type": "string", "description": "BCP-47 language code (e.g. de-DE, en-US) or 'auto' for automatic detection", "default": "auto"},
|
||||
"voiceName": {"type": "string", "description": "Optional specific voice name. If omitted, uses the configured voice for the detected language."},
|
||||
},
|
||||
"required": ["text"],
|
||||
},
|
||||
readOnly=False,
|
||||
)
|
||||
|
|
|
|||
Loading…
Reference in a new issue