enhanced stt/tts functions
This commit is contained in:
parent
7fe6f9bc97
commit
3d49bd9d03
7 changed files with 510 additions and 39 deletions
|
|
@ -229,15 +229,15 @@ class OutlookAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
return [
|
return [
|
||||||
ExternalEntry(
|
ExternalEntry(
|
||||||
name=f.get("displayName", ""),
|
name=f.get("displayName", ""),
|
||||||
path=f"/{f.get('displayName', '')}",
|
path=f"/{f.get('id', '')}",
|
||||||
isFolder=True,
|
isFolder=True,
|
||||||
metadata={"id": f.get("id"), "totalItemCount": f.get("totalItemCount")},
|
metadata={"id": f.get("id"), "totalItemCount": f.get("totalItemCount")},
|
||||||
)
|
)
|
||||||
for f in result.get("value", [])
|
for f in result.get("value", [])
|
||||||
]
|
]
|
||||||
|
|
||||||
folderName = path.strip("/")
|
folderId = path.strip("/")
|
||||||
endpoint = f"me/mailFolders/{folderName}/messages?$top=25&$orderby=receivedDateTime desc"
|
endpoint = f"me/mailFolders/{folderId}/messages?$top=25&$orderby=receivedDateTime desc"
|
||||||
result = await self._graphGet(endpoint)
|
result = await self._graphGet(endpoint)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
return []
|
return []
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
"""Voice settings datamodel."""
|
"""Voice settings datamodel."""
|
||||||
|
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from modules.shared.attributeUtils import registerModelLabels
|
from modules.shared.attributeUtils import registerModelLabels
|
||||||
from modules.shared.timeUtils import getUtcTimestamp
|
from modules.shared.timeUtils import getUtcTimestamp
|
||||||
|
|
@ -16,6 +17,7 @@ class VoiceSettings(BaseModel):
|
||||||
sttLanguage: str = Field(default="de-DE", description="Speech-to-Text language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
|
sttLanguage: str = Field(default="de-DE", description="Speech-to-Text language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
|
||||||
ttsLanguage: str = Field(default="de-DE", description="Text-to-Speech language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
|
ttsLanguage: str = Field(default="de-DE", description="Text-to-Speech language", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
|
||||||
ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
|
ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": True})
|
||||||
|
ttsVoiceMap: Dict[str, Any] = Field(default_factory=dict, description="Per-language voice mapping, e.g. {'de-DE': {'voiceName': 'de-DE-Wavenet-A'}, 'en-US': {'voiceName': 'en-US-Wavenet-C'}}", json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False})
|
||||||
translationEnabled: bool = Field(default=True, description="Whether translation is enabled", json_schema_extra={"frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False})
|
translationEnabled: bool = Field(default=True, description="Whether translation is enabled", json_schema_extra={"frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False})
|
||||||
targetLanguage: str = Field(default="en-US", description="Target language for translation", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False})
|
targetLanguage: str = Field(default="en-US", description="Target language for translation", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False})
|
||||||
creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False})
|
creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False})
|
||||||
|
|
@ -33,6 +35,7 @@ registerModelLabels(
|
||||||
"sttLanguage": {"en": "STT Language", "fr": "Langue STT"},
|
"sttLanguage": {"en": "STT Language", "fr": "Langue STT"},
|
||||||
"ttsLanguage": {"en": "TTS Language", "fr": "Langue TTS"},
|
"ttsLanguage": {"en": "TTS Language", "fr": "Langue TTS"},
|
||||||
"ttsVoice": {"en": "TTS Voice", "fr": "Voix TTS"},
|
"ttsVoice": {"en": "TTS Voice", "fr": "Voix TTS"},
|
||||||
|
"ttsVoiceMap": {"en": "TTS Voice Map", "fr": "Carte des voix TTS"},
|
||||||
"translationEnabled": {"en": "Translation Enabled", "fr": "Traduction activée"},
|
"translationEnabled": {"en": "Translation Enabled", "fr": "Traduction activée"},
|
||||||
"targetLanguage": {"en": "Target Language", "fr": "Langue cible"},
|
"targetLanguage": {"en": "Target Language", "fr": "Langue cible"},
|
||||||
"creationDate": {"en": "Creation Date", "fr": "Date de création"},
|
"creationDate": {"en": "Creation Date", "fr": "Date de création"},
|
||||||
|
|
|
||||||
|
|
@ -718,3 +718,120 @@ async def synthesizeVoice(
|
||||||
if not text:
|
if not text:
|
||||||
raise HTTPException(status_code=400, detail="text is required")
|
raise HTTPException(status_code=400, detail="text is required")
|
||||||
return JSONResponse({"audio": None, "note": "TTS via browser Speech Synthesis API recommended"})
|
return JSONResponse({"audio": None, "note": "TTS via browser Speech Synthesis API recommended"})
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Voice Settings Endpoints
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
@router.get("/{instanceId}/settings/voice")
@limiter.limit("30/minute")
async def getVoiceSettings(
    request: Request,
    instanceId: str = Path(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Return the calling user's voice settings for the given feature instance.

    Access to the instance is validated first. If no settings are stored for
    the user yet, ``getOrCreateVoiceSettings`` is asked for them instead. The
    settings object is serialised with ``model_dump()``; an empty JSON object
    is returned when neither call yields a settings object.
    """
    _validateInstanceAccess(instanceId, context)
    dbMgmt = _getDbManagement(context, instanceId)
    userId = str(context.user.id)

    # Fall back to get-or-create only when the plain lookup finds nothing.
    settings = dbMgmt.getVoiceSettings(userId) or dbMgmt.getOrCreateVoiceSettings(userId)
    if not settings:
        return JSONResponse({})
    return JSONResponse(settings.model_dump())
|
||||||
|
|
||||||
|
|
||||||
|
@router.put("/{instanceId}/settings/voice")
@limiter.limit("30/minute")
async def updateVoiceSettings(
    request: Request,
    instanceId: str = Path(...),
    body: dict = Body(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Create or update the calling user's voice settings for an instance.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        instanceId: Feature instance the settings belong to.
        body: Settings fields supplied by the client.
        context: Request context providing the user and mandate.

    Returns:
        JSONResponse wrapping whatever ``createVoiceSettings`` or
        ``updateVoiceSettings`` returned.

    Identity and audit fields (``id``, ``userId``, ``mandateId``,
    ``featureInstanceId``, ``creationDate``) are always taken from the
    server side. They are stripped from the client body on BOTH the create
    and the update path, so a caller cannot create a settings record owned
    by another user or mandate.
    """
    _validateInstanceAccess(instanceId, context)
    dbMgmt = _getDbManagement(context, instanceId)
    userId = str(context.user.id)

    # Fields the client must never control; filtered once and reused below.
    protectedKeys = ("id", "userId", "mandateId", "featureInstanceId", "creationDate")
    clientData = {k: v for k, v in body.items() if k not in protectedKeys}

    vs = dbMgmt.getVoiceSettings(userId)
    if not vs:
        # First write for this user: seed the record with server-owned
        # identity fields, then layer the sanitised client values on top.
        createData = {
            "userId": userId,
            "mandateId": str(context.mandateId) if context.mandateId else "",
            "featureInstanceId": instanceId,
        }
        createData.update(clientData)
        created = dbMgmt.createVoiceSettings(createData)
        return JSONResponse(created)

    updated = dbMgmt.updateVoiceSettings(userId, clientData)
    return JSONResponse(updated)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{instanceId}/voice/languages")
@limiter.limit("30/minute")
async def getVoiceLanguages(
    request: Request,
    instanceId: str = Path(...),
    context: RequestContext = Depends(getRequestContext),
):
    """List the TTS languages offered by the voice interface.

    The voice interface may answer with either a dict carrying a
    ``languages`` key or with the list itself; both shapes are normalised
    to ``{"languages": [...]}`` in the response.
    """
    mandateId = _validateInstanceAccess(instanceId, context)
    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface

    available = await getVoiceInterface(context.user, mandateId).getAvailableLanguages()
    if isinstance(available, dict):
        # Unwrap the envelope; a dict without the key yields an empty list.
        available = available.get("languages", [])
    return JSONResponse({"languages": available})
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{instanceId}/voice/voices")
@limiter.limit("30/minute")
async def getVoiceVoices(
    request: Request,
    instanceId: str = Path(...),
    language: str = Query("de-DE"),
    context: RequestContext = Depends(getRequestContext),
):
    """List the TTS voices the voice interface offers for ``language``.

    The voice interface may answer with either a dict carrying a ``voices``
    key or with the list itself; both shapes are normalised to
    ``{"voices": [...]}`` in the response.
    """
    mandateId = _validateInstanceAccess(instanceId, context)
    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface

    available = await getVoiceInterface(context.user, mandateId).getAvailableVoices(language)
    if isinstance(available, dict):
        # Unwrap the envelope; a dict without the key yields an empty list.
        available = available.get("voices", [])
    return JSONResponse({"voices": available})
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/{instanceId}/voice/test")
@limiter.limit("10/minute")
async def testVoice(
    request: Request,
    instanceId: str = Path(...),
    body: dict = Body(...),
    context: RequestContext = Depends(getRequestContext),
):
    """Synthesise a short sample so the user can audition a voice.

    Reads ``text``, ``language`` and ``voiceId`` from the body (with German
    defaults for the first two), calls the voice interface and returns the
    audio base64-encoded. When the call yields no audio, a
    ``{"success": False, ...}`` payload is returned; any exception raised by
    the synthesis call is logged and converted to an HTTP 500.
    """
    import base64

    mandateId = _validateInstanceAccess(instanceId, context)
    sampleText = body.get("text", "Hallo, das ist ein Stimmtest.")
    languageCode = body.get("language", "de-DE")
    requestedVoice = body.get("voiceId")

    from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
    voiceInterface = getVoiceInterface(context.user, mandateId)

    try:
        ttsResult = await voiceInterface.textToSpeech(
            text=sampleText, languageCode=languageCode, voiceName=requestedVoice
        )
        audioContent = None
        if ttsResult and isinstance(ttsResult, dict):
            audioContent = ttsResult.get("audioContent")
        if not audioContent:
            return JSONResponse({"success": False, "error": "TTS returned no audio"})

        # NOTE(review): a str payload is re-encoded to bytes before base64
        # here — confirm the voice interface never returns audio that is
        # already base64 text, or this double-encodes it.
        rawAudio = audioContent if isinstance(audioContent, bytes) else audioContent.encode()
        audioB64 = base64.b64encode(rawAudio).decode()
        return JSONResponse({"success": True, "audio": audioB64, "format": "mp3", "text": sampleText})
    except Exception as e:
        logger.error(f"Voice test failed: {e}")
        raise HTTPException(status_code=500, detail=f"TTS test failed: {str(e)}")
|
||||||
|
|
|
||||||
|
|
@ -87,9 +87,10 @@ CLIENT_SECRET = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET")
|
||||||
REDIRECT_URI = APP_CONFIG.get("Service_GOOGLE_REDIRECT_URI")
|
REDIRECT_URI = APP_CONFIG.get("Service_GOOGLE_REDIRECT_URI")
|
||||||
SCOPES = [
|
SCOPES = [
|
||||||
"https://www.googleapis.com/auth/gmail.readonly",
|
"https://www.googleapis.com/auth/gmail.readonly",
|
||||||
|
"https://www.googleapis.com/auth/drive.readonly",
|
||||||
"https://www.googleapis.com/auth/userinfo.profile",
|
"https://www.googleapis.com/auth/userinfo.profile",
|
||||||
"https://www.googleapis.com/auth/userinfo.email",
|
"https://www.googleapis.com/auth/userinfo.email",
|
||||||
"openid"
|
"openid",
|
||||||
]
|
]
|
||||||
|
|
||||||
@router.get("/config")
|
@router.get("/config")
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,7 @@ SCOPES = [
|
||||||
"Mail.Send", # Send mail
|
"Mail.Send", # Send mail
|
||||||
"Files.ReadWrite.All", # Read and write files (SharePoint/OneDrive)
|
"Files.ReadWrite.All", # Read and write files (SharePoint/OneDrive)
|
||||||
"Sites.ReadWrite.All", # Read and write SharePoint sites
|
"Sites.ReadWrite.All", # Read and write SharePoint sites
|
||||||
|
"Team.ReadBasic.All", # List joined teams and channels
|
||||||
# Teams Bot: Meeting and chat access (requires admin consent)
|
# Teams Bot: Meeting and chat access (requires admin consent)
|
||||||
"OnlineMeetings.Read", # Read user's Teams meeting details (delegated scope)
|
"OnlineMeetings.Read", # Read user's Teams meeting details (delegated scope)
|
||||||
"Chat.ReadWrite", # Read and write Teams chat messages
|
"Chat.ReadWrite", # Read and write Teams chat messages
|
||||||
|
|
|
||||||
|
|
@ -208,7 +208,8 @@ async def runAgentLoop(
|
||||||
results = await _executeToolCalls(toolCalls, toolRegistry, {
|
results = await _executeToolCalls(toolCalls, toolRegistry, {
|
||||||
"workflowId": workflowId,
|
"workflowId": workflowId,
|
||||||
"userId": userId,
|
"userId": userId,
|
||||||
"featureInstanceId": featureInstanceId
|
"featureInstanceId": featureInstanceId,
|
||||||
|
"mandateId": mandateId,
|
||||||
})
|
})
|
||||||
state.totalToolCalls += len(results)
|
state.totalToolCalls += len(results)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -209,7 +209,8 @@ class AgentService:
|
||||||
"## Attached Files\n"
|
"## Attached Files\n"
|
||||||
"These files have been uploaded and processed through the extraction pipeline.\n"
|
"These files have been uploaded and processed through the extraction pipeline.\n"
|
||||||
"Use `readFile(fileId)` to read text content, `readContentObjects(fileId)` for structured access, "
|
"Use `readFile(fileId)` to read text content, `readContentObjects(fileId)` for structured access, "
|
||||||
"or `describeImage(fileId)` for image analysis.\n\n"
|
"or `describeImage(fileId)` for image analysis.\n"
|
||||||
|
"When generating documents with `renderDocument`, embed images using `` in the markdown content.\n\n"
|
||||||
)
|
)
|
||||||
header += "\n\n".join(fileDescriptions)
|
header += "\n\n".join(fileDescriptions)
|
||||||
return f"{header}\n\n---\n\nUser request: {prompt}"
|
return f"{header}\n\n---\n\nUser request: {prompt}"
|
||||||
|
|
@ -1226,68 +1227,415 @@ def _registerCoreTools(registry: ToolRegistry, services):
|
||||||
readOnly=True,
|
readOnly=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# ---- Document generation tool ----
|
# ---- Document rendering tool ----
|
||||||
|
|
||||||
async def _generateDocument(args: Dict[str, Any], context: Dict[str, Any]):
|
def _markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
|
||||||
"""Generate a document in any format using the existing GenerationService + RendererRegistry."""
|
"""Convert markdown content to the standard document JSON format expected by renderers."""
|
||||||
prompt = args.get("prompt", "")
|
import re as _re
|
||||||
|
|
||||||
|
sections = []
|
||||||
|
order = 0
|
||||||
|
lines = markdown.split("\n")
|
||||||
|
i = 0
|
||||||
|
|
||||||
|
def _nextId():
|
||||||
|
nonlocal order
|
||||||
|
order += 1
|
||||||
|
return f"s_{order}"
|
||||||
|
|
||||||
|
while i < len(lines):
|
||||||
|
line = lines[i]
|
||||||
|
|
||||||
|
# --- Headings ---
|
||||||
|
headingMatch = _re.match(r'^(#{1,6})\s+(.+)', line)
|
||||||
|
if headingMatch:
|
||||||
|
level = len(headingMatch.group(1))
|
||||||
|
text = headingMatch.group(2).strip()
|
||||||
|
sections.append({
|
||||||
|
"id": _nextId(), "content_type": "heading", "order": order,
|
||||||
|
"elements": [{"content": {"text": text, "level": level}}],
|
||||||
|
})
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# --- Fenced code blocks ---
|
||||||
|
codeMatch = _re.match(r'^```(\w*)', line)
|
||||||
|
if codeMatch:
|
||||||
|
lang = codeMatch.group(1) or "text"
|
||||||
|
codeLines = []
|
||||||
|
i += 1
|
||||||
|
while i < len(lines) and not lines[i].startswith("```"):
|
||||||
|
codeLines.append(lines[i])
|
||||||
|
i += 1
|
||||||
|
i += 1
|
||||||
|
sections.append({
|
||||||
|
"id": _nextId(), "content_type": "code_block", "order": order,
|
||||||
|
"elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}],
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
|
# --- Tables ---
|
||||||
|
tableMatch = _re.match(r'^\|(.+)\|$', line)
|
||||||
|
if tableMatch and (i + 1) < len(lines) and _re.match(r'^\|[\s\-:|]+\|$', lines[i + 1]):
|
||||||
|
headerCells = [c.strip() for c in tableMatch.group(1).split("|")]
|
||||||
|
i += 2
|
||||||
|
rows = []
|
||||||
|
while i < len(lines) and _re.match(r'^\|(.+)\|$', lines[i]):
|
||||||
|
rowCells = [c.strip() for c in lines[i][1:-1].split("|")]
|
||||||
|
rows.append(rowCells)
|
||||||
|
i += 1
|
||||||
|
sections.append({
|
||||||
|
"id": _nextId(), "content_type": "table", "order": order,
|
||||||
|
"elements": [{"content": {"headers": headerCells, "rows": rows}}],
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
|
# --- Bullet / numbered lists ---
|
||||||
|
listMatch = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', line)
|
||||||
|
if listMatch:
|
||||||
|
isNumbered = bool(_re.match(r'\d+[.)]', listMatch.group(2)))
|
||||||
|
items = []
|
||||||
|
while i < len(lines) and _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]):
|
||||||
|
m = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i])
|
||||||
|
items.append({"text": m.group(3).strip()})
|
||||||
|
i += 1
|
||||||
|
sections.append({
|
||||||
|
"id": _nextId(), "content_type": "bullet_list", "order": order,
|
||||||
|
"elements": [{"content": {"items": items, "list_type": "numbered" if isNumbered else "bullet"}}],
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
|
# --- Empty lines (skip) ---
|
||||||
|
if not line.strip():
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# --- Images:  or  ---
|
||||||
|
imgMatch = _re.match(r'^!\[([^\]]*)\]\(([^)]+)\)', line)
|
||||||
|
if imgMatch:
|
||||||
|
altText = imgMatch.group(1).strip() or "Image"
|
||||||
|
src = imgMatch.group(2).strip()
|
||||||
|
fileId = ""
|
||||||
|
if src.startswith("file:"):
|
||||||
|
fileId = src[5:]
|
||||||
|
sections.append({
|
||||||
|
"id": _nextId(), "content_type": "image", "order": order,
|
||||||
|
"elements": [{
|
||||||
|
"content": {
|
||||||
|
"altText": altText,
|
||||||
|
"base64Data": "",
|
||||||
|
"_fileRef": fileId,
|
||||||
|
"_srcUrl": src if not fileId else "",
|
||||||
|
}
|
||||||
|
}],
|
||||||
|
})
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# --- Paragraph (collect consecutive non-empty lines) ---
|
||||||
|
paraLines = []
|
||||||
|
while i < len(lines) and lines[i].strip() and not _re.match(r'^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )', lines[i]):
|
||||||
|
paraLines.append(lines[i])
|
||||||
|
i += 1
|
||||||
|
if paraLines:
|
||||||
|
sections.append({
|
||||||
|
"id": _nextId(), "content_type": "paragraph", "order": order,
|
||||||
|
"elements": [{"content": {"text": " ".join(paraLines)}}],
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
if not sections:
|
||||||
|
sections.append({
|
||||||
|
"id": _nextId(), "content_type": "paragraph", "order": order,
|
||||||
|
"elements": [{"content": {"text": markdown.strip() or "(empty)"}}],
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
"metadata": {
|
||||||
|
"split_strategy": "single_document",
|
||||||
|
"source_documents": [],
|
||||||
|
"extraction_method": "agent_rendering",
|
||||||
|
"title": title,
|
||||||
|
"language": language,
|
||||||
|
},
|
||||||
|
"documents": [{
|
||||||
|
"id": "doc_1",
|
||||||
|
"title": title,
|
||||||
|
"sections": sections,
|
||||||
|
}],
|
||||||
|
}
|
||||||
|
|
||||||
|
async def _renderDocument(args: Dict[str, Any], context: Dict[str, Any]):
|
||||||
|
"""Render agent-produced markdown content into any document format via the RendererRegistry."""
|
||||||
|
import re as _re
|
||||||
|
content = args.get("content", "")
|
||||||
outputFormat = args.get("outputFormat", "pdf")
|
outputFormat = args.get("outputFormat", "pdf")
|
||||||
title = args.get("title", "Generated Document")
|
title = args.get("title", "Document")
|
||||||
|
language = args.get("language", "de")
|
||||||
|
|
||||||
if not prompt:
|
if not content:
|
||||||
return ToolResult(toolCallId="", toolName="generateDocument", success=False, error="prompt is required")
|
return ToolResult(toolCallId="", toolName="renderDocument", success=False, error="content is required")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
structuredContent = _markdownToDocumentJson(content, title, language)
|
||||||
|
|
||||||
|
# Resolve image file references (file:fileId) to base64 data from Knowledge Store
|
||||||
|
knowledgeService = None
|
||||||
|
try:
|
||||||
|
knowledgeService = services.getService("knowledge")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
resolvedImages = 0
|
||||||
|
for doc in structuredContent.get("documents", []):
|
||||||
|
for section in doc.get("sections", []):
|
||||||
|
if section.get("content_type") != "image":
|
||||||
|
continue
|
||||||
|
for element in section.get("elements", []):
|
||||||
|
contentObj = element.get("content", {})
|
||||||
|
fileRef = contentObj.get("_fileRef", "")
|
||||||
|
if not fileRef or contentObj.get("base64Data"):
|
||||||
|
continue
|
||||||
|
if knowledgeService:
|
||||||
|
chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef)
|
||||||
|
imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
|
||||||
|
if imageChunks:
|
||||||
|
contentObj["base64Data"] = imageChunks[0].get("data", "")
|
||||||
|
chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png")
|
||||||
|
contentObj["mimeType"] = chunkMime
|
||||||
|
resolvedImages += 1
|
||||||
|
if not contentObj.get("base64Data"):
|
||||||
|
try:
|
||||||
|
rawBytes = services.chat.getFileData(fileRef)
|
||||||
|
if rawBytes:
|
||||||
|
import base64 as _b64
|
||||||
|
contentObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii")
|
||||||
|
contentObj["mimeType"] = "image/png"
|
||||||
|
resolvedImages += 1
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
contentObj.pop("_fileRef", None)
|
||||||
|
contentObj.pop("_srcUrl", None)
|
||||||
|
|
||||||
|
sectionCount = len(structuredContent.get("documents", [{}])[0].get("sections", []))
|
||||||
|
logger.info(f"renderDocument: parsed {sectionCount} sections from markdown ({len(content)} chars), resolved {resolvedImages} image(s), format={outputFormat}")
|
||||||
|
|
||||||
generationService = services.getService("generation")
|
generationService = services.getService("generation")
|
||||||
aiService = services.ai
|
|
||||||
|
|
||||||
structuredContent = await generationService.generateDocumentWithTwoPhases(userPrompt=prompt)
|
|
||||||
|
|
||||||
documents = await generationService.renderReport(
|
documents = await generationService.renderReport(
|
||||||
extractedContent=structuredContent,
|
extractedContent=structuredContent,
|
||||||
outputFormat=outputFormat,
|
outputFormat=outputFormat,
|
||||||
language="de",
|
language=language,
|
||||||
title=title,
|
title=title,
|
||||||
userPrompt=prompt,
|
userPrompt=content,
|
||||||
aiService=aiService,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if not documents:
|
if not documents:
|
||||||
return ToolResult(toolCallId="", toolName="generateDocument", success=False, error="Rendering produced no documents")
|
return ToolResult(toolCallId="", toolName="renderDocument", success=False, error="Rendering produced no output")
|
||||||
|
|
||||||
savedFiles = []
|
savedFiles = []
|
||||||
|
sideEvents = []
|
||||||
chatService = services.chat
|
chatService = services.chat
|
||||||
for doc in documents:
|
|
||||||
docData = doc.data if hasattr(doc, "data") else doc.get("data", b"")
|
|
||||||
docName = doc.fileName if hasattr(doc, "fileName") else doc.get("fileName", f"{title}.{outputFormat}")
|
|
||||||
docMime = doc.mimeType if hasattr(doc, "mimeType") else doc.get("mimeType", "application/octet-stream")
|
|
||||||
|
|
||||||
fileItem = chatService.interfaceDbComponent.saveGeneratedFile(
|
sanitizedTitle = _re.sub(r'[^a-zA-Z0-9._-]', '_', title).strip('_') or "document"
|
||||||
docData, docName, docMime,
|
|
||||||
) if hasattr(chatService.interfaceDbComponent, "saveGeneratedFile") else None
|
for doc in documents:
|
||||||
|
docData = doc.documentData if hasattr(doc, "documentData") else b""
|
||||||
|
docName = doc.filename if hasattr(doc, "filename") else f"{sanitizedTitle}.{outputFormat}"
|
||||||
|
docMime = doc.mimeType if hasattr(doc, "mimeType") else "application/octet-stream"
|
||||||
|
|
||||||
|
if not docName.lower().endswith(f".{outputFormat}"):
|
||||||
|
docName = f"{sanitizedTitle}.{outputFormat}"
|
||||||
|
|
||||||
|
fileItem = None
|
||||||
|
if hasattr(chatService.interfaceDbComponent, "saveGeneratedFile"):
|
||||||
|
fileItem = chatService.interfaceDbComponent.saveGeneratedFile(docData, docName, docMime)
|
||||||
|
else:
|
||||||
|
fileItem, _ = chatService.interfaceDbComponent.saveUploadedFile(docData, docName)
|
||||||
|
|
||||||
if fileItem:
|
if fileItem:
|
||||||
savedFiles.append(f"- {docName} (id: {fileItem.id if hasattr(fileItem, 'id') else fileItem.get('id', '?')})")
|
fid = fileItem.id if hasattr(fileItem, "id") else fileItem.get("id", "?")
|
||||||
else:
|
savedFiles.append(f"- {docName} (id: {fid})")
|
||||||
savedFiles.append(f"- {docName} (generated, not saved)")
|
sideEvents.append({
|
||||||
|
"type": "fileCreated",
|
||||||
|
"data": {
|
||||||
|
"fileId": fid,
|
||||||
|
"fileName": docName,
|
||||||
|
"mimeType": docMime,
|
||||||
|
"fileSize": len(docData),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
result = f"Generated {len(documents)} document(s):\n" + "\n".join(savedFiles)
|
result = f"Rendered {len(documents)} document(s):\n" + "\n".join(savedFiles)
|
||||||
return ToolResult(toolCallId="", toolName="generateDocument", success=True, data=result)
|
return ToolResult(toolCallId="", toolName="renderDocument", success=True, data=result, sideEvents=sideEvents)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return ToolResult(toolCallId="", toolName="generateDocument", success=False, error=str(e))
|
logger.error(f"renderDocument failed: {e}")
|
||||||
|
return ToolResult(toolCallId="", toolName="renderDocument", success=False, error=str(e))
|
||||||
|
|
||||||
registry.register(
|
registry.register(
|
||||||
"generateDocument", _generateDocument,
|
"renderDocument", _renderDocument,
|
||||||
description="Generate a document in any format (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT).",
|
description=(
|
||||||
|
"Render markdown content into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). "
|
||||||
|
"You write the full document content as markdown, then this tool converts and renders it. "
|
||||||
|
"To embed images from uploaded files, use markdown image syntax with the file ID: . "
|
||||||
|
"The images will be resolved from the Knowledge Store and embedded in the output document."
|
||||||
|
),
|
||||||
parameters={
|
parameters={
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"prompt": {"type": "string", "description": "What the document should contain and how it should look"},
|
"content": {"type": "string", "description": "Full document content as markdown (headings, tables, lists, code blocks, paragraphs, images via )"},
|
||||||
"outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
|
"outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
|
||||||
"title": {"type": "string", "description": "Document title", "default": "Generated Document"},
|
"title": {"type": "string", "description": "Document title", "default": "Document"},
|
||||||
|
"language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"},
|
||||||
},
|
},
|
||||||
"required": ["prompt"],
|
"required": ["content"],
|
||||||
|
},
|
||||||
|
readOnly=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
# ── textToSpeech tool ──────────────────────────────────────────────
|
||||||
|
def _stripMarkdownForTts(text: str) -> str:
|
||||||
|
"""Strip markdown formatting so TTS reads clean speech text."""
|
||||||
|
import re as _re
|
||||||
|
t = text
|
||||||
|
t = _re.sub(r'\*\*(.+?)\*\*', r'\1', t)
|
||||||
|
t = _re.sub(r'\*(.+?)\*', r'\1', t)
|
||||||
|
t = _re.sub(r'__(.+?)__', r'\1', t)
|
||||||
|
t = _re.sub(r'_(.+?)_', r'\1', t)
|
||||||
|
t = _re.sub(r'`[^`]+`', lambda m: m.group(0)[1:-1], t)
|
||||||
|
t = _re.sub(r'^#{1,6}\s*', '', t, flags=_re.MULTILINE)
|
||||||
|
t = _re.sub(r'^\s*[-*+]\s+', '', t, flags=_re.MULTILINE)
|
||||||
|
t = _re.sub(r'^\s*\d+\.\s+', '', t, flags=_re.MULTILINE)
|
||||||
|
t = _re.sub(r'\[(.+?)\]\(.+?\)', r'\1', t)
|
||||||
|
t = _re.sub(r'!\[.*?\]\(.*?\)', '', t)
|
||||||
|
t = _re.sub(r'\n{3,}', '\n\n', t)
|
||||||
|
return t.strip()
|
||||||
|
|
||||||
|
async def _textToSpeech(args: Dict[str, Any], context: Dict[str, Any]):
    """Convert text to speech through the voice interface and return the audio as a side event.

    Args:
        args: Tool arguments. ``text`` is required; ``language`` is a
            language code or ``"auto"`` (also assumed when missing or
            empty); ``voiceName`` optionally forces a specific voice.
        context: Tool call context; ``mandateId``, ``featureInstanceId`` and
            ``userId`` are read from it.

    Returns:
        A ``ToolResult``. On success it carries one ``voiceResponse`` side
        event holding the base64 audio, its format, the language used and
        the character count. Failures are reported with ``success=False``
        and an error message; no exception escapes this function.

    Language handling: with ``"auto"`` the language is detected from the
    first 500 characters, falling back to ``de-DE`` when detection fails.
    Both detected and explicitly supplied short codes (for example ``"de"``)
    are expanded to region-qualified codes (``"de-DE"``), so the per-language
    voice lookup sees the same key format either way.
    """
    import base64 as _b64

    text = args.get("text", "")
    # A missing, null or empty language is treated like "auto".
    language = args.get("language") or "auto"
    voiceName = args.get("voiceName")

    if not text:
        return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="text is required")

    cleanText = _stripMarkdownForTts(text)
    if not cleanText:
        return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="text is empty after stripping markdown")

    try:
        from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
        mandateId = context.get("mandateId", "")
        voiceInterface = getVoiceInterface(currentUser=None, mandateId=mandateId)

        _ISO_TO_BCP47 = {
            "de": "de-DE", "en": "en-US", "fr": "fr-FR", "it": "it-IT",
            "es": "es-ES", "pt": "pt-BR", "nl": "nl-NL", "pl": "pl-PL",
            "ru": "ru-RU", "ja": "ja-JP", "zh": "zh-CN", "ko": "ko-KR",
            "ar": "ar-XA", "hi": "hi-IN", "tr": "tr-TR", "sv": "sv-SE",
        }

        def _toBcp47(code):
            # Known short codes map to their default region; codes that
            # already carry a region pass through; unknown short codes get
            # their own upper-cased form as region (e.g. "fi" -> "fi-FI").
            mapped = _ISO_TO_BCP47.get(code, code)
            if "-" not in mapped:
                mapped = f"{mapped}-{mapped.upper()}"
            return mapped

        if language == "auto":
            try:
                snippet = cleanText[:500]
                detectResult = await voiceInterface.detectLanguage(snippet)
                if detectResult and detectResult.get("success"):
                    detected = detectResult.get("language", "de")
                    language = _toBcp47(detected)
                    logger.info(f"textToSpeech: auto-detected language '{detected}' -> '{language}'")
                else:
                    language = "de-DE"
            except Exception as detectErr:
                logger.warning(f"textToSpeech: language detection failed: {detectErr}, defaulting to de-DE")
                language = "de-DE"
        elif isinstance(language, str):
            # Callers sometimes pass a bare ISO 639-1 code; normalise it the
            # same way as a detected one so the voice-map lookup below and
            # the synthesis call both receive a region-qualified code.
            language = _toBcp47(language)

        if not voiceName:
            # Best effort: a failure to read preferences must not block
            # synthesis, the default voice is used instead.
            try:
                featureInstanceId = context.get("featureInstanceId", "")
                userId = context.get("userId", "")
                if featureInstanceId and userId:
                    dbMgmt = services.chat.interfaceDbApp if hasattr(services.chat, "interfaceDbApp") else None
                    if dbMgmt and hasattr(dbMgmt, "getVoiceSettings"):
                        vs = dbMgmt.getVoiceSettings(userId)
                        if vs:
                            voiceMap = {}
                            if hasattr(vs, "ttsVoiceMap") and vs.ttsVoiceMap:
                                voiceMap = vs.ttsVoiceMap if isinstance(vs.ttsVoiceMap, dict) else {}
                            if language in voiceMap:
                                # Entries may be {"voiceName": ...} dicts or plain voice names.
                                voiceName = voiceMap[language].get("voiceName") if isinstance(voiceMap[language], dict) else voiceMap[language]
                                logger.info(f"textToSpeech: using configured voice '{voiceName}' for {language}")
                            elif hasattr(vs, "ttsVoice") and vs.ttsVoice and hasattr(vs, "ttsLanguage") and vs.ttsLanguage == language:
                                voiceName = vs.ttsVoice
            except Exception as prefErr:
                logger.debug(f"textToSpeech: could not load voice preferences: {prefErr}")

        ttsResult = await voiceInterface.textToSpeech(
            text=cleanText,
            languageCode=language,
            voiceName=voiceName,
        )

        if not ttsResult or not ttsResult.get("success"):
            errMsg = ttsResult.get("error", "TTS call failed") if ttsResult else "TTS returned None"
            return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error=errMsg)

        audioContent = ttsResult.get("audioContent", "")
        if not audioContent:
            return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error="TTS returned no audio")

        if isinstance(audioContent, bytes):
            audioB64 = _b64.b64encode(audioContent).decode("ascii")
        elif isinstance(audioContent, str):
            # A str payload is forwarded unchanged, i.e. treated as already base64.
            audioB64 = audioContent
        else:
            audioB64 = str(audioContent)

        audioFormat = ttsResult.get("audioFormat", "mp3")
        charCount = len(cleanText)
        usedVoice = voiceName or "default"
        logger.info(f"textToSpeech: generated {audioFormat} audio for {charCount} chars, language={language}, voice={usedVoice}")

        return ToolResult(
            toolCallId="", toolName="textToSpeech", success=True,
            data=f"Audio generated ({charCount} characters, language={language}, voice={usedVoice}). Playing in chat.",
            sideEvents=[{
                "type": "voiceResponse",
                "data": {
                    "audio": audioB64,
                    "format": audioFormat,
                    "language": language,
                    "charCount": charCount,
                },
            }],
        )

    except ImportError:
        return ToolResult(toolCallId="", toolName="textToSpeech", success=False,
                          error="Voice interface not available (missing dependency)")
    except Exception as e:
        logger.error(f"textToSpeech failed: {e}")
        return ToolResult(toolCallId="", toolName="textToSpeech", success=False, error=str(e))
|
||||||
|
|
||||||
|
registry.register(
|
||||||
|
"textToSpeech", _textToSpeech,
|
||||||
|
description=(
|
||||||
|
"Convert text to speech audio. The audio is played directly in the chat. "
|
||||||
|
"Use this when the user asks you to read something aloud, narrate, or speak. "
|
||||||
|
"Language is auto-detected from the text content. You do NOT need to specify a language."
|
||||||
|
),
|
||||||
|
parameters={
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"text": {"type": "string", "description": "The text to convert to speech. Can include markdown (will be stripped automatically)."},
|
||||||
|
"language": {"type": "string", "description": "BCP-47 language code (e.g. de-DE, en-US) or 'auto' for automatic detection", "default": "auto"},
|
||||||
|
"voiceName": {"type": "string", "description": "Optional specific voice name. If omitted, uses the configured voice for the detected language."},
|
||||||
|
},
|
||||||
|
"required": ["text"],
|
||||||
},
|
},
|
||||||
readOnly=False,
|
readOnly=False,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue