This commit is contained in:
Ida Dittrich 2026-02-14 17:32:49 +01:00
commit f9d2f2061e
6 changed files with 152 additions and 51 deletions

View file

@ -58,7 +58,8 @@ class ConnectorGoogleSpeech:
raise raise
async def speechToText(self, audioContent: bytes, language: str = "de-DE", async def speechToText(self, audioContent: bytes, language: str = "de-DE",
sampleRate: int = None, channels: int = None) -> Dict: sampleRate: int = None, channels: int = None,
skipFallbacks: bool = False) -> Dict:
""" """
Convert speech to text using Google Cloud Speech-to-Text API. Convert speech to text using Google Cloud Speech-to-Text API.
@ -86,6 +87,9 @@ class ConnectorGoogleSpeech:
channels = validation["channels"] channels = validation["channels"]
audioFormat = validation["format"] audioFormat = validation["format"]
logger.info(f"Auto-detected audio: {audioFormat}, {sampleRate}Hz, {channels}ch") logger.info(f"Auto-detected audio: {audioFormat}, {sampleRate}Hz, {channels}ch")
else:
# When sampleRate and channels are explicitly provided, assume raw PCM (LINEAR16)
audioFormat = "linear16"
logger.info(f"Processing audio with Google Cloud Speech-to-Text") logger.info(f"Processing audio with Google Cloud Speech-to-Text")
logger.info(f"Audio: {len(audioContent)} bytes, {sampleRate}Hz, {channels}ch") logger.info(f"Audio: {len(audioContent)} bytes, {sampleRate}Hz, {channels}ch")
@ -231,6 +235,15 @@ class ConnectorGoogleSpeech:
"error": f"Google Cloud error: {response.error}" "error": f"Google Cloud error: {response.error}"
} }
# Skip fallbacks when format is known (e.g. teamsbot with explicit LINEAR16 16kHz)
if skipFallbacks:
return {
"success": False,
"text": "",
"confidence": 0.0,
"error": "No recognition results (silence or unclear audio)"
}
# Try multiple fallback approaches # Try multiple fallback approaches
fallback_configs = [] fallback_configs = []

View file

@ -301,12 +301,12 @@ def _migrateExistingTemplates() -> None:
as system templates (isSystem=True). This runs idempotently during feature registration. as system templates (isSystem=True). This runs idempotently during feature registration.
""" """
try: try:
from modules.features.automation.interfaceFeatureAutomation import getAutomationInterface from modules.features.automation.interfaceFeatureAutomation import getInterface
from modules.security.rootAccess import getRootUser from modules.security.rootAccess import getRootUser
from modules.features.automation.datamodelFeatureAutomation import AutomationTemplate from modules.features.automation.datamodelFeatureAutomation import AutomationTemplate
rootUser = getRootUser() rootUser = getRootUser()
automationInterface = getAutomationInterface(rootUser) automationInterface = getInterface(rootUser)
# Get all templates from DB # Get all templates from DB
allTemplates = automationInterface.db.getRecordset(AutomationTemplate) allTemplates = automationInterface.db.getRecordset(AutomationTemplate)

View file

@ -362,26 +362,42 @@ async def bridgeStatusCallback(
logger.info(f"Bridge status callback: session={sessionId}, status={status}") logger.info(f"Bridge status callback: session={sessionId}, status={status}")
# Update session status try:
from modules.datamodels.datamodelUam import User # Load the original user who started the session (has RBAC roles in mandate)
systemUser = User(id="system", email="system@internal") from modules.datamodels.datamodelUam import User
interface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId) from modules.interfaces.interfaceDbApp import getRootInterface
updates = {"status": status} systemUser = User(id="system", username="system", email="system@poweron.swiss")
if errorMessage: interface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId)
updates["errorMessage"] = errorMessage
if status == TeamsbotSessionStatus.ACTIVE.value: # Look up original user from session for consistent context
from modules.shared.timeUtils import getUtcTimestamp session = interface.getSession(sessionId)
updates["startedAt"] = getUtcTimestamp() startedByUserId = session.get("startedByUserId") if session else None
elif status in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]: if startedByUserId:
from modules.shared.timeUtils import getUtcTimestamp rootInterface = getRootInterface()
updates["endedAt"] = getUtcTimestamp() originalUser = rootInterface.getUser(startedByUserId)
if originalUser:
interface.updateSession(sessionId, updates) interface = interfaceDb.getInterface(originalUser, featureInstanceId=instanceId)
# Emit SSE event updates = {"status": status}
from .service import _emitSessionEvent if errorMessage:
await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage}) updates["errorMessage"] = errorMessage
if status == TeamsbotSessionStatus.ACTIVE.value:
from modules.shared.timeUtils import getUtcTimestamp
updates["startedAt"] = getUtcTimestamp()
elif status in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]:
from modules.shared.timeUtils import getUtcTimestamp
updates["endedAt"] = getUtcTimestamp()
interface.updateSession(sessionId, updates)
# Emit SSE event
from .service import _emitSessionEvent
await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})
except Exception as e:
logger.error(f"Bridge status callback processing failed: session={sessionId}, error={e}")
# Still return 200 so the bridge doesn't retry endlessly
return {"received": True} return {"received": True}
@ -398,21 +414,40 @@ async def bridgeAudioWebsocket(
Gateway sends: TTS audio responses Gateway sends: TTS audio responses
""" """
await websocket.accept() await websocket.accept()
logger.info(f"Bridge audio WebSocket connected: session={sessionId}") logger.info(f"Bridge audio WebSocket connected: session={sessionId}, instance={instanceId}")
# TODO: Validate bridge API key from headers/query params # TODO: Validate bridge API key from headers/query params
config = _getInstanceConfig(instanceId)
from modules.datamodels.datamodelUam import User
systemUser = User(id="system", email="system@internal")
service = TeamsbotService(systemUser, None, instanceId, config)
try: try:
config = _getInstanceConfig(instanceId)
logger.info(f"Bridge audio WebSocket config loaded: session={sessionId}")
# Load the original user who started the session (has RBAC roles in mandate)
# Bridge callbacks have no HTTP auth, so we reconstruct the user context from the session record.
from modules.datamodels.datamodelUam import User
from modules.interfaces.interfaceDbApp import getRootInterface
systemUser = User(id="system", username="system", email="system@poweron.swiss")
sessionInterface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId)
session = sessionInterface.getSession(sessionId)
mandateId = session.get("mandateId") if session else None
startedByUserId = session.get("startedByUserId") if session else None
# Look up the original user (getRootInterface uses admin context, can load any user)
rootInterface = getRootInterface()
originalUser = rootInterface.getUser(startedByUserId) if startedByUserId else None
if not originalUser:
logger.warning(f"Could not load original user {startedByUserId}, falling back to system user")
originalUser = systemUser
service = TeamsbotService(originalUser, mandateId, instanceId, config)
logger.info(f"Bridge audio WebSocket service created: session={sessionId}, mandateId={mandateId}, user={originalUser.id}")
await service.handleAudioStream(websocket, sessionId) await service.handleAudioStream(websocket, sessionId)
except WebSocketDisconnect: except WebSocketDisconnect:
logger.info(f"Bridge audio WebSocket disconnected: session={sessionId}") logger.info(f"Bridge audio WebSocket disconnected: session={sessionId}")
except Exception as e: except Exception as e:
logger.error(f"Bridge audio WebSocket error: session={sessionId}, error={e}") logger.error(f"Bridge audio WebSocket error: session={sessionId}, error={e}", exc_info=True)
finally: finally:
logger.info(f"Bridge audio WebSocket closed: session={sessionId}") logger.info(f"Bridge audio WebSocket closed: session={sessionId}")

View file

@ -31,6 +31,22 @@ from .bridgeConnector import BridgeConnector
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# =========================================================================
# Minimal Service Context (for AI billing in bridge callbacks)
# =========================================================================
class _ServiceContext:
"""Minimal context providing user/mandate info for AiService billing.
Used by bridge callbacks where a full Services instance is not available."""
def __init__(self, user, mandateId, featureInstanceId=None):
self.user = user
self.mandateId = mandateId
self.featureInstanceId = featureInstanceId
self.featureCode = "teamsbot"
# ========================================================================= # =========================================================================
# Session Event Queues (for SSE streaming to frontend) # Session Event Queues (for SSE streaming to frontend)
# ========================================================================= # =========================================================================
@ -182,12 +198,15 @@ class TeamsbotService:
audioBuffer = bytearray() audioBuffer = bytearray()
bufferDurationMs = 0 bufferDurationMs = 0
targetBufferMs = 1500 # Buffer 1.5 seconds of audio before STT targetBufferMs = 3000 # Buffer 3 seconds of audio before STT
# PCM16 at 16kHz mono = 32000 bytes/second # PCM16 at 16kHz mono = 32000 bytes/second
bytesPerSecond = 32000 bytesPerSecond = 32000
bytesPerMs = bytesPerSecond / 1000 bytesPerMs = bytesPerSecond / 1000
# Track background STT/AI tasks so they don't block the WebSocket loop
backgroundTasks: list[asyncio.Task] = []
logger.info(f"Audio processing started for session {sessionId}") logger.info(f"Audio processing started for session {sessionId}")
try: try:
@ -219,18 +238,26 @@ class TeamsbotService:
audioBuffer.extend(audioChunk) audioBuffer.extend(audioChunk)
bufferDurationMs = len(audioBuffer) / bytesPerMs bufferDurationMs = len(audioBuffer) / bytesPerMs
# Process when buffer has enough audio # Process when buffer has enough audio - run in background to not block WebSocket
if bufferDurationMs >= targetBufferMs: if bufferDurationMs >= targetBufferMs:
await self._processAudioBuffer( chunkBytes = bytes(audioBuffer)
bytes(audioBuffer),
sessionId,
interface,
voiceInterface,
websocket,
)
audioBuffer.clear() audioBuffer.clear()
bufferDurationMs = 0 bufferDurationMs = 0
task = asyncio.create_task(
self._processAudioBuffer(
chunkBytes,
sessionId,
interface,
voiceInterface,
websocket,
)
)
backgroundTasks.append(task)
# Clean up completed tasks
backgroundTasks = [t for t in backgroundTasks if not t.done()]
except Exception as e: except Exception as e:
if "disconnect" not in str(e).lower(): if "disconnect" not in str(e).lower():
logger.error(f"Audio stream error for session {sessionId}: {e}") logger.error(f"Audio stream error for session {sessionId}: {e}")
@ -245,6 +272,10 @@ class TeamsbotService:
websocket, websocket,
) )
# Wait for any remaining background tasks
if backgroundTasks:
await asyncio.gather(*backgroundTasks, return_exceptions=True)
logger.info(f"Audio processing ended for session {sessionId}") logger.info(f"Audio processing ended for session {sessionId}")
async def _processAudioBuffer( async def _processAudioBuffer(
@ -258,12 +289,14 @@ class TeamsbotService:
"""Process a buffered audio chunk through the STT -> AI -> TTS pipeline.""" """Process a buffered audio chunk through the STT -> AI -> TTS pipeline."""
# Step 1: STT -- convert audio to text # Step 1: STT -- convert audio to text
# skipFallbacks=True because we know the exact format (LINEAR16, 16kHz, mono from Teams)
try: try:
sttResult = voiceInterface.speechToText( sttResult = await voiceInterface.speechToText(
audioContent=audioBytes, audioContent=audioBytes,
language=self.config.language, language=self.config.language,
sampleRate=16000, sampleRate=16000,
channels=1 channels=1,
skipFallbacks=True
) )
except Exception as e: except Exception as e:
logger.warning(f"STT failed for session {sessionId}: {e}") logger.warning(f"STT failed for session {sessionId}: {e}")
@ -281,7 +314,7 @@ class TeamsbotService:
sessionId=sessionId, sessionId=sessionId,
speaker=speaker, speaker=speaker,
text=transcriptText, text=transcriptText,
timestamp=getUtcTimestamp(), timestamp=str(getUtcTimestamp()),
confidence=confidence, confidence=confidence,
language=self.config.language, language=self.config.language,
isFinal=True, isFinal=True,
@ -377,9 +410,9 @@ class TeamsbotService:
try: try:
from modules.services.serviceAi.mainServiceAi import AiService from modules.services.serviceAi.mainServiceAi import AiService
# Create AiService with service center context # Create minimal service context for AI billing
# Note: In production, serviceCenter should be passed properly serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
aiService = AiService(serviceCenter=None) aiService = AiService(serviceCenter=serviceContext)
await aiService.ensureAiObjectsInitialized() await aiService.ensureAiObjectsInitialized()
request = AiCallRequest( request = AiCallRequest(
@ -446,7 +479,7 @@ class TeamsbotService:
# 4a: TTS -> Audio to bridge # 4a: TTS -> Audio to bridge
try: try:
ttsResult = voiceInterface.textToSpeech( ttsResult = await voiceInterface.textToSpeech(
text=speechResult.responseText, text=speechResult.responseText,
languageCode=self.config.language, languageCode=self.config.language,
voiceName=self.config.voiceId voiceName=self.config.voiceId
@ -476,7 +509,7 @@ class TeamsbotService:
modelName=response.modelName, modelName=response.modelName,
processingTime=response.processingTime, processingTime=response.processingTime,
priceCHF=response.priceCHF, priceCHF=response.priceCHF,
timestamp=getUtcTimestamp(), timestamp=str(getUtcTimestamp()),
).model_dump() ).model_dump()
createdResponse = interface.createBotResponse(botResponseData) createdResponse = interface.createBotResponse(botResponseData)
@ -528,7 +561,8 @@ class TeamsbotService:
from modules.services.serviceAi.mainServiceAi import AiService from modules.services.serviceAi.mainServiceAi import AiService
aiService = AiService(serviceCenter=None) serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
aiService = AiService(serviceCenter=serviceContext)
await aiService.ensureAiObjectsInitialized() await aiService.ensureAiObjectsInitialized()
request = AiCallRequest( request = AiCallRequest(

View file

@ -66,7 +66,8 @@ class VoiceObjects:
# Speech-to-Text Operations # Speech-to-Text Operations
async def speechToText(self, audioContent: bytes, language: str = "de-DE", async def speechToText(self, audioContent: bytes, language: str = "de-DE",
sampleRate: int = None, channels: int = None) -> Dict[str, Any]: sampleRate: int = None, channels: int = None,
skipFallbacks: bool = False) -> Dict[str, Any]:
""" """
Convert speech to text using Google Cloud Speech-to-Text API. Convert speech to text using Google Cloud Speech-to-Text API.
@ -75,6 +76,7 @@ class VoiceObjects:
language: Language code (e.g., 'de-DE', 'en-US') language: Language code (e.g., 'de-DE', 'en-US')
sampleRate: Audio sample rate (auto-detected if None) sampleRate: Audio sample rate (auto-detected if None)
channels: Number of audio channels (auto-detected if None) channels: Number of audio channels (auto-detected if None)
skipFallbacks: If True, skip fallback attempts (use when audio format is known)
Returns: Returns:
Dict containing transcribed text, confidence, and metadata Dict containing transcribed text, confidence, and metadata
@ -87,7 +89,8 @@ class VoiceObjects:
audioContent=audioContent, audioContent=audioContent,
language=language, language=language,
sampleRate=sampleRate, sampleRate=sampleRate,
channels=channels channels=channels,
skipFallbacks=skipFallbacks
) )
if result["success"]: if result["success"]:

View file

@ -390,6 +390,22 @@ def _applyForeignKeys(cursor, tables: Optional[List[str]]) -> int:
logger.warning(f"Failed to drop FK {constraintName}: {e}") logger.warning(f"Failed to drop FK {constraintName}: {e}")
continue continue
# Clean up orphaned rows before applying FK constraint
try:
cursor.execute(f"""
DELETE FROM "{tableName}"
WHERE "{column}" IS NOT NULL
AND "{column}" NOT IN (SELECT "id" FROM "{refTable}")
""")
orphanCount = cursor.rowcount
if orphanCount > 0:
logger.info(
f"Cleaned {orphanCount} orphaned row(s) from {tableName} "
f"(missing {refTable} reference via {column})"
)
except Exception as e:
logger.warning(f"Failed to clean orphans for FK {constraintName}: {e}")
try: try:
cursor.execute(f""" cursor.execute(f"""
ALTER TABLE "{tableName}" ALTER TABLE "{tableName}"