Merge branch 'int' of https://github.com/valueonag/gateway into int
This commit is contained in:
commit
f9d2f2061e
6 changed files with 152 additions and 51 deletions
|
|
@ -58,7 +58,8 @@ class ConnectorGoogleSpeech:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def speechToText(self, audioContent: bytes, language: str = "de-DE",
|
async def speechToText(self, audioContent: bytes, language: str = "de-DE",
|
||||||
sampleRate: int = None, channels: int = None) -> Dict:
|
sampleRate: int = None, channels: int = None,
|
||||||
|
skipFallbacks: bool = False) -> Dict:
|
||||||
"""
|
"""
|
||||||
Convert speech to text using Google Cloud Speech-to-Text API.
|
Convert speech to text using Google Cloud Speech-to-Text API.
|
||||||
|
|
||||||
|
|
@ -86,6 +87,9 @@ class ConnectorGoogleSpeech:
|
||||||
channels = validation["channels"]
|
channels = validation["channels"]
|
||||||
audioFormat = validation["format"]
|
audioFormat = validation["format"]
|
||||||
logger.info(f"Auto-detected audio: {audioFormat}, {sampleRate}Hz, {channels}ch")
|
logger.info(f"Auto-detected audio: {audioFormat}, {sampleRate}Hz, {channels}ch")
|
||||||
|
else:
|
||||||
|
# When sampleRate and channels are explicitly provided, assume raw PCM (LINEAR16)
|
||||||
|
audioFormat = "linear16"
|
||||||
|
|
||||||
logger.info(f"Processing audio with Google Cloud Speech-to-Text")
|
logger.info(f"Processing audio with Google Cloud Speech-to-Text")
|
||||||
logger.info(f"Audio: {len(audioContent)} bytes, {sampleRate}Hz, {channels}ch")
|
logger.info(f"Audio: {len(audioContent)} bytes, {sampleRate}Hz, {channels}ch")
|
||||||
|
|
@ -231,6 +235,15 @@ class ConnectorGoogleSpeech:
|
||||||
"error": f"Google Cloud error: {response.error}"
|
"error": f"Google Cloud error: {response.error}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Skip fallbacks when format is known (e.g. teamsbot with explicit LINEAR16 16kHz)
|
||||||
|
if skipFallbacks:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"text": "",
|
||||||
|
"confidence": 0.0,
|
||||||
|
"error": "No recognition results (silence or unclear audio)"
|
||||||
|
}
|
||||||
|
|
||||||
# Try multiple fallback approaches
|
# Try multiple fallback approaches
|
||||||
fallback_configs = []
|
fallback_configs = []
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -301,12 +301,12 @@ def _migrateExistingTemplates() -> None:
|
||||||
as system templates (isSystem=True). This runs idempotently during feature registration.
|
as system templates (isSystem=True). This runs idempotently during feature registration.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from modules.features.automation.interfaceFeatureAutomation import getAutomationInterface
|
from modules.features.automation.interfaceFeatureAutomation import getInterface
|
||||||
from modules.security.rootAccess import getRootUser
|
from modules.security.rootAccess import getRootUser
|
||||||
from modules.features.automation.datamodelFeatureAutomation import AutomationTemplate
|
from modules.features.automation.datamodelFeatureAutomation import AutomationTemplate
|
||||||
|
|
||||||
rootUser = getRootUser()
|
rootUser = getRootUser()
|
||||||
automationInterface = getAutomationInterface(rootUser)
|
automationInterface = getInterface(rootUser)
|
||||||
|
|
||||||
# Get all templates from DB
|
# Get all templates from DB
|
||||||
allTemplates = automationInterface.db.getRecordset(AutomationTemplate)
|
allTemplates = automationInterface.db.getRecordset(AutomationTemplate)
|
||||||
|
|
|
||||||
|
|
@ -362,26 +362,42 @@ async def bridgeStatusCallback(
|
||||||
|
|
||||||
logger.info(f"Bridge status callback: session={sessionId}, status={status}")
|
logger.info(f"Bridge status callback: session={sessionId}, status={status}")
|
||||||
|
|
||||||
# Update session status
|
try:
|
||||||
from modules.datamodels.datamodelUam import User
|
# Load the original user who started the session (has RBAC roles in mandate)
|
||||||
systemUser = User(id="system", email="system@internal")
|
from modules.datamodels.datamodelUam import User
|
||||||
interface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId)
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
|
||||||
updates = {"status": status}
|
systemUser = User(id="system", username="system", email="system@poweron.swiss")
|
||||||
if errorMessage:
|
interface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId)
|
||||||
updates["errorMessage"] = errorMessage
|
|
||||||
if status == TeamsbotSessionStatus.ACTIVE.value:
|
|
||||||
from modules.shared.timeUtils import getUtcTimestamp
|
|
||||||
updates["startedAt"] = getUtcTimestamp()
|
|
||||||
elif status in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]:
|
|
||||||
from modules.shared.timeUtils import getUtcTimestamp
|
|
||||||
updates["endedAt"] = getUtcTimestamp()
|
|
||||||
|
|
||||||
interface.updateSession(sessionId, updates)
|
# Look up original user from session for consistent context
|
||||||
|
session = interface.getSession(sessionId)
|
||||||
|
startedByUserId = session.get("startedByUserId") if session else None
|
||||||
|
if startedByUserId:
|
||||||
|
rootInterface = getRootInterface()
|
||||||
|
originalUser = rootInterface.getUser(startedByUserId)
|
||||||
|
if originalUser:
|
||||||
|
interface = interfaceDb.getInterface(originalUser, featureInstanceId=instanceId)
|
||||||
|
|
||||||
# Emit SSE event
|
updates = {"status": status}
|
||||||
from .service import _emitSessionEvent
|
if errorMessage:
|
||||||
await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})
|
updates["errorMessage"] = errorMessage
|
||||||
|
if status == TeamsbotSessionStatus.ACTIVE.value:
|
||||||
|
from modules.shared.timeUtils import getUtcTimestamp
|
||||||
|
updates["startedAt"] = getUtcTimestamp()
|
||||||
|
elif status in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]:
|
||||||
|
from modules.shared.timeUtils import getUtcTimestamp
|
||||||
|
updates["endedAt"] = getUtcTimestamp()
|
||||||
|
|
||||||
|
interface.updateSession(sessionId, updates)
|
||||||
|
|
||||||
|
# Emit SSE event
|
||||||
|
from .service import _emitSessionEvent
|
||||||
|
await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Bridge status callback processing failed: session={sessionId}, error={e}")
|
||||||
|
# Still return 200 so the bridge doesn't retry endlessly
|
||||||
|
|
||||||
return {"received": True}
|
return {"received": True}
|
||||||
|
|
||||||
|
|
@ -398,21 +414,40 @@ async def bridgeAudioWebsocket(
|
||||||
Gateway sends: TTS audio responses
|
Gateway sends: TTS audio responses
|
||||||
"""
|
"""
|
||||||
await websocket.accept()
|
await websocket.accept()
|
||||||
logger.info(f"Bridge audio WebSocket connected: session={sessionId}")
|
logger.info(f"Bridge audio WebSocket connected: session={sessionId}, instance={instanceId}")
|
||||||
|
|
||||||
# TODO: Validate bridge API key from headers/query params
|
# TODO: Validate bridge API key from headers/query params
|
||||||
|
|
||||||
config = _getInstanceConfig(instanceId)
|
|
||||||
|
|
||||||
from modules.datamodels.datamodelUam import User
|
|
||||||
systemUser = User(id="system", email="system@internal")
|
|
||||||
service = TeamsbotService(systemUser, None, instanceId, config)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
config = _getInstanceConfig(instanceId)
|
||||||
|
logger.info(f"Bridge audio WebSocket config loaded: session={sessionId}")
|
||||||
|
|
||||||
|
# Load the original user who started the session (has RBAC roles in mandate)
|
||||||
|
# Bridge callbacks have no HTTP auth, so we reconstruct the user context from the session record.
|
||||||
|
from modules.datamodels.datamodelUam import User
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
|
||||||
|
systemUser = User(id="system", username="system", email="system@poweron.swiss")
|
||||||
|
sessionInterface = interfaceDb.getInterface(systemUser, featureInstanceId=instanceId)
|
||||||
|
session = sessionInterface.getSession(sessionId)
|
||||||
|
mandateId = session.get("mandateId") if session else None
|
||||||
|
startedByUserId = session.get("startedByUserId") if session else None
|
||||||
|
|
||||||
|
# Look up the original user (getRootInterface uses admin context, can load any user)
|
||||||
|
rootInterface = getRootInterface()
|
||||||
|
originalUser = rootInterface.getUser(startedByUserId) if startedByUserId else None
|
||||||
|
|
||||||
|
if not originalUser:
|
||||||
|
logger.warning(f"Could not load original user {startedByUserId}, falling back to system user")
|
||||||
|
originalUser = systemUser
|
||||||
|
|
||||||
|
service = TeamsbotService(originalUser, mandateId, instanceId, config)
|
||||||
|
logger.info(f"Bridge audio WebSocket service created: session={sessionId}, mandateId={mandateId}, user={originalUser.id}")
|
||||||
|
|
||||||
await service.handleAudioStream(websocket, sessionId)
|
await service.handleAudioStream(websocket, sessionId)
|
||||||
except WebSocketDisconnect:
|
except WebSocketDisconnect:
|
||||||
logger.info(f"Bridge audio WebSocket disconnected: session={sessionId}")
|
logger.info(f"Bridge audio WebSocket disconnected: session={sessionId}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Bridge audio WebSocket error: session={sessionId}, error={e}")
|
logger.error(f"Bridge audio WebSocket error: session={sessionId}, error={e}", exc_info=True)
|
||||||
finally:
|
finally:
|
||||||
logger.info(f"Bridge audio WebSocket closed: session={sessionId}")
|
logger.info(f"Bridge audio WebSocket closed: session={sessionId}")
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,22 @@ from .bridgeConnector import BridgeConnector
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Minimal Service Context (for AI billing in bridge callbacks)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class _ServiceContext:
|
||||||
|
"""Minimal context providing user/mandate info for AiService billing.
|
||||||
|
Used by bridge callbacks where a full Services instance is not available."""
|
||||||
|
|
||||||
|
def __init__(self, user, mandateId, featureInstanceId=None):
|
||||||
|
self.user = user
|
||||||
|
self.mandateId = mandateId
|
||||||
|
self.featureInstanceId = featureInstanceId
|
||||||
|
self.featureCode = "teamsbot"
|
||||||
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Session Event Queues (for SSE streaming to frontend)
|
# Session Event Queues (for SSE streaming to frontend)
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
@ -182,12 +198,15 @@ class TeamsbotService:
|
||||||
|
|
||||||
audioBuffer = bytearray()
|
audioBuffer = bytearray()
|
||||||
bufferDurationMs = 0
|
bufferDurationMs = 0
|
||||||
targetBufferMs = 1500 # Buffer 1.5 seconds of audio before STT
|
targetBufferMs = 3000 # Buffer 3 seconds of audio before STT
|
||||||
|
|
||||||
# PCM16 at 16kHz mono = 32000 bytes/second
|
# PCM16 at 16kHz mono = 32000 bytes/second
|
||||||
bytesPerSecond = 32000
|
bytesPerSecond = 32000
|
||||||
bytesPerMs = bytesPerSecond / 1000
|
bytesPerMs = bytesPerSecond / 1000
|
||||||
|
|
||||||
|
# Track background STT/AI tasks so they don't block the WebSocket loop
|
||||||
|
backgroundTasks: list[asyncio.Task] = []
|
||||||
|
|
||||||
logger.info(f"Audio processing started for session {sessionId}")
|
logger.info(f"Audio processing started for session {sessionId}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -219,18 +238,26 @@ class TeamsbotService:
|
||||||
audioBuffer.extend(audioChunk)
|
audioBuffer.extend(audioChunk)
|
||||||
bufferDurationMs = len(audioBuffer) / bytesPerMs
|
bufferDurationMs = len(audioBuffer) / bytesPerMs
|
||||||
|
|
||||||
# Process when buffer has enough audio
|
# Process when buffer has enough audio - run in background to not block WebSocket
|
||||||
if bufferDurationMs >= targetBufferMs:
|
if bufferDurationMs >= targetBufferMs:
|
||||||
await self._processAudioBuffer(
|
chunkBytes = bytes(audioBuffer)
|
||||||
bytes(audioBuffer),
|
|
||||||
sessionId,
|
|
||||||
interface,
|
|
||||||
voiceInterface,
|
|
||||||
websocket,
|
|
||||||
)
|
|
||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
bufferDurationMs = 0
|
bufferDurationMs = 0
|
||||||
|
|
||||||
|
task = asyncio.create_task(
|
||||||
|
self._processAudioBuffer(
|
||||||
|
chunkBytes,
|
||||||
|
sessionId,
|
||||||
|
interface,
|
||||||
|
voiceInterface,
|
||||||
|
websocket,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
backgroundTasks.append(task)
|
||||||
|
|
||||||
|
# Clean up completed tasks
|
||||||
|
backgroundTasks = [t for t in backgroundTasks if not t.done()]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if "disconnect" not in str(e).lower():
|
if "disconnect" not in str(e).lower():
|
||||||
logger.error(f"Audio stream error for session {sessionId}: {e}")
|
logger.error(f"Audio stream error for session {sessionId}: {e}")
|
||||||
|
|
@ -245,6 +272,10 @@ class TeamsbotService:
|
||||||
websocket,
|
websocket,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Wait for any remaining background tasks
|
||||||
|
if backgroundTasks:
|
||||||
|
await asyncio.gather(*backgroundTasks, return_exceptions=True)
|
||||||
|
|
||||||
logger.info(f"Audio processing ended for session {sessionId}")
|
logger.info(f"Audio processing ended for session {sessionId}")
|
||||||
|
|
||||||
async def _processAudioBuffer(
|
async def _processAudioBuffer(
|
||||||
|
|
@ -258,12 +289,14 @@ class TeamsbotService:
|
||||||
"""Process a buffered audio chunk through the STT -> AI -> TTS pipeline."""
|
"""Process a buffered audio chunk through the STT -> AI -> TTS pipeline."""
|
||||||
|
|
||||||
# Step 1: STT -- convert audio to text
|
# Step 1: STT -- convert audio to text
|
||||||
|
# skipFallbacks=True because we know the exact format (LINEAR16, 16kHz, mono from Teams)
|
||||||
try:
|
try:
|
||||||
sttResult = voiceInterface.speechToText(
|
sttResult = await voiceInterface.speechToText(
|
||||||
audioContent=audioBytes,
|
audioContent=audioBytes,
|
||||||
language=self.config.language,
|
language=self.config.language,
|
||||||
sampleRate=16000,
|
sampleRate=16000,
|
||||||
channels=1
|
channels=1,
|
||||||
|
skipFallbacks=True
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"STT failed for session {sessionId}: {e}")
|
logger.warning(f"STT failed for session {sessionId}: {e}")
|
||||||
|
|
@ -281,7 +314,7 @@ class TeamsbotService:
|
||||||
sessionId=sessionId,
|
sessionId=sessionId,
|
||||||
speaker=speaker,
|
speaker=speaker,
|
||||||
text=transcriptText,
|
text=transcriptText,
|
||||||
timestamp=getUtcTimestamp(),
|
timestamp=str(getUtcTimestamp()),
|
||||||
confidence=confidence,
|
confidence=confidence,
|
||||||
language=self.config.language,
|
language=self.config.language,
|
||||||
isFinal=True,
|
isFinal=True,
|
||||||
|
|
@ -377,9 +410,9 @@ class TeamsbotService:
|
||||||
try:
|
try:
|
||||||
from modules.services.serviceAi.mainServiceAi import AiService
|
from modules.services.serviceAi.mainServiceAi import AiService
|
||||||
|
|
||||||
# Create AiService with service center context
|
# Create minimal service context for AI billing
|
||||||
# Note: In production, serviceCenter should be passed properly
|
serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
|
||||||
aiService = AiService(serviceCenter=None)
|
aiService = AiService(serviceCenter=serviceContext)
|
||||||
await aiService.ensureAiObjectsInitialized()
|
await aiService.ensureAiObjectsInitialized()
|
||||||
|
|
||||||
request = AiCallRequest(
|
request = AiCallRequest(
|
||||||
|
|
@ -446,7 +479,7 @@ class TeamsbotService:
|
||||||
|
|
||||||
# 4a: TTS -> Audio to bridge
|
# 4a: TTS -> Audio to bridge
|
||||||
try:
|
try:
|
||||||
ttsResult = voiceInterface.textToSpeech(
|
ttsResult = await voiceInterface.textToSpeech(
|
||||||
text=speechResult.responseText,
|
text=speechResult.responseText,
|
||||||
languageCode=self.config.language,
|
languageCode=self.config.language,
|
||||||
voiceName=self.config.voiceId
|
voiceName=self.config.voiceId
|
||||||
|
|
@ -476,7 +509,7 @@ class TeamsbotService:
|
||||||
modelName=response.modelName,
|
modelName=response.modelName,
|
||||||
processingTime=response.processingTime,
|
processingTime=response.processingTime,
|
||||||
priceCHF=response.priceCHF,
|
priceCHF=response.priceCHF,
|
||||||
timestamp=getUtcTimestamp(),
|
timestamp=str(getUtcTimestamp()),
|
||||||
).model_dump()
|
).model_dump()
|
||||||
|
|
||||||
createdResponse = interface.createBotResponse(botResponseData)
|
createdResponse = interface.createBotResponse(botResponseData)
|
||||||
|
|
@ -528,7 +561,8 @@ class TeamsbotService:
|
||||||
|
|
||||||
from modules.services.serviceAi.mainServiceAi import AiService
|
from modules.services.serviceAi.mainServiceAi import AiService
|
||||||
|
|
||||||
aiService = AiService(serviceCenter=None)
|
serviceContext = _ServiceContext(self.currentUser, self.mandateId, self.instanceId)
|
||||||
|
aiService = AiService(serviceCenter=serviceContext)
|
||||||
await aiService.ensureAiObjectsInitialized()
|
await aiService.ensureAiObjectsInitialized()
|
||||||
|
|
||||||
request = AiCallRequest(
|
request = AiCallRequest(
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,8 @@ class VoiceObjects:
|
||||||
# Speech-to-Text Operations
|
# Speech-to-Text Operations
|
||||||
|
|
||||||
async def speechToText(self, audioContent: bytes, language: str = "de-DE",
|
async def speechToText(self, audioContent: bytes, language: str = "de-DE",
|
||||||
sampleRate: int = None, channels: int = None) -> Dict[str, Any]:
|
sampleRate: int = None, channels: int = None,
|
||||||
|
skipFallbacks: bool = False) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Convert speech to text using Google Cloud Speech-to-Text API.
|
Convert speech to text using Google Cloud Speech-to-Text API.
|
||||||
|
|
||||||
|
|
@ -75,6 +76,7 @@ class VoiceObjects:
|
||||||
language: Language code (e.g., 'de-DE', 'en-US')
|
language: Language code (e.g., 'de-DE', 'en-US')
|
||||||
sampleRate: Audio sample rate (auto-detected if None)
|
sampleRate: Audio sample rate (auto-detected if None)
|
||||||
channels: Number of audio channels (auto-detected if None)
|
channels: Number of audio channels (auto-detected if None)
|
||||||
|
skipFallbacks: If True, skip fallback attempts (use when audio format is known)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict containing transcribed text, confidence, and metadata
|
Dict containing transcribed text, confidence, and metadata
|
||||||
|
|
@ -87,7 +89,8 @@ class VoiceObjects:
|
||||||
audioContent=audioContent,
|
audioContent=audioContent,
|
||||||
language=language,
|
language=language,
|
||||||
sampleRate=sampleRate,
|
sampleRate=sampleRate,
|
||||||
channels=channels
|
channels=channels,
|
||||||
|
skipFallbacks=skipFallbacks
|
||||||
)
|
)
|
||||||
|
|
||||||
if result["success"]:
|
if result["success"]:
|
||||||
|
|
|
||||||
|
|
@ -390,6 +390,22 @@ def _applyForeignKeys(cursor, tables: Optional[List[str]]) -> int:
|
||||||
logger.warning(f"Failed to drop FK {constraintName}: {e}")
|
logger.warning(f"Failed to drop FK {constraintName}: {e}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Clean up orphaned rows before applying FK constraint
|
||||||
|
try:
|
||||||
|
cursor.execute(f"""
|
||||||
|
DELETE FROM "{tableName}"
|
||||||
|
WHERE "{column}" IS NOT NULL
|
||||||
|
AND "{column}" NOT IN (SELECT "id" FROM "{refTable}")
|
||||||
|
""")
|
||||||
|
orphanCount = cursor.rowcount
|
||||||
|
if orphanCount > 0:
|
||||||
|
logger.info(
|
||||||
|
f"Cleaned {orphanCount} orphaned row(s) from {tableName} "
|
||||||
|
f"(missing {refTable} reference via {column})"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to clean orphans for FK {constraintName}: {e}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cursor.execute(f"""
|
cursor.execute(f"""
|
||||||
ALTER TABLE "{tableName}"
|
ALTER TABLE "{tableName}"
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue