fix: WS auto-reconnect, reduce keepalive to 15s, downgrade ChatDOM to debug

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
ValueOn AG 2026-02-19 00:46:41 +01:00
parent 48c5c33f63
commit 9e4aad973f
2 changed files with 46 additions and 12 deletions

View file

@ -134,7 +134,7 @@ export class ChatProcedure {
children: number;
html: string;
}) => {
this._logger.info(`ChatDOM: <${info.tag} data-tid="${info.tid}"> children=${info.children}, text="${info.text.substring(0, 120)}"`);
this._logger.debug(`ChatDOM: <${info.tag} data-tid="${info.tid}"> children=${info.children}, text="${info.text.substring(0, 120)}"`);
});
} catch {
// Already exposed

View file

@ -511,9 +511,9 @@ export class BotOrchestrator {
// Connection might be closing
}
}
}, 30000);
}, 15000);
this._logger.info('Keepalive started (30s interval)');
this._logger.info('Keepalive started (15s interval)');
}
/**
@ -527,22 +527,32 @@ export class BotOrchestrator {
}
}
/** Number of reconnect attempts scheduled since the last successful WebSocket open. */
private _wsReconnectAttempts = 0;
/** Upper bound on reconnect attempts; once reached, no further reconnects are scheduled. */
private _wsMaxReconnectAttempts = 10;
/** Guards `_scheduleReconnect` against scheduling more than one reconnect at a time. */
private _wsReconnecting = false;
/**
* Connect to the Gateway WebSocket for this session.
*/
private async _connectToGateway(): Promise<void> {
// gatewayWsUrl is the full WebSocket URL provided by the Gateway
// It already includes instanceId and sessionId
const wsUrl = this._options.gatewayWsUrl;
this._logger.info(`Connecting to Gateway: ${wsUrl}`);
// Derive HTTP base URL from WebSocket URL for fallback
this._httpBaseUrl = wsUrl
.replace('wss://', 'https://')
.replace('ws://', 'http://')
.replace(/\/bot\/ws\/.*$/, '');
return new Promise((resolve, reject) => {
return this._createWsConnection(wsUrl, true);
}
/**
* Create (or recreate) the WebSocket connection.
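* On initial connect, `isInitial` = true and the promise resolves once the socket opens, errors, or times out.
* The promise never rejects; error and timeout paths switch to HTTP fallback instead.
* On reconnect, the caller does not await the promise (fire-and-forget); it resolves on open or timeout.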
*/
private _createWsConnection(wsUrl: string, isInitial: boolean): Promise<void> {
return new Promise((resolve) => {
this._gatewayWs = new WebSocket(wsUrl);
const wsTimeout = setTimeout(() => {
@ -551,7 +561,7 @@ export class BotOrchestrator {
this._useHttpFallback = true;
this._gatewayWs?.close();
this._gatewayWs = null;
resolve(); // Continue with HTTP fallback instead of failing
resolve();
}
}, 10000);
@ -559,6 +569,8 @@ export class BotOrchestrator {
clearTimeout(wsTimeout);
this._logger.info('Connected to Gateway via WebSocket');
this._useHttpFallback = false;
this._wsReconnectAttempts = 0;
this._wsReconnecting = false;
resolve();
});
@ -570,23 +582,45 @@ export class BotOrchestrator {
this._gatewayWs.on('close', (code, reason) => {
this._logger.warn(`Gateway WebSocket closed: ${code} - ${reason}`);
if (!this._isShuttingDown && !this._useHttpFallback) {
this._logger.info('Switching to HTTP fallback for transcript delivery');
if (!this._isShuttingDown) {
this._useHttpFallback = true;
this._scheduleReconnect(wsUrl);
}
});
this._gatewayWs.on('error', (error) => {
clearTimeout(wsTimeout);
this._logger.error('Gateway WebSocket error:', error);
this._logger.info('Switching to HTTP fallback for transcript delivery');
this._useHttpFallback = true;
this._gatewayWs = null;
resolve(); // Continue with HTTP fallback
if (isInitial) resolve();
});
});
}
/**
 * Schedule a WebSocket reconnection with exponential backoff.
 *
 * Does nothing while shutting down, while a reconnect timer is already
 * pending, or once `_wsMaxReconnectAttempts` has been reached. The delay
 * starts at 2s, grows by a factor of 1.5 per attempt and is capped at 30s.
 *
 * @param wsUrl - WebSocket URL passed through to `_createWsConnection`.
 */
private _scheduleReconnect(wsUrl: string): void {
  if (this._isShuttingDown || this._wsReconnecting) return;
  if (this._wsReconnectAttempts >= this._wsMaxReconnectAttempts) {
    this._logger.warn(`WebSocket reconnect limit reached (${this._wsMaxReconnectAttempts}), staying on HTTP fallback`);
    return;
  }
  // Mark a timer as pending so repeated calls cannot stack up several reconnect timers.
  this._wsReconnecting = true;
  this._wsReconnectAttempts++;
  const delayMs = Math.min(2000 * Math.pow(1.5, this._wsReconnectAttempts - 1), 30000);
  this._logger.info(`WebSocket reconnect attempt ${this._wsReconnectAttempts}/${this._wsMaxReconnectAttempts} in ${(delayMs / 1000).toFixed(1)}s`);
  setTimeout(() => {
    // The pending timer has fired: release the guard before connecting.
    // Otherwise a failed attempt (whose 'close' handler calls back into this
    // method) would be rejected by the guard above and no further retries
    // would ever be scheduled, because the flag was only cleared on 'open'.
    this._wsReconnecting = false;
    if (this._isShuttingDown) return;
    this._createWsConnection(wsUrl, false).catch((err) => {
      this._logger.error('WebSocket reconnect failed:', err);
    });
  }, delayMs);
}
/**
* Handle incoming messages from the Gateway.
* Async operations are awaited to ensure proper error handling