release: v2.80.32 - LM Studio SDK resilience & auto-recovery

2026-05-11 13:19:07 +09:00
parent 5d3df0816f
commit 6347a223a7
10 changed files with 195 additions and 72 deletions
@@ -7,8 +7,14 @@ export interface ILMStudioClient {
    listLoaded(): Promise<string[]>;
    /** Like listLoaded() but caches the result for `ttlMs` to avoid hammering the SDK. */
    listLoadedCached(ttlMs?: number): Promise<string[]>;
-    /** Resolve a chat-ready handle for an already-loaded (or just-loaded) model. */
-    getModelHandle(modelKey: string): Promise<LLM>;
+    /**
+     * Resolve a chat-ready handle for an already-loaded (or just-loaded) model.
+     *
+     * `options.refresh: true` drops the SDK + WebSocket so any disposed handle
+     * sitting in the SDK's internal handle map is discarded. Use this after a
+     * "Model is disposed!" or "lock() request could not be registered" error.
+     */
+    getModelHandle(modelKey: string, options?: { refresh?: boolean }): Promise<LLM>;
    isReachable(): Promise<boolean>;
    setBaseUrl(httpBaseUrl: string): void;
 }
@@ -111,8 +117,17 @@ export class LMStudioClient implements ILMStudioClient {
        }
    }

-    async getModelHandle(modelKey: string): Promise<LLM> {
+    async getModelHandle(modelKey: string, options?: { refresh?: boolean }): Promise<LLM> {
        try {
+            if (options?.refresh) {
+                // Recreate the SDK + WebSocket so the SDK's internal handle
+                // cache is dropped. The next llm.model() call mints a fresh
+                // handle instead of returning the disposed one from the
+                // previous (aborted) prediction.
+                this._sdk = undefined;
+                this._loadedCache = undefined;
+                logInfo('LM Studio SDK handle refresh requested — dropped cached SDK client.', { modelKey });
+            }
            return await this.getSdk().llm.model(modelKey);
        } catch (e: any) {
            const msg = e?.message ?? String(e);
@@ -18,6 +18,12 @@ export interface ChatStreamRequest {
 export interface IChatStreamer {
    /** Token-level streaming for an LM Studio chat completion via the WebSocket SDK. */
    stream(req: ChatStreamRequest): AsyncIterable<{ token: string }>;
+    /**
+     * Optional hook: drop the SDK's cached handle for `modelName`. Callers invoke
+     * this when the previous stream returned zero tokens with no error — a symptom
+     * of a silently-disposed handle that needs a fresh WebSocket round-trip.
+     */
+    resetHandle?(modelName: string): Promise<void>;
 }

 /**
@@ -39,41 +45,84 @@ export class LMStudioStreamer implements IChatStreamer {
            throw new LMStudioLifecycleError('LMStudioStreamer.stream called without a model name.');
        }

-        const model = await this.client.getModelHandle(trimmedModel);
-        logInfo('LM Studio SDK chat stream started.', { model: trimmedModel, messageCount: req.messages.length });
+        // One automatic retry path: when the first attempt blows up with a
+        // "Model is disposed!" / "lock() request could not be registered"
+        // error before any tokens have been yielded, we drop the cached SDK
+        // handle and try once more. These errors are caused by a previous
+        // aborted prediction leaving the SDK's internal handle map pointing
+        // at a dead WebSocket binding — a fresh client.model() lookup minted
+        // from a recreated SDK fixes it. We only retry when zero tokens have
+        // streamed: if the consumer already saw partial output, restarting
+        // would duplicate tokens.
+        for (let attempt = 1; attempt <= 2; attempt++) {
+            const refresh = attempt > 1;
+            const model = await this.client.getModelHandle(trimmedModel, refresh ? { refresh: true } : undefined);
+            logInfo('LM Studio SDK chat stream started.', { model: trimmedModel, messageCount: req.messages.length, attempt });

-        const prediction = (model as any).respond(req.messages, {
-            temperature: req.temperature,
-            maxTokens: req.maxTokens ?? 4096,
-            signal: req.signal,
-        });
+            const prediction = (model as any).respond(req.messages, {
+                temperature: req.temperature,
+                maxTokens: req.maxTokens ?? 4096,
+                signal: req.signal,
+            });

-        // Bridge AbortSignal → prediction.cancel(): without this, an aborted
-        // request keeps generating on the LM Studio server. The orphaned
-        // prediction holds locks on the model handle, which is a known cause
-        // of "lock() request could not be registered" on the very next
-        // request — the reused handle is still bound to a dead prediction.
-        const onAbort = () => {
-            try { (prediction as any)?.cancel?.(); } catch { /* swallow — best effort */ }
-        };
-        if (req.signal) {
-            if (req.signal.aborted) onAbort();
-            else req.signal.addEventListener('abort', onAbort, { once: true });
-        }
-
-        try {
-            for await (const fragment of prediction as AsyncIterable<{ content: string }>) {
-                if (req.signal?.aborted) return;
-                const token = fragment?.content ?? '';
-                if (token) yield { token };
+            // Bridge AbortSignal → prediction.cancel(): without this, an
+            // aborted request keeps generating on the LM Studio server. The
+            // orphaned prediction holds locks on the model handle, which is
+            // a known cause of "lock() request could not be registered" on
+            // the very next request — the reused handle is still bound to a
+            // dead prediction.
+            const onAbort = () => {
+                try { (prediction as any)?.cancel?.(); } catch { /* swallow — best effort */ }
+            };
+            if (req.signal) {
+                if (req.signal.aborted) onAbort();
+                else req.signal.addEventListener('abort', onAbort, { once: true });
            }
+
+            let yielded = 0;
+            let caught: any = null;
+            try {
+                for await (const fragment of prediction as AsyncIterable<{ content: string }>) {
+                    if (req.signal?.aborted) return;
+                    const token = fragment?.content ?? '';
+                    if (token) {
+                        yielded++;
+                        yield { token };
+                    }
+                }
+            } catch (err: any) {
+                if (req.signal?.aborted) return;
+                if (err?.name === 'AbortError') return;
+                caught = err;
+            } finally {
+                req.signal?.removeEventListener?.('abort', onAbort);
+            }
+
+            if (!caught) return;
+
+            const errMsg = String(caught?.message ?? caught);
+            const handleDead = /\bdisposed\b/i.test(errMsg)
+                || /lock\(\) request could not be registered/i.test(errMsg);
+
+            if (handleDead && yielded === 0 && attempt === 1) {
+                logInfo('Dead LM Studio handle detected — retrying with a fresh SDK.', { model: trimmedModel, error: errMsg });
+                continue;
+            }
+
+            logError('LM Studio SDK chat stream failed.', { model: trimmedModel, error: errMsg, attempt });
+            throw caught;
+        }
+    }
+
+    async resetHandle(modelName: string): Promise<void> {
+        const trimmed = (modelName || '').trim();
+        if (!trimmed) return;
+        try {
+            await this.client.getModelHandle(trimmed, { refresh: true });
        } catch (err: any) {
-            if (req.signal?.aborted) return;
-            if (err?.name === 'AbortError') return;
-            logError('LM Studio SDK chat stream failed.', { model: trimmedModel, error: err?.message ?? String(err) });
-            throw err;
-        } finally {
-            req.signal?.removeEventListener?.('abort', onAbort);
+            // Best effort: log and swallow so a failed reset never rejects; the next
+            // stream() attempt is expected to surface the problem via its normal error path.
+            logError('LM Studio handle reset failed.', { model: trimmed, error: err?.message ?? String(err) });
        }
    }
 }