import { LMStudioClient as SDKClient, LLM, type LLMLoadModelConfig } from '@lmstudio/sdk';
import { logError, logInfo } from '../utils';

/** Load-time options forwarded to LM Studio's `llm.load()`. Subset of `LLMLoadModelConfig`. */
export interface LMStudioLoadConfig {
  flashAttention?: boolean;
  /** "max" | "off" | number 0-1 */
  gpuOffloadRatio?: 'max' | 'off' | number;
  offloadKVCacheToGpu?: boolean;
  keepModelInMemory?: boolean;
  useFp16ForKVCache?: boolean;
  /** 0 / undefined = engine default */
  evalBatchSize?: number;
}

export interface ILMStudioClient {
  load(modelKey: string, signal?: AbortSignal, loadConfig?: LMStudioLoadConfig): Promise<void>;
  unload(modelKey: string): Promise<void>;
  listLoaded(): Promise<string[]>;
  /** Like listLoaded() but caches the result for `ttlMs` to avoid hammering the SDK. */
  listLoadedCached(ttlMs?: number): Promise<string[]>;
  /**
   * List every LLM the user has downloaded into LM Studio, regardless of
   * whether it is currently loaded. Returns the SDK `modelKey` of each entry —
   * the exact identifier `llm.load()` accepts. Use this for the dropdown so
   * the list does not depend on LM Studio's JIT setting (REST `/v1/models`
   * only returns loaded models when JIT is off).
   */
  listDownloaded(): Promise<string[]>;
  /** Cached variant; the downloaded list only changes when the user installs/removes a model. */
  listDownloadedCached(ttlMs?: number): Promise<string[]>;
  /** Pre-warm a draft model for speculative decoding. Idempotent + best-effort. */
  preloadDraftModel?(draftModelKey: string): Promise<void>;
  /**
   * Resolve a chat-ready handle for an already-loaded (or just-loaded) model.
   *
   * `options.refresh: true` drops the SDK + WebSocket so any disposed handle
   * sitting in the SDK's internal handle map is discarded. Use this after a
   * "Model is disposed!" or "lock() request could not be registered" error.
   */
  getModelHandle(modelKey: string, options?: { refresh?: boolean }): Promise<LLM>;
  isReachable(): Promise<boolean>;
  setBaseUrl(httpBaseUrl: string): void;
}

/**
 * Error thrown by `LMStudioClient` when a load / unload / list / handle
 * operation fails. The original thrown value is kept on `cause`.
 */
export class LMStudioLifecycleError extends Error {
  constructor(message: string, public readonly cause?: unknown) {
    super(message);
    this.name = 'LMStudioLifecycleError';
  }
}

/**
 * Extract a printable message from an arbitrary thrown value.
 * Uses the value's `message` property when it has a non-nullish one,
 * otherwise falls back to `String(value)`.
 */
function errorMessageOf(err: unknown): string {
  if (typeof err === 'object' && err !== null) {
    const message = (err as { message?: unknown }).message;
    if (message !== undefined && message !== null) return String(message);
  }
  return String(err);
}

/**
 * Convert an LM Studio HTTP base URL into the WebSocket URL handed to the SDK.
 *
 * - `http:` becomes `ws:`, `https:` becomes `wss:`; `ws:` / `wss:` pass through.
 * - REST-only path suffixes (`/api/v0`, `/api/v1`, `/v1`, `/api`) are removed,
 *   with or without a trailing slash.
 * - Trailing slashes are removed from the result.
 *
 * @param httpBaseUrl Base URL as configured by the user; surrounding whitespace is ignored.
 * @returns The WebSocket URL, or `undefined` when the input is empty, unparseable,
 *          or uses any other protocol.
 */
export function httpToWebSocketUrl(httpBaseUrl: string): string | undefined {
  const trimmed = (httpBaseUrl || '').trim();
  if (!trimmed) return undefined;
  try {
    const url = new URL(trimmed);
    if (url.protocol === 'http:') url.protocol = 'ws:';
    else if (url.protocol === 'https:') url.protocol = 'wss:';
    else if (url.protocol !== 'ws:' && url.protocol !== 'wss:') return undefined;

    // Strip every REST-only path suffix LM Studio ships with so the SDK lands on the
    // WebSocket root. Loop because /api/v0 → /api → '' should fully unwind.
    const REST_SUFFIXES = ['/api/v0', '/api/v1', '/v1', '/api'];
    // Drop trailing slashes first: "/v1/" must be recognised as the "/v1" suffix,
    // otherwise a base URL such as "http://host:1234/v1/" keeps its REST path.
    let path = url.pathname.replace(/\/+$/, '');
    let changed = true;
    while (changed) {
      changed = false;
      for (const suffix of REST_SUFFIXES) {
        if (path.endsWith(suffix)) {
          path = path.slice(0, -suffix.length).replace(/\/+$/, '');
          changed = true;
          break;
        }
      }
    }
    url.pathname = path;

    return url.toString().replace(/\/+$/, '');
  } catch {
    // `new URL()` throws on malformed input; treat that as "no usable URL".
    return undefined;
  }
}

/**
 * Thin wrapper around the LM Studio SDK client.
 *
 * Creates the SDK client lazily, wraps lifecycle failures in
 * `LMStudioLifecycleError`, and keeps short-lived caches of the loaded and
 * downloaded model lists.
 */
export class LMStudioClient implements ILMStudioClient {
  private _sdk: SDKClient | undefined;
  private _wsUrl: string | undefined;
  private _loadedCache: { value: string[]; expiresAt: number } | undefined;
  private _downloadedCache: { value: string[]; expiresAt: number } | undefined;

  private static readonly DEFAULT_LOADED_CACHE_TTL_MS = 5000;
  private static readonly DEFAULT_DOWNLOADED_CACHE_TTL_MS = 60_000;

  /** @param httpBaseUrl LM Studio base URL; converted via `httpToWebSocketUrl`. */
  constructor(httpBaseUrl: string) {
    this.setBaseUrl(httpBaseUrl);
  }

  /**
   * Point the client at a different LM Studio instance. When the resulting
   * WebSocket URL differs from the current one, the SDK client and both
   * caches are discarded; otherwise this is a no-op.
   */
  setBaseUrl(httpBaseUrl: string): void {
    const ws = httpToWebSocketUrl(httpBaseUrl);
    if (ws !== this._wsUrl) {
      this._wsUrl = ws;
      this._sdk = undefined;
      this._loadedCache = undefined;
      this._downloadedCache = undefined;
    }
  }

  /** Return the SDK client, creating it on first use. No `baseUrl` is passed when none is configured. */
  private getSdk(): SDKClient {
    if (!this._sdk) {
      this._sdk = new SDKClient(this._wsUrl ? { baseUrl: this._wsUrl } : {});
    }
    return this._sdk;
  }

  /**
   * Load a model through the SDK and invalidate the loaded-models cache.
   *
   * @param modelKey Identifier passed to `llm.load()`.
   * @param signal Optional abort signal forwarded to the SDK.
   * @param loadConfig Optional load-time options; only fields that are set are forwarded.
   * @throws LMStudioLifecycleError when the SDK call rejects.
   */
  async load(modelKey: string, signal?: AbortSignal, loadConfig?: LMStudioLoadConfig): Promise<void> {
    try {
      const opts: { signal?: AbortSignal; config?: LLMLoadModelConfig } = {};
      if (signal) opts.signal = signal;
      const config = this._buildLoadConfig(loadConfig);
      if (Object.keys(config).length > 0) opts.config = config;
      await this.getSdk().llm.load(modelKey, Object.keys(opts).length > 0 ? opts : undefined);
      this._loadedCache = undefined;
      // Loading does not change the downloaded-models set; leave _downloadedCache alone.
      logInfo('LM Studio model loaded.', { modelKey, configKeys: Object.keys(config) });
    } catch (e: unknown) {
      const msg = errorMessageOf(e);
      throw new LMStudioLifecycleError(`Failed to load LM Studio model "${modelKey}": ${msg}`, e);
    }
  }

  /** Translate our flat LMStudioLoadConfig into LM Studio's nested LLMLoadModelConfig shape. */
  private _buildLoadConfig(lc: LMStudioLoadConfig | undefined): LLMLoadModelConfig {
    const out: LLMLoadModelConfig = {};
    if (!lc) return out;
    if (typeof lc.flashAttention === 'boolean') out.flashAttention = lc.flashAttention;
    if (typeof lc.offloadKVCacheToGpu === 'boolean') out.offloadKVCacheToGpu = lc.offloadKVCacheToGpu;
    if (typeof lc.keepModelInMemory === 'boolean') out.keepModelInMemory = lc.keepModelInMemory;
    if (typeof lc.useFp16ForKVCache === 'boolean') out.useFp16ForKVCache = lc.useFp16ForKVCache;
    // 0 / negative means "engine default", so the field is simply omitted.
    if (typeof lc.evalBatchSize === 'number' && lc.evalBatchSize > 0) out.evalBatchSize = lc.evalBatchSize;
    if (lc.gpuOffloadRatio !== undefined) {
      // GPUSetting is deprecated but still accepted — wraps a single `ratio`.
      out.gpu = { ratio: lc.gpuOffloadRatio as any };
    }
    return out;
  }

  /**
   * Ask the SDK to preload a draft model, if the SDK exposes
   * `llm.unstable_preloadDraftModel`. Blank keys are ignored and failures are
   * logged, never thrown.
   */
  async preloadDraftModel(draftModelKey: string): Promise<void> {
    const key = (draftModelKey || '').trim();
    if (!key) return;
    try {
      // `any` because the method is unstable and may be absent from the SDK typings.
      const llm: any = this.getSdk().llm;
      if (typeof llm.unstable_preloadDraftModel === 'function') {
        await llm.unstable_preloadDraftModel(key);
        logInfo('LM Studio draft model preloaded.', { draftModelKey: key });
      }
    } catch (e: unknown) {
      // Best-effort — the main model's respond({draftModel}) will still load it lazily.
      logError('LM Studio draft model preload failed.', { draftModelKey: key, error: errorMessageOf(e) });
    }
  }

  /**
   * Unload a model through the SDK and invalidate the loaded-models cache.
   *
   * @throws LMStudioLifecycleError when the SDK call rejects.
   */
  async unload(modelKey: string): Promise<void> {
    try {
      await this.getSdk().llm.unload(modelKey);
      this._loadedCache = undefined;
      logInfo('LM Studio model unloaded.', { modelKey });
    } catch (e: unknown) {
      const msg = errorMessageOf(e);
      throw new LMStudioLifecycleError(`Failed to unload LM Studio model "${modelKey}": ${msg}`, e);
    }
  }

  /** Force the next downloaded/loaded-models call to re-fetch (use after install / remove). */
  invalidateCaches(): void {
    this._loadedCache = undefined;
    this._downloadedCache = undefined;
  }

  /**
   * List the identifiers of currently loaded models. For each entry the first
   * available of `identifier`, `modelKey`, `path` is used; entries without a
   * non-empty string id are dropped.
   *
   * @throws LMStudioLifecycleError when the SDK call rejects.
   */
  async listLoaded(): Promise<string[]> {
    try {
      const items: any[] = await this.getSdk().llm.listLoaded();
      return items
        .map((m) => m?.identifier ?? m?.modelKey ?? m?.path ?? null)
        .filter((id): id is string => typeof id === 'string' && id.length > 0);
    } catch (e: unknown) {
      const msg = errorMessageOf(e);
      throw new LMStudioLifecycleError(`Failed to list loaded LM Studio models: ${msg}`, e);
    }
  }

  /**
   * Cached `listLoaded()`. Returns a copy of the cached list while it is
   * fresh; otherwise re-fetches. On failure the error is logged, nothing is
   * cached, and an empty list is returned.
   *
   * @param ttlMs Cache lifetime in milliseconds, measured from the start of the fetch.
   */
  async listLoadedCached(ttlMs: number = LMStudioClient.DEFAULT_LOADED_CACHE_TTL_MS): Promise<string[]> {
    const now = Date.now();
    if (this._loadedCache && this._loadedCache.expiresAt > now) {
      return this._loadedCache.value.slice();
    }
    try {
      const value = await this.listLoaded();
      this._loadedCache = { value, expiresAt: now + ttlMs };
      return value.slice();
    } catch (e: unknown) {
      // Best-effort: callers get an empty list, but the failure is no longer silent.
      logError('LM Studio loaded-model list refresh failed.', { error: errorMessageOf(e) });
      return [];
    }
  }

  /**
   * List the `modelKey` of every downloaded LLM. Never throws: failures are
   * logged and reported as an empty list.
   */
  async listDownloaded(): Promise<string[]> {
    try {
      const items: any[] = await this.getSdk().system.listDownloadedModels('llm');
      return items
        .map((m) => m?.modelKey ?? null)
        .filter((k): k is string => typeof k === 'string' && k.length > 0);
    } catch (e: unknown) {
      const msg = errorMessageOf(e);
      logError('Failed to list downloaded LM Studio models.', { error: msg });
      return [];
    }
  }

  /**
   * Cached `listDownloaded()`. Returns a copy of the cached list while it is
   * fresh; otherwise re-fetches.
   *
   * @param ttlMs Cache lifetime in milliseconds, measured from the start of the fetch.
   */
  async listDownloadedCached(ttlMs: number = LMStudioClient.DEFAULT_DOWNLOADED_CACHE_TTL_MS): Promise<string[]> {
    const now = Date.now();
    if (this._downloadedCache && this._downloadedCache.expiresAt > now) {
      return this._downloadedCache.value.slice();
    }
    const value = await this.listDownloaded();
    // Only cache non-empty results — an empty array often signals a transient SDK error,
    // and caching that for 60s would hide a freshly-started LM Studio process.
    if (value.length > 0) {
      this._downloadedCache = { value, expiresAt: now + ttlMs };
    }
    return value.slice();
  }

  /**
   * Resolve a model handle via `llm.model()`.
   *
   * @param modelKey Identifier of the model to resolve.
   * @param options `refresh: true` discards the current SDK client (and the
   *        loaded-models cache) before resolving, so a new client is created.
   * @throws LMStudioLifecycleError when the SDK call rejects.
   */
  async getModelHandle(modelKey: string, options?: { refresh?: boolean }): Promise<LLM> {
    try {
      if (options?.refresh) {
        // Recreate the SDK + WebSocket so the SDK's internal handle
        // cache is dropped. The next llm.model() call mints a fresh
        // handle instead of returning the disposed one from the
        // previous (aborted) prediction.
        // NOTE(review): the old client is only dereferenced here — confirm whether the
        // SDK version in use offers a dispose hook that should be called first.
        this._sdk = undefined;
        this._loadedCache = undefined;
        logInfo('LM Studio SDK handle refresh requested — dropped cached SDK client.', { modelKey });
      }
      return await this.getSdk().llm.model(modelKey);
    } catch (e: unknown) {
      const msg = errorMessageOf(e);
      throw new LMStudioLifecycleError(`Failed to acquire LM Studio model handle "${modelKey}": ${msg}`, e);
    }
  }

  /** Probe connectivity by calling `llm.listLoaded()`; logs and returns `false` on any failure. */
  async isReachable(): Promise<boolean> {
    try {
      await this.getSdk().llm.listLoaded();
      return true;
    } catch (e: unknown) {
      logError('LM Studio not reachable.', { error: errorMessageOf(e) });
      return false;
    }
  }
}