Files
connectai/src/lmstudio/client.ts
T
g1nation 0712014fcb chore: v2.2.73 — ASTRA-DEBUG 로그 레벨 + webview CSP font-src 보강
- ASTRA-DEBUG 정상 흐름 로그를 console.error → logInfo/console.log 로 강등
  (chatHandlers, extension, slashRouter): DevTools에 ERR로 찍히던 오탐 제거
- sidebar webview에 명시적 CSP meta 추가 + font-src에 data: 허용
  (sidebar.html, sidebarProvider._getHtml): VS Code outer iframe이 codicon.ttf를
  data:font/ttf 로 inject하면서 기본 CSP에 막혀 매 prompt 마다 violation
  경고가 찍히던 문제 해소
- 누적된 LM Studio / agent / 컨텍스트 매니저 / 테스트 갱신 동반

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-23 15:52:19 +09:00

256 lines
11 KiB
TypeScript

import { LMStudioClient as SDKClient, LLM, type LLMLoadModelConfig } from '@lmstudio/sdk';
import { logError, logInfo } from '../utils';
/**
* Load-time options forwarded to LM Studio's `llm.load()`. Subset of `LLMLoadModelConfig`.
*
* Every field is optional. This is a flat shape that the client translates
* into the SDK's `LLMLoadModelConfig` before loading a model.
* NOTE(review): the exact engine-side meaning of each flag is defined by the
* LM Studio SDK, not by this file — confirm against the SDK docs.
*/
export interface LMStudioLoadConfig {
/** Forwarded under the same name when a boolean is supplied. */
flashAttention?: boolean;
/**
* "max" | "off" | number 0-1
* Sent to the SDK wrapped as `gpu.ratio` rather than under its own name.
*/
gpuOffloadRatio?: 'max' | 'off' | number;
/** Forwarded under the same name when a boolean is supplied. */
offloadKVCacheToGpu?: boolean;
/** Forwarded under the same name when a boolean is supplied. */
keepModelInMemory?: boolean;
/** Forwarded under the same name when a boolean is supplied. */
useFp16ForKVCache?: boolean;
/**
* 0 / undefined = engine default
* Only values greater than 0 are forwarded; anything else leaves the field unset.
*/
evalBatchSize?: number;
}
/**
* Contract for the LM Studio model-lifecycle client: loading/unloading models,
* enumerating loaded and downloaded models, and resolving chat-ready handles.
*/
export interface ILMStudioClient {
/**
* Load the model identified by `modelKey`.
*
* @param modelKey   Identifier of the model to load.
* @param signal     Optional abort signal passed through to the load call.
* @param loadConfig Optional load-time options (see `LMStudioLoadConfig`).
*/
load(modelKey: string, signal?: AbortSignal, loadConfig?: LMStudioLoadConfig): Promise<void>;
/** Unload the model identified by `modelKey`. */
unload(modelKey: string): Promise<void>;
/** List the identifiers of the models that are currently loaded. */
listLoaded(): Promise<string[]>;
/** Like listLoaded() but caches the result for `ttlMs` to avoid hammering the SDK. */
listLoadedCached(ttlMs?: number): Promise<string[]>;
/**
* List every LLM the user has downloaded into LM Studio, regardless of
* whether it is currently loaded. Returns the SDK `modelKey` of each entry —
* the exact identifier `llm.load()` accepts. Use this for the dropdown so
* the list does not depend on LM Studio's JIT setting (REST `/v1/models`
* only returns loaded models when JIT is off).
*/
listDownloaded(): Promise<string[]>;
/** Cached variant; the downloaded list only changes when the user installs/removes a model. */
listDownloadedCached(ttlMs?: number): Promise<string[]>;
/** Pre-warm a draft model for speculative decoding. Idempotent + best-effort. */
preloadDraftModel?(draftModelKey: string): Promise<void>;
/**
* Resolve a chat-ready handle for an already-loaded (or just-loaded) model.
*
* `options.refresh: true` drops the SDK + WebSocket so any disposed handle
* sitting in the SDK's internal handle map is discarded. Use this after a
* "Model is disposed!" or "lock() request could not be registered" error.
*/
getModelHandle(modelKey: string, options?: { refresh?: boolean }): Promise<LLM>;
/** Resolve `true` when LM Studio answers a request, `false` otherwise. */
isReachable(): Promise<boolean>;
/** Point the client at a (possibly different) LM Studio HTTP base URL. */
setBaseUrl(httpBaseUrl: string): void;
}
/**
 * Error raised when an LM Studio lifecycle operation (load, unload, list,
 * handle acquisition) fails. The underlying failure, if any, is kept on `cause`.
 */
export class LMStudioLifecycleError extends Error {
  /** The original error (or other thrown value) that triggered this failure. */
  public readonly cause?: unknown;

  /**
   * @param message Human-readable description of what failed.
   * @param cause   Optional underlying error or thrown value.
   */
  constructor(message: string, cause?: unknown) {
    super(message);
    // Assigned before `name` so own-property order matches a parameter property.
    this.cause = cause;
    this.name = 'LMStudioLifecycleError';
  }
}
/**
 * Convert an LM Studio HTTP base URL into the WebSocket root URL for the SDK.
 *
 * - `http:` becomes `ws:` and `https:` becomes `wss:`; `ws:`/`wss:` inputs are kept.
 * - REST-only path suffixes (`/api/v0`, `/api/v1`, `/v1`, `/api`) are removed
 *   repeatedly from the end of the path, with or without trailing slashes
 *   (`http://host:1234/v1/` and `http://host:1234/v1` give the same result).
 * - Trailing slashes are removed from the final string.
 *
 * @param httpBaseUrl Base URL as configured by the user; may be empty.
 * @returns The WebSocket URL, or `undefined` when the input is empty,
 *          unparsable, or uses an unsupported protocol.
 */
export function httpToWebSocketUrl(httpBaseUrl: string): string | undefined {
  const trimmed = (httpBaseUrl || '').trim();
  if (!trimmed) return undefined;

  let url: URL;
  try {
    url = new URL(trimmed);
  } catch {
    return undefined;
  }

  switch (url.protocol) {
    case 'http:':
      url.protocol = 'ws:';
      break;
    case 'https:':
      url.protocol = 'wss:';
      break;
    case 'ws:':
    case 'wss:':
      break;
    default:
      return undefined;
  }

  // Strip every REST-only path suffix LM Studio ships with so the SDK lands on the
  // WebSocket root. Loop so stacked suffixes (e.g. `/v1/api`) fully unwind.
  const REST_SUFFIXES = ['/api/v0', '/api/v1', '/v1', '/api'];
  const TRAILING_SLASHES = /\/+$/;

  // Drop trailing slashes first: `/v1/` would otherwise never match `/v1`.
  let path = url.pathname.replace(TRAILING_SLASHES, '');
  let stripped = true;
  while (stripped) {
    stripped = false;
    for (const suffix of REST_SUFFIXES) {
      if (path.endsWith(suffix)) {
        path = path.slice(0, -suffix.length).replace(TRAILING_SLASHES, '');
        stripped = true;
        break;
      }
    }
  }
  url.pathname = path;

  return url.toString().replace(TRAILING_SLASHES, '');
}
/**
 * `ILMStudioClient` implementation backed by a lazily created `@lmstudio/sdk` client.
 *
 * The SDK client is built on first use from the WebSocket URL derived from the
 * configured HTTP base URL. Two time-limited caches (loaded models, downloaded
 * models) are kept; both are cleared, together with the SDK client, whenever
 * the resolved WebSocket URL changes.
 */
export class LMStudioClient implements ILMStudioClient {
  /** Lazily created SDK client; `undefined` until first use or after a reset. */
  private _sdk: SDKClient | undefined;
  /** WebSocket URL derived from the HTTP base URL; when unset the SDK is constructed with no options. */
  private _wsUrl: string | undefined;
  /** Last successful `listLoaded()` result and its expiry timestamp (ms since epoch). */
  private _loadedCache: { value: string[]; expiresAt: number } | undefined;
  /** Last non-empty `listDownloaded()` result and its expiry timestamp (ms since epoch). */
  private _downloadedCache: { value: string[]; expiresAt: number } | undefined;
  private static readonly DEFAULT_LOADED_CACHE_TTL_MS = 5000;
  private static readonly DEFAULT_DOWNLOADED_CACHE_TTL_MS = 60_000;

  /** @param httpBaseUrl LM Studio HTTP base URL; converted to a WebSocket URL internally. */
  constructor(httpBaseUrl: string) {
    this.setBaseUrl(httpBaseUrl);
  }

  /**
   * Re-target the client. When the derived WebSocket URL differs from the
   * current one, the SDK client and both caches are discarded; otherwise this
   * is a no-op.
   */
  setBaseUrl(httpBaseUrl: string): void {
    const ws = httpToWebSocketUrl(httpBaseUrl);
    if (ws === this._wsUrl) return;
    this._wsUrl = ws;
    // NOTE(review): the previous SDK client is only dereferenced, not explicitly
    // closed — confirm the SDK releases its connection without a dispose call.
    this._sdk = undefined;
    this._loadedCache = undefined;
    this._downloadedCache = undefined;
  }

  /** Return the SDK client, creating it on first use. */
  private getSdk(): SDKClient {
    if (!this._sdk) {
      this._sdk = new SDKClient(this._wsUrl ? { baseUrl: this._wsUrl } : {});
    }
    return this._sdk;
  }

  /**
   * Extract a printable message from an arbitrary thrown value: its `message`
   * property when present (non-nullish), otherwise the value stringified.
   */
  private static _errorMessage(e: unknown): string {
    const message = (e as { message?: unknown } | null | undefined)?.message;
    return message == null ? String(e) : String(message);
  }

  /**
   * Load a model through the SDK and invalidate the loaded-models cache.
   *
   * @param modelKey   Identifier passed to `llm.load()`.
   * @param signal     Optional abort signal forwarded to the SDK.
   * @param loadConfig Optional flat load options; translated by `_buildLoadConfig`.
   * @throws LMStudioLifecycleError wrapping any failure (original error on `cause`).
   */
  async load(modelKey: string, signal?: AbortSignal, loadConfig?: LMStudioLoadConfig): Promise<void> {
    try {
      const opts: { signal?: AbortSignal; config?: LLMLoadModelConfig } = {};
      if (signal) opts.signal = signal;
      const config = this._buildLoadConfig(loadConfig);
      if (Object.keys(config).length > 0) opts.config = config;
      // Pass `undefined` rather than an empty options object when nothing was set.
      await this.getSdk().llm.load(modelKey, Object.keys(opts).length > 0 ? opts : undefined);
      this._loadedCache = undefined;
      // Loading does not change the downloaded-models set; leave _downloadedCache alone.
      logInfo('LM Studio model loaded.', { modelKey, configKeys: Object.keys(config) });
    } catch (e: unknown) {
      const msg = LMStudioClient._errorMessage(e);
      throw new LMStudioLifecycleError(`Failed to load LM Studio model "${modelKey}": ${msg}`, e);
    }
  }

  /**
   * Translate our flat LMStudioLoadConfig into LM Studio's nested LLMLoadModelConfig shape.
   *
   * Each field is copied only when it has the expected runtime type, so
   * malformed settings values are dropped instead of reaching the SDK.
   */
  private _buildLoadConfig(lc: LMStudioLoadConfig | undefined): LLMLoadModelConfig {
    const out: LLMLoadModelConfig = {};
    if (!lc) return out;
    if (typeof lc.flashAttention === 'boolean') out.flashAttention = lc.flashAttention;
    if (typeof lc.offloadKVCacheToGpu === 'boolean') out.offloadKVCacheToGpu = lc.offloadKVCacheToGpu;
    if (typeof lc.keepModelInMemory === 'boolean') out.keepModelInMemory = lc.keepModelInMemory;
    if (typeof lc.useFp16ForKVCache === 'boolean') out.useFp16ForKVCache = lc.useFp16ForKVCache;
    if (typeof lc.evalBatchSize === 'number' && lc.evalBatchSize > 0) out.evalBatchSize = lc.evalBatchSize;

    // Validate like the other fields: only 'max', 'off', or a finite number is
    // forwarded. null, NaN, Infinity and wrong-typed values are ignored.
    const ratio: unknown = lc.gpuOffloadRatio;
    const ratioIsValid =
      ratio === 'max' || ratio === 'off' || (typeof ratio === 'number' && Number.isFinite(ratio));
    if (ratioIsValid) {
      // GPUSetting is deprecated but still accepted — wraps a single `ratio`.
      // NOTE(review): cast kept from the original; likely removable if the SDK's
      // ratio type accepts 'max' | 'off' | number.
      out.gpu = { ratio: ratio as any };
    }
    return out;
  }

  /**
   * Pre-warm a draft model for speculative decoding. Best-effort: a blank key
   * or an SDK without `unstable_preloadDraftModel` is a silent no-op, and
   * failures are logged rather than thrown.
   */
  async preloadDraftModel(draftModelKey: string): Promise<void> {
    const key = (draftModelKey || '').trim();
    if (!key) return;
    try {
      // `any` because the method is feature-detected at runtime.
      const llm: any = this.getSdk().llm;
      if (typeof llm.unstable_preloadDraftModel === 'function') {
        await llm.unstable_preloadDraftModel(key);
        logInfo('LM Studio draft model preloaded.', { draftModelKey: key });
      }
    } catch (e: unknown) {
      // Best-effort — the main model's respond({draftModel}) will still load it lazily.
      logError('LM Studio draft model preload failed.', {
        draftModelKey: key,
        error: LMStudioClient._errorMessage(e),
      });
    }
  }

  /**
   * Unload a model through the SDK and invalidate the loaded-models cache.
   *
   * @throws LMStudioLifecycleError wrapping any failure (original error on `cause`).
   */
  async unload(modelKey: string): Promise<void> {
    try {
      await this.getSdk().llm.unload(modelKey);
      this._loadedCache = undefined;
      logInfo('LM Studio model unloaded.', { modelKey });
    } catch (e: unknown) {
      const msg = LMStudioClient._errorMessage(e);
      throw new LMStudioLifecycleError(`Failed to unload LM Studio model "${modelKey}": ${msg}`, e);
    }
  }

  /** Force the next downloaded/loaded-models call to re-fetch (use after install / remove). */
  invalidateCaches(): void {
    this._loadedCache = undefined;
    this._downloadedCache = undefined;
  }

  /**
   * List currently loaded models. Each entry is reduced to the first available
   * of `identifier`, `modelKey`, `path`; entries without a non-empty string id
   * are skipped.
   *
   * @throws LMStudioLifecycleError wrapping any SDK failure.
   */
  async listLoaded(): Promise<string[]> {
    try {
      const items: any[] = await this.getSdk().llm.listLoaded();
      return items
        .map((m) => m?.identifier ?? m?.modelKey ?? m?.path ?? null)
        .filter((id): id is string => typeof id === 'string' && id.length > 0);
    } catch (e: unknown) {
      const msg = LMStudioClient._errorMessage(e);
      throw new LMStudioLifecycleError(`Failed to list loaded LM Studio models: ${msg}`, e);
    }
  }

  /**
   * Cached `listLoaded()`. Returns a copy of the cached list while it is fresh;
   * otherwise re-fetches and caches the result for `ttlMs`.
   *
   * Never rejects: on failure the error is logged and an empty list is returned.
   * Failures are not cached, so the next call retries.
   */
  async listLoadedCached(ttlMs: number = LMStudioClient.DEFAULT_LOADED_CACHE_TTL_MS): Promise<string[]> {
    const now = Date.now();
    const cached = this._loadedCache;
    if (cached && cached.expiresAt > now) {
      // Copy so callers cannot mutate the cached array.
      return cached.value.slice();
    }
    try {
      const value = await this.listLoaded();
      this._loadedCache = { value, expiresAt: now + ttlMs };
      return value.slice();
    } catch (e: unknown) {
      // Keep the best-effort `[]` fallback, but log the failure instead of
      // swallowing it silently.
      logError('Failed to refresh loaded LM Studio model list; returning empty list.', {
        error: LMStudioClient._errorMessage(e),
      });
      return [];
    }
  }

  /**
   * List the `modelKey` of every downloaded LLM. Never rejects: failures are
   * logged and reported as an empty list.
   */
  async listDownloaded(): Promise<string[]> {
    try {
      const items: any[] = await this.getSdk().system.listDownloadedModels('llm');
      return items
        .map((m) => m?.modelKey ?? null)
        .filter((k): k is string => typeof k === 'string' && k.length > 0);
    } catch (e: unknown) {
      const msg = LMStudioClient._errorMessage(e);
      logError('Failed to list downloaded LM Studio models.', { error: msg });
      return [];
    }
  }

  /**
   * Cached `listDownloaded()`. Returns a copy of the cached list while it is
   * fresh; otherwise re-fetches. Empty results are returned but not cached.
   */
  async listDownloadedCached(ttlMs: number = LMStudioClient.DEFAULT_DOWNLOADED_CACHE_TTL_MS): Promise<string[]> {
    const now = Date.now();
    const cached = this._downloadedCache;
    if (cached && cached.expiresAt > now) {
      return cached.value.slice();
    }
    const value = await this.listDownloaded();
    // Only cache non-empty results — an empty array often signals a transient SDK error,
    // and caching that for 60s would hide a freshly-started LM Studio process.
    if (value.length > 0) {
      this._downloadedCache = { value, expiresAt: now + ttlMs };
    }
    return value.slice();
  }

  /**
   * Resolve a model handle via `llm.model()`.
   *
   * With `options.refresh`, the cached SDK client and the loaded-models cache
   * are dropped first so the handle comes from a newly constructed client.
   *
   * @throws LMStudioLifecycleError wrapping any failure (original error on `cause`).
   */
  async getModelHandle(modelKey: string, options?: { refresh?: boolean }): Promise<LLM> {
    try {
      if (options?.refresh) {
        // Recreate the SDK + WebSocket so the SDK's internal handle
        // cache is dropped. The next llm.model() call mints a fresh
        // handle instead of returning the disposed one from the
        // previous (aborted) prediction.
        // NOTE(review): the old client is dereferenced, not explicitly closed —
        // confirm the SDK tears down its connection without a dispose call.
        this._sdk = undefined;
        this._loadedCache = undefined;
        logInfo('LM Studio SDK handle refresh requested — dropped cached SDK client.', { modelKey });
      }
      return await this.getSdk().llm.model(modelKey);
    } catch (e: unknown) {
      const msg = LMStudioClient._errorMessage(e);
      throw new LMStudioLifecycleError(`Failed to acquire LM Studio model handle "${modelKey}": ${msg}`, e);
    }
  }

  /**
   * Probe LM Studio by issuing a `listLoaded()` call through the SDK.
   * Resolves `true` on success; on failure logs the error and resolves `false`.
   */
  async isReachable(): Promise<boolean> {
    try {
      await this.getSdk().llm.listLoaded();
      return true;
    } catch (e: unknown) {
      logError('LM Studio not reachable.', { error: LMStudioClient._errorMessage(e) });
      return false;
    }
  }
}