148 lines
5.6 KiB
TypeScript
148 lines
5.6 KiB
TypeScript
import { LMStudioClient as SDKClient, LLM } from '@lmstudio/sdk';
|
|
import { logError, logInfo } from '../utils';
|
|
|
|
/**
 * Contract for talking to an LM Studio server: model lifecycle (load/unload),
 * inspection of what is loaded, and acquisition of chat-ready model handles.
 */
export interface ILMStudioClient {
  // --- Connection -----------------------------------------------------------

  /** Point the client at the server identified by the given HTTP base URL. */
  setBaseUrl(httpBaseUrl: string): void;

  /** Resolves to `true` when the LM Studio server answers, `false` otherwise. */
  isReachable(): Promise<boolean>;

  // --- Model lifecycle ------------------------------------------------------

  /**
   * Load the model identified by `modelKey`.
   *
   * @param modelKey Key of the model to load.
   * @param signal Optional abort signal for the load request.
   */
  load(modelKey: string, signal?: AbortSignal): Promise<void>;

  /** Unload the model identified by `modelKey`. */
  unload(modelKey: string): Promise<void>;

  // --- Inspection -----------------------------------------------------------

  /** Resolve the identifiers of the models that are currently loaded. */
  listLoaded(): Promise<string[]>;

  /**
   * Same result as `listLoaded()`, but the answer is remembered for `ttlMs`
   * milliseconds so frequent callers do not hammer the SDK.
   */
  listLoadedCached(ttlMs?: number): Promise<string[]>;

  // --- Handles --------------------------------------------------------------

  /**
   * Obtain a chat-ready handle for a model that is already loaded (or was
   * just loaded).
   *
   * Passing `options.refresh: true` drops the SDK + WebSocket, which discards
   * any disposed handle still sitting in the SDK's internal handle map. Use
   * it after a "Model is disposed!" or "lock() request could not be
   * registered" error.
   */
  getModelHandle(modelKey: string, options?: { refresh?: boolean }): Promise<LLM>;
}
|
|
|
|
/**
 * Error raised when an LM Studio lifecycle operation fails.
 *
 * The underlying failure, when one is supplied, is kept on `cause` so callers
 * can inspect it.
 */
export class LMStudioLifecycleError extends Error {
  /** The original error (or other thrown value) that triggered this failure. */
  public readonly cause?: unknown;

  /**
   * @param message Human-readable description of what failed.
   * @param cause Optional underlying error or thrown value.
   */
  constructor(message: string, cause?: unknown) {
    super(message);
    this.cause = cause;
    this.name = 'LMStudioLifecycleError';
  }
}
|
|
|
|
/**
 * Convert an HTTP(S) base URL into the matching WebSocket base URL.
 *
 * - `http:` becomes `ws:` and `https:` becomes `wss:`; `ws:`/`wss:` inputs are
 *   kept as they are. Any other protocol yields `undefined`.
 * - A trailing `/v1` path segment and then a trailing `/api` segment are
 *   removed, so `http://host:1234/v1` and `http://host:1234/api/v1` both map
 *   to `ws://host:1234`. Trailing slashes are ignored when detecting these
 *   suffixes, so `http://host:1234/v1/` is handled the same way.
 * - Trailing slashes are removed from the result.
 *
 * @param httpBaseUrl Base URL to convert; surrounding whitespace is ignored.
 * @returns The WebSocket URL, or `undefined` when the input is empty, cannot
 *          be parsed as a URL, or uses an unsupported protocol.
 */
export function httpToWebSocketUrl(httpBaseUrl: string): string | undefined {
  const trimmed = (httpBaseUrl || '').trim();
  if (!trimmed) return undefined;

  try {
    const url = new URL(trimmed);

    if (url.protocol === 'http:') {
      url.protocol = 'ws:';
    } else if (url.protocol === 'https:') {
      url.protocol = 'wss:';
    } else if (url.protocol !== 'ws:' && url.protocol !== 'wss:') {
      return undefined;
    }

    // Drop trailing slashes first: otherwise "/v1/" would not be recognised
    // as ending in "/v1" and the REST suffix would leak into the result.
    let path = url.pathname.replace(/\/+$/, '');
    if (path.endsWith('/v1')) path = path.slice(0, -'/v1'.length);
    if (path.endsWith('/api')) path = path.slice(0, -'/api'.length);
    url.pathname = path;

    // Serialising always leaves at least "/" as the path; strip it.
    return url.toString().replace(/\/+$/, '');
  } catch {
    // `new URL` throws on input that is not a valid absolute URL.
    return undefined;
  }
}
|
|
|
|
/**
 * `ILMStudioClient` implementation backed by the `@lmstudio/sdk` client.
 *
 * The SDK client is created lazily on first use and recreated whenever the
 * base URL changes or a handle refresh is requested. Failures of the
 * lifecycle operations are rethrown as `LMStudioLifecycleError` with the
 * original error attached as `cause`.
 */
export class LMStudioClient implements ILMStudioClient {
  /** Default lifetime of the `listLoadedCached()` result, in milliseconds. */
  private static readonly DEFAULT_LOADED_CACHE_TTL_MS = 5000;

  /** Lazily created SDK client; `undefined` until first use or after a reset. */
  private _sdk: SDKClient | undefined;
  /** WebSocket base URL derived from the HTTP base URL; `undefined` means SDK default. */
  private _wsUrl: string | undefined;
  /** Last `listLoaded()` result together with the timestamp at which it expires. */
  private _loadedCache: { value: string[]; expiresAt: number } | undefined;

  /**
   * @param httpBaseUrl HTTP base URL of the LM Studio server.
   */
  constructor(httpBaseUrl: string) {
    this.setBaseUrl(httpBaseUrl);
  }

  /** Extract a printable message from an arbitrary thrown value. */
  private static describeError(e: unknown): string {
    const message = (e as { message?: unknown } | null | undefined)?.message;
    return String(message ?? String(e));
  }

  /**
   * Switch to a different server. When the derived WebSocket URL actually
   * changes, the SDK client and the loaded-model cache are discarded so the
   * next call talks to the new server.
   */
  setBaseUrl(httpBaseUrl: string): void {
    const ws = httpToWebSocketUrl(httpBaseUrl);
    if (ws === this._wsUrl) return;

    this._wsUrl = ws;
    // NOTE(review): the previous SDK client is only dereferenced here; whether
    // its WebSocket gets closed depends on the SDK — TODO confirm.
    this._sdk = undefined;
    this._loadedCache = undefined;
  }

  /** Return the SDK client, creating it on first use. */
  private getSdk(): SDKClient {
    if (!this._sdk) {
      this._sdk = new SDKClient(this._wsUrl ? { baseUrl: this._wsUrl } : {});
    }
    return this._sdk;
  }

  /**
   * Load a model and invalidate the loaded-model cache.
   *
   * @param modelKey Key of the model to load.
   * @param signal Optional abort signal forwarded to the SDK.
   * @throws LMStudioLifecycleError when the SDK call fails.
   */
  async load(modelKey: string, signal?: AbortSignal): Promise<void> {
    try {
      await this.getSdk().llm.load(modelKey, signal ? { signal } : undefined);
      this._loadedCache = undefined;
      logInfo('LM Studio model loaded.', { modelKey });
    } catch (e: unknown) {
      const msg = LMStudioClient.describeError(e);
      throw new LMStudioLifecycleError(`Failed to load LM Studio model "${modelKey}": ${msg}`, e);
    }
  }

  /**
   * Unload a model and invalidate the loaded-model cache.
   *
   * @param modelKey Key of the model to unload.
   * @throws LMStudioLifecycleError when the SDK call fails.
   */
  async unload(modelKey: string): Promise<void> {
    try {
      await this.getSdk().llm.unload(modelKey);
      this._loadedCache = undefined;
      logInfo('LM Studio model unloaded.', { modelKey });
    } catch (e: unknown) {
      const msg = LMStudioClient.describeError(e);
      throw new LMStudioLifecycleError(`Failed to unload LM Studio model "${modelKey}": ${msg}`, e);
    }
  }

  /**
   * List the currently loaded models.
   *
   * Each entry is reduced to the first non-nullish of `identifier`,
   * `modelKey` and `path`; entries that do not yield a non-empty string are
   * dropped.
   *
   * @throws LMStudioLifecycleError when the SDK call fails.
   */
  async listLoaded(): Promise<string[]> {
    try {
      const items: readonly unknown[] = await this.getSdk().llm.listLoaded();
      return items
        .map((item) => {
          const m = item as
            | { identifier?: unknown; modelKey?: unknown; path?: unknown }
            | null
            | undefined;
          return m?.identifier ?? m?.modelKey ?? m?.path ?? null;
        })
        .filter((id): id is string => typeof id === 'string' && id.length > 0);
    } catch (e: unknown) {
      const msg = LMStudioClient.describeError(e);
      throw new LMStudioLifecycleError(`Failed to list loaded LM Studio models: ${msg}`, e);
    }
  }

  /**
   * Like `listLoaded()`, but reuses the previous result while it is younger
   * than `ttlMs`. Best-effort: a failed refresh is logged and reported as an
   * empty list instead of being thrown.
   *
   * @param ttlMs How long a fresh result stays valid, in milliseconds.
   * @returns A copy of the loaded-model identifiers, so callers cannot mutate
   *          the cached array.
   */
  async listLoadedCached(ttlMs: number = LMStudioClient.DEFAULT_LOADED_CACHE_TTL_MS): Promise<string[]> {
    const cached = this._loadedCache;
    if (cached && cached.expiresAt > Date.now()) {
      return cached.value.slice();
    }

    try {
      const value = await this.listLoaded();
      // Start the TTL when the result arrives, so request latency does not
      // eat into the cache lifetime.
      this._loadedCache = { value, expiresAt: Date.now() + ttlMs };
      return value.slice();
    } catch (e: unknown) {
      logError('Failed to refresh LM Studio loaded-model list; returning empty list.', {
        error: LMStudioClient.describeError(e),
      });
      return [];
    }
  }

  /**
   * Resolve a chat-ready handle for `modelKey`.
   *
   * @param modelKey Key of the model to get a handle for.
   * @param options `refresh: true` discards the SDK client (and the
   *                loaded-model cache) before asking for the handle.
   * @throws LMStudioLifecycleError when the SDK call fails.
   */
  async getModelHandle(modelKey: string, options?: { refresh?: boolean }): Promise<LLM> {
    try {
      if (options?.refresh) {
        // Recreate the SDK + WebSocket so the SDK's internal handle cache is
        // dropped. The next llm.model() call mints a fresh handle instead of
        // returning the disposed one from the previous (aborted) prediction.
        this._sdk = undefined;
        this._loadedCache = undefined;
        logInfo('LM Studio SDK handle refresh requested — dropped cached SDK client.', { modelKey });
      }
      return await this.getSdk().llm.model(modelKey);
    } catch (e: unknown) {
      const msg = LMStudioClient.describeError(e);
      throw new LMStudioLifecycleError(`Failed to acquire LM Studio model handle "${modelKey}": ${msg}`, e);
    }
  }

  /**
   * Probe the server by listing loaded models.
   *
   * @returns `true` when the probe succeeds; `false` (after logging the
   *          failure) when it throws.
   */
  async isReachable(): Promise<boolean> {
    try {
      await this.getSdk().llm.listLoaded();
      return true;
    } catch (e: unknown) {
      logError('LM Studio not reachable.', { error: LMStudioClient.describeError(e) });
      return false;
    }
  }
}
|