// connectai/src/lmstudio/client.ts

import { LMStudioClient as SDKClient, LLM } from '@lmstudio/sdk';
import { logError, logInfo } from '../utils';

/**
 * Contract for managing LM Studio model lifecycle (load / unload / list) and
 * for obtaining chat-ready model handles.
 */
export interface ILMStudioClient {
    /**
     * Load the model identified by `modelKey`.
     *
     * @param modelKey Key of the model to load.
     * @param signal Optional abort signal forwarded to the load request.
     */
    load(modelKey: string, signal?: AbortSignal): Promise<void>;
    /** Unload the model identified by `modelKey`. */
    unload(modelKey: string): Promise<void>;
    /** Resolve the identifiers of the models that are currently loaded. */
    listLoaded(): Promise<string[]>;
    /** Like listLoaded() but caches the result for `ttlMs` to avoid hammering the SDK. */
    listLoadedCached(ttlMs?: number): Promise<string[]>;
    /**
     * Resolve a chat-ready handle for an already-loaded (or just-loaded) model.
     *
     * `options.refresh: true` discards the cached SDK client so the next call
     * goes through a newly created one, and any disposed handle sitting in the
     * old SDK's internal handle map is no longer returned. Use this after a
     * "Model is disposed!" or "lock() request could not be registered" error.
     */
    getModelHandle(modelKey: string, options?: { refresh?: boolean }): Promise<LLM>;
    /** Resolve to `true` when LM Studio can currently be reached, `false` otherwise. */
    isReachable(): Promise<boolean>;
    /** Point the client at the LM Studio server described by `httpBaseUrl`. */
    setBaseUrl(httpBaseUrl: string): void;
}

/**
 * Error raised when an LM Studio lifecycle operation fails.
 *
 * `cause` carries the underlying error (or whatever value was thrown) when
 * one is available, so callers can inspect the original failure.
 */
export class LMStudioLifecycleError extends Error {
    /** The original failure that triggered this error, if any. */
    public readonly cause?: unknown;

    /**
     * @param message Human-readable description of the failure.
     * @param cause Optional underlying error or thrown value.
     */
    constructor(message: string, cause?: unknown) {
        super(message);
        this.cause = cause;
        this.name = 'LMStudioLifecycleError';
    }
}

/**
 * Convert an LM Studio HTTP base URL into the WebSocket base URL handed to
 * the SDK client.
 *
 * - `http:` becomes `ws:` and `https:` becomes `wss:`; `ws:` / `wss:` URLs are
 *   accepted as-is. Any other protocol is rejected.
 * - A trailing `/v1` path segment, and then a trailing `/api` segment, are
 *   removed. Trailing slashes are ignored while matching, so
 *   `http://host:1234/v1/` is treated the same as `http://host:1234/v1`.
 * - The returned string never ends with `/`.
 *
 * @param httpBaseUrl Base URL as configured by the user; surrounding
 *   whitespace is ignored.
 * @returns The WebSocket URL, or `undefined` when the input is empty, cannot
 *   be parsed as a URL, or uses an unsupported protocol.
 */
export function httpToWebSocketUrl(httpBaseUrl: string): string | undefined {
    const trimmed = (httpBaseUrl || '').trim();
    if (!trimmed) return undefined;

    const stripTrailingSlashes = (value: string): string => value.replace(/\/+$/, '');

    try {
        const url = new URL(trimmed);
        switch (url.protocol) {
            case 'http:':
                url.protocol = 'ws:';
                break;
            case 'https:':
                url.protocol = 'wss:';
                break;
            case 'ws:':
            case 'wss:':
                break;
            default:
                return undefined;
        }

        // Normalise trailing slashes before each suffix check; otherwise a
        // path such as "/v1/" would not match "/v1" and would leak into the
        // WebSocket URL.
        let path = stripTrailingSlashes(url.pathname);
        for (const suffix of ['/v1', '/api']) {
            if (path.endsWith(suffix)) {
                path = stripTrailingSlashes(path.slice(0, -suffix.length));
            }
        }
        url.pathname = path;

        return stripTrailingSlashes(url.toString());
    } catch {
        // new URL() throws on input that is not a valid absolute URL.
        return undefined;
    }
}

/**
 * Lifecycle wrapper around the LM Studio SDK client.
 *
 * The SDK client is created lazily for the configured WebSocket URL and is
 * recreated whenever the base URL changes or a handle refresh is requested.
 * SDK failures are rethrown as {@link LMStudioLifecycleError}, except in
 * `listLoadedCached()` and `isReachable()`, which log and return a fallback.
 */
export class LMStudioClient implements ILMStudioClient {
    private _sdk: SDKClient | undefined;
    private _wsUrl: string | undefined;
    private _loadedCache: { value: string[]; expiresAt: number } | undefined;
    /**
     * Incremented on every cache invalidation. A list request that started
     * before an invalidation must not write its (possibly stale) result back.
     */
    private _loadedCacheEpoch = 0;
    private static readonly DEFAULT_LOADED_CACHE_TTL_MS = 5000;

    /**
     * @param httpBaseUrl HTTP (or WebSocket) base URL of the LM Studio server.
     */
    constructor(httpBaseUrl: string) {
        this.setBaseUrl(httpBaseUrl);
    }

    /**
     * Switch to a different LM Studio server. When the derived WebSocket URL
     * differs from the current one, the SDK client and the loaded-model cache
     * are discarded; otherwise this is a no-op.
     */
    setBaseUrl(httpBaseUrl: string): void {
        const ws = httpToWebSocketUrl(httpBaseUrl);
        if (ws !== this._wsUrl) {
            this._wsUrl = ws;
            this.dropSdk();
        }
    }

    /** Return the current SDK client, creating it on first use. */
    private getSdk(): SDKClient {
        if (!this._sdk) {
            this._sdk = new SDKClient(this._wsUrl ? { baseUrl: this._wsUrl } : {});
        }
        return this._sdk;
    }

    /**
     * Forget the current SDK client and everything cached from it.
     *
     * NOTE(review): the previous client is only dereferenced here, not
     * explicitly disposed — confirm whether the SDK closes its WebSocket on
     * its own or whether an explicit dispose call is needed.
     */
    private dropSdk(): void {
        this._sdk = undefined;
        this.invalidateLoadedCache();
    }

    /** Drop the cached loaded-model list and fence off in-flight refreshes. */
    private invalidateLoadedCache(): void {
        this._loadedCache = undefined;
        this._loadedCacheEpoch += 1;
    }

    /** Best-effort text for a caught value: its `message` if present, else its string form. */
    private static describeError(e: unknown): string {
        const message = (e as { message?: unknown } | null | undefined)?.message;
        return String(message ?? e);
    }

    /**
     * Load a model.
     *
     * @param modelKey Key of the model to load.
     * @param signal Optional abort signal forwarded to the SDK.
     * @throws LMStudioLifecycleError when the SDK call fails.
     */
    async load(modelKey: string, signal?: AbortSignal): Promise<void> {
        try {
            await this.getSdk().llm.load(modelKey, signal ? { signal } : undefined);
            logInfo('LM Studio model loaded.', { modelKey });
        } catch (e: unknown) {
            const msg = LMStudioClient.describeError(e);
            throw new LMStudioLifecycleError(`Failed to load LM Studio model "${modelKey}": ${msg}`, e);
        } finally {
            // Invalidate even on failure: the server-side state is unknown
            // after a failed or aborted load.
            this.invalidateLoadedCache();
        }
    }

    /**
     * Unload a model.
     *
     * @param modelKey Key of the model to unload.
     * @throws LMStudioLifecycleError when the SDK call fails.
     */
    async unload(modelKey: string): Promise<void> {
        try {
            await this.getSdk().llm.unload(modelKey);
            logInfo('LM Studio model unloaded.', { modelKey });
        } catch (e: unknown) {
            const msg = LMStudioClient.describeError(e);
            throw new LMStudioLifecycleError(`Failed to unload LM Studio model "${modelKey}": ${msg}`, e);
        } finally {
            // Invalidate even on failure: the model may have been unloaded
            // before the error surfaced.
            this.invalidateLoadedCache();
        }
    }

    /**
     * Query the SDK for the currently loaded models.
     *
     * @returns One identifier per loaded model, taken from the first
     *   available of `identifier`, `modelKey`, `path`; entries without a
     *   non-empty string identifier are skipped.
     * @throws LMStudioLifecycleError when the SDK call fails.
     */
    async listLoaded(): Promise<string[]> {
        try {
            const items: any[] = await this.getSdk().llm.listLoaded();
            return items
                .map((m) => m?.identifier ?? m?.modelKey ?? m?.path ?? null)
                .filter((id): id is string => typeof id === 'string' && id.length > 0);
        } catch (e: unknown) {
            const msg = LMStudioClient.describeError(e);
            throw new LMStudioLifecycleError(`Failed to list loaded LM Studio models: ${msg}`, e);
        }
    }

    /**
     * Like `listLoaded()`, but serves a cached copy while it is younger than
     * `ttlMs`. Never rejects: on failure the error is logged and an empty
     * list is returned (and nothing is cached).
     *
     * @param ttlMs How long a fetched list stays valid, in milliseconds.
     * @returns A fresh array each call; callers may mutate it freely.
     */
    async listLoadedCached(ttlMs: number = LMStudioClient.DEFAULT_LOADED_CACHE_TTL_MS): Promise<string[]> {
        const now = Date.now();
        const cached = this._loadedCache;
        if (cached && cached.expiresAt > now) {
            return cached.value.slice();
        }

        const epochAtStart = this._loadedCacheEpoch;
        try {
            const value = await this.listLoaded();
            // Skip the write if a load/unload/base-URL change invalidated the
            // cache while the request was in flight; the snapshot may predate it.
            if (epochAtStart === this._loadedCacheEpoch) {
                this._loadedCache = { value, expiresAt: now + ttlMs };
            }
            return value.slice();
        } catch (e: unknown) {
            logError('Failed to refresh LM Studio loaded-model list.', {
                error: LMStudioClient.describeError(e),
            });
            return [];
        }
    }

    /**
     * Resolve a chat-ready handle for `modelKey`.
     *
     * @param options.refresh When true, the cached SDK client is dropped
     *   first so the handle comes from a newly created client.
     * @throws LMStudioLifecycleError when the SDK call fails.
     */
    async getModelHandle(modelKey: string, options?: { refresh?: boolean }): Promise<LLM> {
        try {
            if (options?.refresh) {
                // Recreate the SDK client so the SDK's internal handle cache
                // is dropped. The next llm.model() call mints a fresh handle
                // instead of returning the disposed one from the previous
                // (aborted) prediction.
                this.dropSdk();
                logInfo('LM Studio SDK handle refresh requested — dropped cached SDK client.', { modelKey });
            }
            return await this.getSdk().llm.model(modelKey);
        } catch (e: unknown) {
            const msg = LMStudioClient.describeError(e);
            throw new LMStudioLifecycleError(`Failed to acquire LM Studio model handle "${modelKey}": ${msg}`, e);
        }
    }

    /**
     * Probe LM Studio by issuing a list-loaded request.
     *
     * @returns `true` when the request succeeds; `false` (after logging the
     *   error) when it fails. Never rejects.
     */
    async isReachable(): Promise<boolean> {
        try {
            await this.getSdk().llm.listLoaded();
            return true;
        } catch (e: unknown) {
            logError('LM Studio not reachable.', { error: LMStudioClient.describeError(e) });
            return false;
        }
    }
}