import type { ILMStudioClient, LMStudioLoadConfig } from './client';
import type { IActivityTracker } from './activityTracker';
import type { EngineKind } from '../utils';
import type { ISystemSpecsProvider, IModelMemoryEstimator } from '../system/specs';
import { logError, logInfo } from '../utils';

/** Phases the lifecycle manager moves a model through. */
export type LifecycleState = 'idle' | 'loading' | 'loaded' | 'streaming' | 'unloading';

export interface LifecycleConfig {
  /** Idle period after which a loaded model is ejected. Non-finite or <= 0 disables the idle timer. */
  idleTimeoutMs: number;
  /** When false, `onModelSelected` is a no-op. */
  autoLoadOnSelect: boolean;
  /** Forwarded to `llm.load()` config field. Omit to use engine defaults. */
  loadConfig?: LMStudioLoadConfig;
  /** When set, the lifecycle manager pre-warms this draft model after every successful load. */
  draftModel?: string;
}

export interface LifecycleManagerDeps {
  client: ILMStudioClient;
  activity: IActivityTracker;
  getConfig: () => LifecycleConfig;
  notifyError?: (msg: string) => void;
  /** Debounce window for rapid model switches. Default 300ms. Use 0 in tests for synchronous behavior. */
  switchDebounceMs?: number;
  /** Initial engine. Default 'lmstudio'. */
  initialEngine?: EngineKind;
  /**
   * Optional pre-load memory budget check. When both are provided, a warn-only
   * advisory is emitted via `notifyError` (and a structured log line) before
   * attempting to load a model that the heuristic predicts will not fit.
   * The load is **not** blocked — the user may have a quantization the
   * estimator does not recognize.
   */
  systemSpecs?: ISystemSpecsProvider;
  memoryEstimator?: IModelMemoryEstimator;
}

/** Debounce applied to model switches when `switchDebounceMs` is not supplied. */
const DEFAULT_SWITCH_DEBOUNCE_MS = 300;

/**
 * Extracts a printable message from an arbitrary thrown value: the `message`
 * property when one is present and non-nullish, otherwise `String(e)`.
 */
function describeError(e: unknown): string {
  if (typeof e === 'object' && e !== null) {
    const message = (e as { message?: unknown }).message;
    if (message !== undefined && message !== null) return String(message);
  }
  return String(e);
}

/**
 * Drives load / unload of LM Studio models in response to model selection,
 * streaming boundaries, user activity and an idle timeout.
 *
 * All public entry points are no-ops once the manager is disposed, and all
 * except `setEngine` / dispose are no-ops while the engine is not 'lmstudio'.
 */
export class ModelLifecycleManager {
  private state: LifecycleState = 'idle';
  private currentModel: string | null = null;
  /** Selection made while streaming; applied by `onStreamEnd`. */
  private pendingModel: string | null = null;
  private engine: EngineKind;
  private idleTimer: ReturnType<typeof setTimeout> | undefined;
  private switchDebounce: ReturnType<typeof setTimeout> | undefined;
  private loadAbort: AbortController | undefined;
  /**
   * Incremented whenever a switch starts or all in-flight switches must be
   * invalidated. A switch that resumes from an `await` with a stale value
   * has been superseded and must not touch shared state.
   */
  private switchSeq = 0;
  private readonly activitySub: { dispose(): void };
  private disposed = false;

  constructor(private readonly deps: LifecycleManagerDeps) {
    this.engine = deps.initialEngine ?? 'lmstudio';
    this.activitySub = deps.activity.onActivity(() => this.onActivity());
  }

  /**
   * Changes the active engine. Leaving 'lmstudio' cancels every timer and
   * in-flight load and resets bookkeeping to idle; it does not call `unload`.
   */
  setEngine(engine: EngineKind): void {
    if (engine === this.engine) return;
    const wasLmStudio = this.engine === 'lmstudio';
    this.engine = engine;
    if (wasLmStudio && engine !== 'lmstudio') {
      this.switchSeq++; // invalidate any switch currently awaiting an unload
      this.clearIdleTimer();
      this.cancelPendingSwitch();
      this.cancelLoad();
      this.state = 'idle';
      this.currentModel = null;
      this.pendingModel = null;
    }
  }

  /**
   * Reacts to the user picking a model. Blank keys are ignored; the switch is
   * debounced by `switchDebounceMs` (a delay <= 0 switches immediately).
   */
  onModelSelected(modelKey: string): void {
    if (this.disposed) return;
    if (this.engine !== 'lmstudio') return;
    if (!this.deps.getConfig().autoLoadOnSelect) return;
    const trimmed = (modelKey || '').trim();
    if (!trimmed) return;

    // Mid-stream: queue the latest selection, apply on streamEnd. An older
    // debounced switch must not fire later and replace this newer choice.
    if (this.state === 'streaming') {
      this.cancelPendingSwitch();
      this.pendingModel = trimmed;
      return;
    }

    // Same model already in flight or active — keep timer fresh, no reload.
    // Any debounced switch to a *different* model is now stale, so drop it.
    if ((this.state === 'loaded' || this.state === 'loading') && this.currentModel === trimmed) {
      this.cancelPendingSwitch();
      if (this.state === 'loaded') this.resetIdleTimer();
      return;
    }

    this.cancelPendingSwitch();
    const delay = this.deps.switchDebounceMs ?? DEFAULT_SWITCH_DEBOUNCE_MS;
    if (delay <= 0) {
      void this.doSwitch(trimmed);
      return;
    }
    this.switchDebounce = setTimeout(() => {
      this.switchDebounce = undefined;
      void this.doSwitch(trimmed);
    }, delay);
  }

  /** Marks a loaded model as streaming and suspends the idle timer. */
  onStreamStart(): void {
    if (this.disposed) return;
    if (this.engine !== 'lmstudio') return;
    this.clearIdleTimer();
    if (this.state === 'loaded') this.state = 'streaming';
  }

  /**
   * Ends streaming. Applies a selection queued during the stream if it differs
   * from the current model; otherwise restarts the idle timer.
   */
  onStreamEnd(): void {
    if (this.disposed) return;
    if (this.engine !== 'lmstudio') return;
    if (this.state !== 'streaming') return;

    this.state = 'loaded';
    const next = this.pendingModel;
    this.pendingModel = null;
    if (next && next !== this.currentModel) {
      void this.doSwitch(next);
    } else {
      this.resetIdleTimer();
    }
  }

  /** Best-effort eject before extension shutdown. Bounded by timeoutMs. */
  async disposeAndUnload(timeoutMs: number = 2000): Promise<void> {
    if (this.disposed) return;
    this.disposed = true;
    this.switchSeq++; // invalidate any switch currently awaiting an unload
    this.clearIdleTimer();
    this.cancelPendingSwitch();
    this.cancelLoad();
    this.activitySub.dispose();
    this.pendingModel = null;

    // Only a model that is actually resident (loaded or streaming) is unloaded.
    const resident =
      this.engine === 'lmstudio' && (this.state === 'loaded' || this.state === 'streaming');
    const target = resident ? this.currentModel : null;
    if (target === null) {
      this.state = 'idle';
      this.currentModel = null;
      return;
    }

    this.state = 'unloading';
    let timer: ReturnType<typeof setTimeout> | undefined;
    try {
      await Promise.race([
        this.deps.client.unload(target),
        new Promise<never>((_, reject) => {
          timer = setTimeout(
            () => reject(new Error(`unload timed out after ${timeoutMs}ms`)),
            timeoutMs
          );
        }),
      ]);
    } catch (e: unknown) {
      logError('LM Studio unload during dispose failed.', { model: target, error: describeError(e) });
    } finally {
      // Do not leave the timeout pending once the race has settled.
      if (timer !== undefined) clearTimeout(timer);
    }
    this.state = 'idle';
    this.currentModel = null;
  }

  /** vscode.Disposable shape — fire and forget. */
  dispose(): void {
    void this.disposeAndUnload();
  }

  // Test/inspection helpers
  public _getState(): LifecycleState {
    return this.state;
  }

  public _getCurrentModel(): string | null {
    return this.currentModel;
  }

  public _hasIdleTimer(): boolean {
    return this.idleTimer !== undefined;
  }

  // ---------- internals ----------

  /** Activity callback: any activity while a model is loaded restarts the idle timer. */
  private onActivity(): void {
    if (this.disposed) return;
    if (this.engine !== 'lmstudio') return;
    if (this.state !== 'loaded') return;
    this.resetIdleTimer();
  }

  private clearIdleTimer(): void {
    if (this.idleTimer) {
      clearTimeout(this.idleTimer);
      this.idleTimer = undefined;
    }
  }

  private cancelPendingSwitch(): void {
    if (this.switchDebounce) {
      clearTimeout(this.switchDebounce);
      this.switchDebounce = undefined;
    }
  }

  /** Restarts the idle timer from the current config; a non-finite or <= 0 timeout disables it. */
  private resetIdleTimer(): void {
    this.clearIdleTimer();
    const ms = this.deps.getConfig().idleTimeoutMs;
    if (!Number.isFinite(ms) || ms <= 0) return;
    this.idleTimer = setTimeout(() => {
      this.idleTimer = undefined;
      void this.doIdleEject();
    }, ms);
  }

  /** Unloads the current model after the idle timeout. Failures are logged and reported via `notifyError`. */
  private async doIdleEject(): Promise<void> {
    if (this.state !== 'loaded' || !this.currentModel) return;
    const target = this.currentModel;
    this.state = 'unloading';
    try {
      await this.deps.client.unload(target);
      logInfo('LM Studio model auto-ejected after idle.', { model: target });
    } catch (e: unknown) {
      logError('LM Studio auto-eject failed.', { model: target, error: describeError(e) });
      this.deps.notifyError?.(`LM Studio auto-eject failed: ${describeError(e)}`);
    }
    // A switch may have started while the unload was in flight and now owns
    // `state` / `currentModel`. Only reset them if this eject still does.
    if (this.state === 'unloading' && this.currentModel === target) {
      this.state = 'idle';
      this.currentModel = null;
    }
  }

  private cancelLoad(): void {
    if (this.loadAbort) {
      try {
        this.loadAbort.abort();
      } catch {
        /* noop */
      }
      this.loadAbort = undefined;
    }
  }

  /**
   * Warn-only RAM budget check. If the heuristic estimator says the model is
   * unlikely to fit, surface a non-blocking advisory and log it. The load
   * still proceeds — the heuristic can be wrong (unrecognized quantization,
   * sparse / MoE models) and the user may have explicit intent.
   */
  private checkMemoryBudget(modelKey: string): void {
    const specsProvider = this.deps.systemSpecs;
    const estimator = this.deps.memoryEstimator;
    if (!specsProvider || !estimator) return;
    try {
      const specs = specsProvider.get();
      const requiredGB = estimator.estimate(modelKey);
      if (requiredGB > specs.safeModelBudgetGB) {
        const msg =
          `Model "${modelKey}" estimated at ~${requiredGB.toFixed(1)}GB ` +
          `exceeds your safe RAM budget of ${specs.safeModelBudgetGB}GB. ` +
          `If load fails, try a smaller quantization (q4 / q5).`;
        logInfo('LM Studio pre-load memory advisory.', {
          model: modelKey,
          requiredGB: Number(requiredGB.toFixed(2)),
          budgetGB: specs.safeModelBudgetGB,
          totalRamGB: Number(specs.totalRamGB.toFixed(2)),
        });
        this.deps.notifyError?.(msg);
      }
    } catch (e: unknown) {
      // Diagnostic-only; never block a load on advisory failures.
      logError('Memory budget check failed.', { error: describeError(e) });
    }
  }

  /** True when the switch identified by `seq` must stop touching shared state. */
  private isSuperseded(seq: number): boolean {
    return seq !== this.switchSeq || this.disposed || this.engine !== 'lmstudio';
  }

  /**
   * Fire-and-forget draft-model pre-warm. Failures are logged only: they must
   * neither surface as unhandled rejections nor undo a successful main load.
   */
  private prewarmDraftModel(draftModel: string): void {
    if (!this.deps.client.preloadDraftModel) return;
    const onFailure = (e: unknown): void => {
      logError('LM Studio draft model pre-warm failed.', {
        model: draftModel,
        error: describeError(e),
      });
    };
    try {
      void Promise.resolve(this.deps.client.preloadDraftModel(draftModel)).catch(onFailure);
    } catch (e: unknown) {
      onFailure(e);
    }
  }

  /**
   * Replaces the current model with `modelKey`: unloads the previous model
   * (best effort), emits the memory advisory, then loads the new model.
   * Never rejects; load failures are logged and reported via `notifyError`.
   */
  private async doSwitch(modelKey: string): Promise<void> {
    if (this.disposed) return;
    if (this.engine !== 'lmstudio') return;

    // A debounced switch can fire after a stream has started. Switching now
    // would load a second model mid-stream, so queue it for onStreamEnd.
    if (this.state === 'streaming') {
      this.pendingModel = modelKey;
      return;
    }

    const seq = ++this.switchSeq;
    this.cancelLoad();
    this.clearIdleTimer();

    // ── 1) Unload the previous model (if any) ────────────────────────────
    // Intent: reclaim memory. The load proceeds even if the unload fails;
    // throwing here would prevent the user from switching models at all.
    // (Original author's note: a model LM Studio failed to unload usually
    // stays resident and is often reclaimed when the new load overwrites it.)
    // currentModel is cleared even when the unload fails so that nothing
    // reading it before step 2 sees a previous model that may be gone.
    if (this.state === 'loaded' && this.currentModel && this.currentModel !== modelKey) {
      const prev = this.currentModel;
      this.state = 'unloading';
      try {
        await this.deps.client.unload(prev);
      } catch (e: unknown) {
        logError('LM Studio unload before switch failed — load 진행 강행.', {
          prev,
          error: describeError(e),
        });
      }
      // A newer switch, a dispose or an engine change happened during the
      // await; whoever did that owns the state now.
      if (this.isSuperseded(seq)) return;
      this.currentModel = null;
    }

    this.checkMemoryBudget(modelKey);

    // ── 2) Load the new model ────────────────────────────────────────────
    this.state = 'loading';
    this.currentModel = modelKey;
    const ac = new AbortController();
    this.loadAbort = ac;
    try {
      const cfg = this.deps.getConfig();
      await this.deps.client.load(modelKey, ac.signal, cfg.loadConfig);
      if (this.loadAbort !== ac) return; // superseded by a newer switch
      this.loadAbort = undefined;
      this.state = 'loaded';
      this.resetIdleTimer();
      // Pre-warm the draft model so the first speculative prediction doesn't pay a cold-load cost.
      if (cfg.draftModel) this.prewarmDraftModel(cfg.draftModel);
    } catch (e: unknown) {
      if (ac.signal.aborted) {
        // A newer switch (or dispose / engine change) aborted this load and
        // decides the state from here on. Leave everything untouched.
        return;
      }
      logError('LM Studio model load failed.', { model: modelKey, error: describeError(e) });
      this.deps.notifyError?.(`LM Studio load failed: ${describeError(e)}`);
      if (this.loadAbort === ac) this.loadAbort = undefined;
      // Load failed → return to a clean state with no model loaded. Clear
      // currentModel too, so a later call can retry the same model.
      this.state = 'idle';
      this.currentModel = null;
    }
  }
}