import type { ILMStudioClient } from './client';
import type { IActivityTracker } from './activityTracker';
import type { EngineKind } from '../utils';
import type { ISystemSpecsProvider, IModelMemoryEstimator } from '../system/specs';
import { logError, logInfo } from '../utils';

/** Phases the manager moves through while tracking a single LM Studio model. */
export type LifecycleState = 'idle' | 'loading' | 'loaded' | 'streaming' | 'unloading';

/** User-tunable settings, re-read through `getConfig` each time they are needed. */
export interface LifecycleConfig {
  /** Idle time before the loaded model is auto-ejected. Non-finite or `<= 0` disables the idle timer. */
  idleTimeoutMs: number;
  /** When false, `onModelSelected` does nothing. */
  autoLoadOnSelect: boolean;
}

/** Collaborators injected into {@link ModelLifecycleManager}. */
export interface LifecycleManagerDeps {
  client: ILMStudioClient;
  activity: IActivityTracker;
  getConfig: () => LifecycleConfig;
  notifyError?: (msg: string) => void;
  /** Debounce window for rapid model switches. Default 300ms. Use 0 in tests for synchronous behavior. */
  switchDebounceMs?: number;
  /** Initial engine. Default 'lmstudio'. */
  initialEngine?: EngineKind;
  /**
   * Optional pre-load memory budget check. When both are provided, a warn-only
   * advisory is emitted via `notifyError` (and a structured log line) before
   * attempting to load a model that the heuristic predicts will not fit.
   * The load is **not** blocked — the user may have a quantization the
   * estimator does not recognize.
   */
  systemSpecs?: ISystemSpecsProvider;
  memoryEstimator?: IModelMemoryEstimator;
}

/**
 * Extracts a printable message from an arbitrary thrown value: the value's
 * `message` property when it has a non-nullish one, otherwise `String(value)`.
 */
function describeError(e: unknown): string {
  if (typeof e === 'object' && e !== null && 'message' in e) {
    const { message } = e as { message: unknown };
    if (message != null) return String(message);
  }
  return String(e);
}

/**
 * Drives load / unload of the selected model through the injected client:
 * loads on selection (debounced), defers switches requested mid-stream until
 * the stream ends, and auto-ejects the model after a configurable idle period.
 * All behavior is gated on the active engine being `'lmstudio'`.
 */
export class ModelLifecycleManager {
  private state: LifecycleState = 'idle';
  private currentModel: string | null = null;
  private pendingModel: string | null = null;
  private engine: EngineKind;
  private idleTimer: ReturnType<typeof setTimeout> | undefined;
  private switchDebounce: ReturnType<typeof setTimeout> | undefined;
  private loadAbort: AbortController | undefined;
  /**
   * Generation counter, bumped whenever a switch begins or the engine reset
   * runs. A switch that awaited an unload compares its captured value against
   * this to detect that it has been superseded.
   */
  private switchSeq = 0;
  private readonly activitySub: { dispose(): void };
  private disposed = false;

  constructor(private readonly deps: LifecycleManagerDeps) {
    this.engine = deps.initialEngine ?? 'lmstudio';
    this.activitySub = deps.activity.onActivity(() => this.onActivity());
  }

  /**
   * Records the active engine. Moving away from `'lmstudio'` cancels every
   * timer and in-flight load and resets the tracked state to idle; it does not
   * call `unload` on the client.
   */
  setEngine(engine: EngineKind): void {
    if (engine === this.engine) return;
    const wasLmStudio = this.engine === 'lmstudio';
    this.engine = engine;
    if (wasLmStudio && engine !== 'lmstudio') {
      // Invalidate any switch currently awaiting an unload so it cannot resume
      // and start a load after the reset below.
      this.switchSeq++;
      this.clearIdleTimer();
      this.cancelPendingSwitch();
      this.cancelLoad();
      this.state = 'idle';
      this.currentModel = null;
      this.pendingModel = null;
    }
  }

  /**
   * Handles a model selection. No-op when disposed, when the engine is not
   * `'lmstudio'`, when `autoLoadOnSelect` is off, or when the key is blank.
   * Otherwise the switch is debounced by `switchDebounceMs` (default 300ms;
   * `<= 0` runs it immediately).
   */
  onModelSelected(modelKey: string): void {
    if (this.disposed) return;
    if (this.engine !== 'lmstudio') return;
    if (!this.deps.getConfig().autoLoadOnSelect) return;
    const trimmed = (modelKey || '').trim();
    if (!trimmed) return;

    // The newest selection always wins: drop any older debounced switch first,
    // otherwise it would still fire after the early returns below.
    this.cancelPendingSwitch();

    // Mid-stream: queue the latest selection, apply on streamEnd.
    if (this.state === 'streaming') {
      this.pendingModel = trimmed;
      return;
    }

    // Same model already in flight or active — keep timer fresh, no reload.
    if ((this.state === 'loaded' || this.state === 'loading') && this.currentModel === trimmed) {
      if (this.state === 'loaded') this.resetIdleTimer();
      return;
    }

    const delay = this.deps.switchDebounceMs ?? 300;
    if (delay <= 0) {
      void this.doSwitch(trimmed);
      return;
    }
    this.switchDebounce = setTimeout(() => {
      this.switchDebounce = undefined;
      void this.doSwitch(trimmed);
    }, delay);
  }

  /** Marks a stream as started: stops the idle timer and moves `loaded` to `streaming`. */
  onStreamStart(): void {
    if (this.disposed) return;
    if (this.engine !== 'lmstudio') return;
    this.clearIdleTimer();
    if (this.state === 'loaded') this.state = 'streaming';
  }

  /**
   * Marks a stream as finished. Returns to `loaded`, then either applies a
   * switch queued during the stream or restarts the idle timer.
   */
  onStreamEnd(): void {
    if (this.disposed) return;
    if (this.engine !== 'lmstudio') return;
    if (this.state !== 'streaming') return;

    this.state = 'loaded';
    const next = this.pendingModel;
    this.pendingModel = null;
    if (next && next !== this.currentModel) {
      void this.doSwitch(next);
    } else {
      this.resetIdleTimer();
    }
  }

  /**
   * Best-effort eject before extension shutdown. Bounded by timeoutMs.
   *
   * @param timeoutMs Maximum time to wait for the client's `unload` to settle.
   * @returns A promise that resolves once cleanup is done; unload failures and
   *   timeouts are logged, never thrown.
   */
  async disposeAndUnload(timeoutMs: number = 2000): Promise<void> {
    if (this.disposed) return;
    this.disposed = true;
    this.clearIdleTimer();
    this.cancelPendingSwitch();
    this.cancelLoad();
    this.activitySub.dispose();

    const shouldUnload =
      this.engine === 'lmstudio' &&
      (this.state === 'loaded' || this.state === 'streaming') &&
      this.currentModel !== null;
    if (!shouldUnload) {
      this.state = 'idle';
      this.currentModel = null;
      return;
    }

    const target = this.currentModel as string;
    this.state = 'unloading';
    let timer: ReturnType<typeof setTimeout> | undefined;
    try {
      await Promise.race([
        this.deps.client.unload(target),
        new Promise<never>((_, reject) => {
          timer = setTimeout(
            () => reject(new Error(`unload timed out after ${timeoutMs}ms`)),
            timeoutMs
          );
        }),
      ]);
    } catch (e: unknown) {
      logError('LM Studio unload during dispose failed.', { model: target, error: describeError(e) });
    } finally {
      // Do not leave the timeout timer armed once the race has settled.
      clearTimeout(timer);
    }
    this.state = 'idle';
    this.currentModel = null;
  }

  /** vscode.Disposable shape — fire and forget. */
  dispose(): void {
    void this.disposeAndUnload();
  }

  // Test/inspection helpers
  public _getState(): LifecycleState {
    return this.state;
  }

  public _getCurrentModel(): string | null {
    return this.currentModel;
  }

  public _hasIdleTimer(): boolean {
    return this.idleTimer !== undefined;
  }

  // ---------- internals ----------

  /** Activity callback: restarts the idle countdown, but only while a model is loaded. */
  private onActivity(): void {
    if (this.disposed) return;
    if (this.engine !== 'lmstudio') return;
    if (this.state !== 'loaded') return;
    this.resetIdleTimer();
  }

  private clearIdleTimer(): void {
    if (this.idleTimer !== undefined) {
      clearTimeout(this.idleTimer);
      this.idleTimer = undefined;
    }
  }

  private cancelPendingSwitch(): void {
    if (this.switchDebounce !== undefined) {
      clearTimeout(this.switchDebounce);
      this.switchDebounce = undefined;
    }
  }

  /** Re-arms the idle timer from the current config; a non-finite or `<= 0` timeout leaves it disarmed. */
  private resetIdleTimer(): void {
    this.clearIdleTimer();
    const ms = this.deps.getConfig().idleTimeoutMs;
    if (!Number.isFinite(ms) || ms <= 0) return;
    this.idleTimer = setTimeout(() => {
      this.idleTimer = undefined;
      void this.doIdleEject();
    }, ms);
  }

  /** Unloads the current model after the idle timeout; failures are logged and reported via `notifyError`. */
  private async doIdleEject(): Promise<void> {
    if (this.state !== 'loaded' || !this.currentModel) return;
    const target = this.currentModel;
    this.state = 'unloading';
    try {
      await this.deps.client.unload(target);
      logInfo('LM Studio model auto-ejected after idle.', { model: target });
    } catch (e: unknown) {
      logError('LM Studio auto-eject failed.', { model: target, error: describeError(e) });
      this.deps.notifyError?.(`LM Studio auto-eject failed: ${describeError(e)}`);
    }
    // A switch may have started while the unload was in flight; only reset the
    // state if this eject still owns it, so the newer switch is not clobbered.
    if (this.state === 'unloading' && this.currentModel === target) {
      this.state = 'idle';
      this.currentModel = null;
    }
  }

  private cancelLoad(): void {
    if (this.loadAbort) {
      try {
        this.loadAbort.abort();
      } catch {
        /* noop */
      }
      this.loadAbort = undefined;
    }
  }

  /** True when a switch that captured `seq` must stop: disposed, engine changed, or a newer switch began. */
  private isSuperseded(seq: number): boolean {
    return this.disposed || this.engine !== 'lmstudio' || seq !== this.switchSeq;
  }

  /**
   * Warn-only RAM budget check. If the heuristic estimator says the model is
   * unlikely to fit, surface a non-blocking advisory and log it. The load
   * still proceeds — the heuristic can be wrong (unrecognized quantization,
   * sparse / MoE models) and the user may have explicit intent.
   */
  private checkMemoryBudget(modelKey: string): void {
    const specsProvider = this.deps.systemSpecs;
    const estimator = this.deps.memoryEstimator;
    if (!specsProvider || !estimator) return;
    try {
      const specs = specsProvider.get();
      const requiredGB = estimator.estimate(modelKey);
      if (requiredGB > specs.safeModelBudgetGB) {
        const msg =
          `Model "${modelKey}" estimated at ~${requiredGB.toFixed(1)}GB ` +
          `exceeds your safe RAM budget of ${specs.safeModelBudgetGB}GB. ` +
          `If load fails, try a smaller quantization (q4 / q5).`;
        logInfo('LM Studio pre-load memory advisory.', {
          model: modelKey,
          requiredGB: Number(requiredGB.toFixed(2)),
          budgetGB: specs.safeModelBudgetGB,
          totalRamGB: Number(specs.totalRamGB.toFixed(2)),
        });
        this.deps.notifyError?.(msg);
      }
    } catch (e: unknown) {
      // Diagnostic-only; never block a load on advisory failures.
      logError('Memory budget check failed.', { error: describeError(e) });
    }
  }

  /**
   * Performs the actual switch: unloads a different currently-loaded model (if
   * any), runs the advisory memory check, then loads `modelKey`. Load failures
   * are logged and reported via `notifyError`; nothing is thrown.
   */
  private async doSwitch(modelKey: string): Promise<void> {
    if (this.disposed) return;
    if (this.engine !== 'lmstudio') return;

    // A debounced selection can fire after a stream has begun. Queue it so it
    // is applied on stream end instead of swapping models mid-stream.
    if (this.state === 'streaming') {
      this.pendingModel = modelKey;
      return;
    }

    const seq = ++this.switchSeq;
    this.cancelLoad();
    this.clearIdleTimer();

    if (this.state === 'loaded' && this.currentModel && this.currentModel !== modelKey) {
      const prev = this.currentModel;
      this.state = 'unloading';
      try {
        await this.deps.client.unload(prev);
      } catch (e: unknown) {
        logError('LM Studio unload before switch failed.', { prev, error: describeError(e) });
      }
      // Dispose, an engine change, or a newer switch may have happened while
      // awaiting; whoever did that owns the state now.
      if (this.isSuperseded(seq)) return;
      this.currentModel = null;
    }

    this.checkMemoryBudget(modelKey);

    this.state = 'loading';
    this.currentModel = modelKey;
    const ac = new AbortController();
    this.loadAbort = ac;
    try {
      await this.deps.client.load(modelKey, ac.signal);
      if (this.loadAbort !== ac) return; // superseded by a newer switch
      this.loadAbort = undefined;
      this.state = 'loaded';
      this.resetIdleTimer();
    } catch (e: unknown) {
      if (ac.signal.aborted) return; // superseded — newer switch owns state
      logError('LM Studio model load failed.', { model: modelKey, error: describeError(e) });
      this.deps.notifyError?.(`LM Studio load failed: ${describeError(e)}`);
      if (this.loadAbort === ac) this.loadAbort = undefined;
      this.state = 'idle';
      this.currentModel = null;
    }
  }
}