// connectai/src/lmstudio/lifecycleManager.ts

import type { ILMStudioClient, LMStudioLoadConfig } from './client';
import type { IActivityTracker } from './activityTracker';
import type { EngineKind } from '../utils';
import type { ISystemSpecsProvider, IModelMemoryEstimator } from '../system/specs';
import { logError, logInfo } from '../utils';

/**
 * Phases tracked by `ModelLifecycleManager`:
 * - `idle`      — no model is tracked as loaded.
 * - `loading`   — a `client.load()` call is in flight.
 * - `loaded`    — a model is loaded; this is the only state in which the idle timer is (re)armed.
 * - `streaming` — entered from `loaded` on stream start; model selections are queued until stream end.
 * - `unloading` — a `client.unload()` call is in flight.
 */
export type LifecycleState = 'idle' | 'loading' | 'loaded' | 'streaming' | 'unloading';

/** User-tunable settings, obtained through `LifecycleManagerDeps.getConfig()` each time they are needed. */
export interface LifecycleConfig {
    /**
     * Milliseconds a loaded model may sit without activity before it is auto-ejected.
     * A non-finite or non-positive value disables the idle timer.
     */
    idleTimeoutMs: number;
    /** When false, `onModelSelected` returns without scheduling any load. */
    autoLoadOnSelect: boolean;
    /** Forwarded to `llm.load()` config field. Omit to use engine defaults. */
    loadConfig?: LMStudioLoadConfig;
    /**
     * When set, the lifecycle manager pre-warms this draft model after every successful load
     * (only if the client provides `preloadDraftModel`).
     */
    draftModel?: string;
}

/** Collaborators injected into `ModelLifecycleManager`. */
export interface LifecycleManagerDeps {
    /** Client used to load and unload models (and, when it offers `preloadDraftModel`, to pre-warm a draft model). */
    client: ILMStudioClient;
    /** Activity source; the manager subscribes via `onActivity` and refreshes the idle timer while a model is loaded. */
    activity: IActivityTracker;
    /** Returns the current settings. Called on every use, so changes take effect without recreating the manager. */
    getConfig: () => LifecycleConfig;
    /** Optional sink for user-facing messages: load / auto-eject failures and the memory advisory. */
    notifyError?: (msg: string) => void;
    /** Debounce window for rapid model switches. Default 300ms. Use 0 in tests for synchronous behavior. */
    switchDebounceMs?: number;
    /** Initial engine. Default 'lmstudio'. */
    initialEngine?: EngineKind;
    /**
     * Optional pre-load memory budget check. When both are provided, a warn-only
     * advisory is emitted via `notifyError` (and a structured log line) before
     * attempting to load a model that the heuristic predicts will not fit.
     * The load is **not** blocked — the user may have a quantization the
     * estimator does not recognize.
     */
    systemSpecs?: ISystemSpecsProvider;
    memoryEstimator?: IModelMemoryEstimator;
}

/**
 * Tracks and drives the load / idle-eject / unload lifecycle of the LM Studio
 * model selected by the user.
 *
 * All asynchronous steps (`unload` before a switch, `load`, idle eject) are
 * guarded so that work which became stale while it was awaiting — because a
 * newer switch started, the engine changed, or the manager was disposed —
 * never writes to the manager's state or starts further client calls.
 */
export class ModelLifecycleManager {
    private state: LifecycleState = 'idle';
    private currentModel: string | null = null;
    /** Selection received mid-stream; applied by `onStreamEnd`. */
    private pendingModel: string | null = null;
    private engine: EngineKind;

    private idleTimer: ReturnType<typeof setTimeout> | undefined;
    private switchDebounce: ReturnType<typeof setTimeout> | undefined;
    private loadAbort: AbortController | undefined;

    /**
     * Generation counter. Bumped by every switch, by leaving the LM Studio
     * engine and by dispose. Async continuations capture the value before an
     * `await` and bail out afterwards if it has changed.
     */
    private epoch = 0;

    private readonly activitySub: { dispose(): void };
    private disposed = false;

    /**
     * @param deps Injected collaborators; `deps.activity.onActivity` is
     *             subscribed immediately and released on dispose.
     */
    constructor(private readonly deps: LifecycleManagerDeps) {
        this.engine = deps.initialEngine ?? 'lmstudio';
        this.activitySub = deps.activity.onActivity(() => this.onActivity());
    }

    /**
     * Switches the active engine. Leaving `'lmstudio'` cancels timers and any
     * in-flight load and resets the tracked state; no unload is issued.
     */
    setEngine(engine: EngineKind): void {
        if (engine === this.engine) return;
        const wasLmStudio = this.engine === 'lmstudio';
        this.engine = engine;
        if (wasLmStudio && engine !== 'lmstudio') {
            // Invalidate any switch / eject that is currently awaiting the client.
            this.epoch++;
            this.clearIdleTimer();
            this.cancelPendingSwitch();
            this.cancelLoad();
            this.state = 'idle';
            this.currentModel = null;
            this.pendingModel = null;
        }
    }

    /**
     * Reacts to the user picking a model. No-op when disposed, when the engine
     * is not LM Studio, when `autoLoadOnSelect` is off, or when the key is blank.
     * Otherwise schedules a (debounced) switch to the trimmed key.
     */
    onModelSelected(modelKey: string): void {
        if (this.disposed) return;
        if (this.engine !== 'lmstudio') return;
        if (!this.deps.getConfig().autoLoadOnSelect) return;
        const trimmed = (modelKey || '').trim();
        if (!trimmed) return;

        // Mid-stream: queue the latest selection, apply on streamEnd.
        if (this.state === 'streaming') {
            this.pendingModel = trimmed;
            return;
        }

        // Same model already in flight or active — keep timer fresh, no reload.
        if ((this.state === 'loaded' || this.state === 'loading') && this.currentModel === trimmed) {
            if (this.state === 'loaded') this.resetIdleTimer();
            return;
        }

        this.cancelPendingSwitch();
        const delay = this.deps.switchDebounceMs ?? 300;
        if (delay <= 0) {
            void this.doSwitch(trimmed);
            return;
        }
        this.switchDebounce = setTimeout(() => {
            this.switchDebounce = undefined;
            void this.doSwitch(trimmed);
        }, delay);
    }

    /** Marks the start of a response stream: pauses the idle timer and, if loaded, enters `streaming`. */
    onStreamStart(): void {
        if (this.disposed) return;
        if (this.engine !== 'lmstudio') return;
        this.clearIdleTimer();
        if (this.state === 'loaded') this.state = 'streaming';
    }

    /**
     * Marks the end of a response stream. Applies a selection queued during
     * the stream if it differs from the current model; otherwise re-arms the
     * idle timer.
     */
    onStreamEnd(): void {
        if (this.disposed) return;
        if (this.engine !== 'lmstudio') return;
        if (this.state === 'streaming') {
            this.state = 'loaded';
            if (this.pendingModel && this.pendingModel !== this.currentModel) {
                const next = this.pendingModel;
                this.pendingModel = null;
                void this.doSwitch(next);
            } else {
                this.pendingModel = null;
                this.resetIdleTimer();
            }
        }
    }

    /**
     * Best-effort eject before extension shutdown. Bounded by timeoutMs.
     *
     * @param timeoutMs Upper bound on how long to wait for the unload call.
     * @returns A promise that always resolves; unload failures are logged.
     */
    async disposeAndUnload(timeoutMs: number = 2000): Promise<void> {
        if (this.disposed) return;
        this.disposed = true;
        // Any switch / eject still awaiting the client must not resume afterwards.
        this.epoch++;
        this.clearIdleTimer();
        this.cancelPendingSwitch();
        this.cancelLoad();
        this.activitySub.dispose();

        const shouldUnload =
            this.engine === 'lmstudio' &&
            (this.state === 'loaded' || this.state === 'streaming') &&
            this.currentModel !== null;
        if (!shouldUnload) {
            this.state = 'idle';
            this.currentModel = null;
            return;
        }

        const target = this.currentModel as string;
        this.state = 'unloading';
        let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
        try {
            await Promise.race([
                this.deps.client.unload(target),
                new Promise<void>((_, reject) => {
                    timeoutHandle = setTimeout(
                        () => reject(new Error(`unload timed out after ${timeoutMs}ms`)),
                        timeoutMs
                    );
                }),
            ]);
        } catch (e: unknown) {
            logError('LM Studio unload during dispose failed.', {
                model: target,
                error: ModelLifecycleManager.describeError(e),
            });
        } finally {
            // Do not leave the timeout pending once the race has settled —
            // it would keep the host process alive for up to timeoutMs.
            if (timeoutHandle !== undefined) clearTimeout(timeoutHandle);
        }
        this.state = 'idle';
        this.currentModel = null;
    }

    /** vscode.Disposable shape — fire and forget. */
    dispose(): void {
        void this.disposeAndUnload();
    }

    // Test/inspection helpers
    public _getState(): LifecycleState { return this.state; }
    public _getCurrentModel(): string | null { return this.currentModel; }
    public _hasIdleTimer(): boolean { return this.idleTimer !== undefined; }

    // ---------- internals ----------

    /** Extracts a printable message from an arbitrary thrown value. */
    private static describeError(e: unknown): string {
        const message = (e as { message?: unknown } | null | undefined)?.message;
        return String(message ?? e);
    }

    /** Activity callback: refreshes the idle timer, but only while a model is loaded. */
    private onActivity(): void {
        if (this.disposed) return;
        if (this.engine !== 'lmstudio') return;
        if (this.state !== 'loaded') return;
        this.resetIdleTimer();
    }

    private clearIdleTimer(): void {
        if (this.idleTimer) {
            clearTimeout(this.idleTimer);
            this.idleTimer = undefined;
        }
    }

    private cancelPendingSwitch(): void {
        if (this.switchDebounce) {
            clearTimeout(this.switchDebounce);
            this.switchDebounce = undefined;
        }
    }

    /** (Re)arms the idle timer from the current config; a non-finite or non-positive timeout disables it. */
    private resetIdleTimer(): void {
        this.clearIdleTimer();
        const ms = this.deps.getConfig().idleTimeoutMs;
        if (!Number.isFinite(ms) || ms <= 0) return;
        this.idleTimer = setTimeout(() => {
            this.idleTimer = undefined;
            void this.doIdleEject();
        }, ms);
    }

    /** Unloads the current model after the idle timeout elapsed. Failures are logged and reported. */
    private async doIdleEject(): Promise<void> {
        if (this.state !== 'loaded' || !this.currentModel) return;
        const target = this.currentModel;
        const epoch = this.epoch;
        this.state = 'unloading';
        try {
            await this.deps.client.unload(target);
            logInfo('LM Studio model auto-ejected after idle.', { model: target });
        } catch (e: unknown) {
            const detail = ModelLifecycleManager.describeError(e);
            logError('LM Studio auto-eject failed.', { model: target, error: detail });
            this.deps.notifyError?.(`LM Studio auto-eject failed: ${detail}`);
        }
        // A switch, engine change or dispose took over while we were waiting:
        // that owner already set the state, so do not overwrite it.
        if (epoch !== this.epoch) return;
        this.state = 'idle';
        this.currentModel = null;
    }

    private cancelLoad(): void {
        if (this.loadAbort) {
            try { this.loadAbort.abort(); } catch { /* noop */ }
            this.loadAbort = undefined;
        }
    }

    /**
     * Warn-only RAM budget check. If the heuristic estimator says the model is
     * unlikely to fit, surface a non-blocking advisory and log it. The load
     * still proceeds — the heuristic can be wrong (unrecognized quantization,
     * sparse / MoE models) and the user may have explicit intent.
     */
    private checkMemoryBudget(modelKey: string): void {
        const specsProvider = this.deps.systemSpecs;
        const estimator = this.deps.memoryEstimator;
        if (!specsProvider || !estimator) return;
        try {
            const specs = specsProvider.get();
            const requiredGB = estimator.estimate(modelKey);
            if (requiredGB > specs.safeModelBudgetGB) {
                const msg =
                    `Model "${modelKey}" estimated at ~${requiredGB.toFixed(1)}GB ` +
                    `exceeds your safe RAM budget of ${specs.safeModelBudgetGB}GB. ` +
                    `If load fails, try a smaller quantization (q4 / q5).`;
                logInfo('LM Studio pre-load memory advisory.', {
                    model: modelKey,
                    requiredGB: Number(requiredGB.toFixed(2)),
                    budgetGB: specs.safeModelBudgetGB,
                    totalRamGB: Number(specs.totalRamGB.toFixed(2)),
                });
                this.deps.notifyError?.(msg);
            }
        } catch (e: unknown) {
            // Diagnostic-only; never block a load on advisory failures.
            logError('Memory budget check failed.', { error: ModelLifecycleManager.describeError(e) });
        }
    }

    /**
     * Pre-warms the draft model so the first speculative prediction doesn't
     * pay a cold-load cost. Strictly best-effort: neither a synchronous throw
     * nor a rejected promise may affect the state of the main model.
     */
    private prewarmDraft(draftModel: string | undefined): void {
        const client = this.deps.client;
        if (!draftModel || !client.preloadDraftModel) return;
        const report = (e: unknown): void => {
            logError('LM Studio draft model pre-warm failed.', {
                draftModel,
                error: ModelLifecycleManager.describeError(e),
            });
        };
        try {
            void Promise.resolve(client.preloadDraftModel(draftModel)).catch(report);
        } catch (e: unknown) {
            report(e);
        }
    }

    /**
     * Unloads the previously loaded model (if any) and loads `modelKey`.
     * Never rejects on client failures: they are logged / reported and the
     * manager falls back to `idle`.
     */
    private async doSwitch(modelKey: string): Promise<void> {
        if (this.disposed) return;
        if (this.engine !== 'lmstudio') return;

        // A debounced switch can fire after a stream has started. Never swap
        // models mid-stream: queue it for onStreamEnd, unless a newer selection
        // was already queued while streaming.
        if (this.state === 'streaming') {
            if (this.pendingModel === null) this.pendingModel = modelKey;
            return;
        }

        const epoch = ++this.epoch;
        this.cancelLoad();
        this.clearIdleTimer();

        // ── 1) Unload the previous model (if any) ────────────────────────────
        // Purpose: reclaim memory. A failed unload must never prevent the load:
        // a model LM Studio could not unload usually just stays resident and is
        // often reclaimed when the new model is loaded. Throwing here would make
        // it impossible for the user to switch models at all.
        // currentModel is cleared even when the unload failed, so nothing that
        // reads it before the load step can observe a model that is already gone.
        if (this.state === 'loaded' && this.currentModel && this.currentModel !== modelKey) {
            const prev = this.currentModel;
            this.state = 'unloading';
            try {
                await this.deps.client.unload(prev);
            } catch (e: unknown) {
                logError('LM Studio unload before switch failed — load 진행 강행.', {
                    prev,
                    error: ModelLifecycleManager.describeError(e),
                });
            }
            // Superseded while unloading (newer switch, engine change, dispose):
            // whoever superseded us owns the state now. Resuming would load a
            // stale model and orphan the newer load's AbortController.
            if (epoch !== this.epoch) return;
            this.currentModel = null;
        }

        this.checkMemoryBudget(modelKey);

        // ── 2) Load the new model ────────────────────────────────────────────
        this.state = 'loading';
        this.currentModel = modelKey;
        const ac = new AbortController();
        this.loadAbort = ac;
        let draftModel: string | undefined;
        try {
            const cfg = this.deps.getConfig();
            await this.deps.client.load(modelKey, ac.signal, cfg.loadConfig);
            if (this.loadAbort !== ac) return; // superseded by a newer switch
            this.loadAbort = undefined;
            this.state = 'loaded';
            this.resetIdleTimer();
            draftModel = cfg.draftModel;
        } catch (e: unknown) {
            if (ac.signal.aborted) {
                // We were aborted by whoever superseded us; they decide the
                // state from here on, so leave it untouched.
                return;
            }
            const detail = ModelLifecycleManager.describeError(e);
            logError('LM Studio model load failed.', { model: modelKey, error: detail });
            this.deps.notifyError?.(`LM Studio load failed: ${detail}`);
            if (this.loadAbort === ac) this.loadAbort = undefined;
            // Load failed → return to a clean "nothing loaded" state. currentModel
            // is cleared so that a later selection of the same model retries.
            this.state = 'idle';
            this.currentModel = null;
            return;
        }

        // Outside the load try/catch on purpose: a pre-warm failure is not a
        // load failure and must not reset the state of a model that is loaded.
        this.prewarmDraft(draftModel);
    }
}