fix(lmstudio): 모델 전환 시 다른 모델 전부 자동 언로드 (v2.2.210)

VRAM 부족으로 12b 등 다른 모델 로드가 실패하던 문제에 대한 대응 강화.

- lifecycleManager.doSwitch: 추적 중인 currentModel 만이 아니라 listLoaded() 기반으로 *로드된 모든 LLM* 을 타깃 로드 전에 언로드(VRAM 회수). draft 모델·임베딩 모델은 보호. listLoaded 실패 시 기존 동작(tracked unload)으로 폴백.
- extension.ts: defaultModel 설정 변경(설정 패널/settings.json 포함) 시 lifecycle.onModelSelected 호출 → 설정 패널에서의 전환도 unload→load 발동.
- 테스트: FakeLMStudioClient 가 실제 로드 상태를 추적하도록 갱신.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 11:27:43 +09:00
parent b4ddd4f79a
commit 6d06311d60
5 changed files with 46 additions and 22 deletions
@@ -1,12 +1,12 @@
 {
  "name": "astra",
-  "version": "2.2.209",
+  "version": "2.2.210",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "astra",
-      "version": "2.2.209",
+      "version": "2.2.210",
      "license": "MIT",
      "dependencies": {
        "@lmstudio/sdk": "^1.5.0",
@@ -2,7 +2,7 @@
  "name": "astra",
  "displayName": "Astra",
  "description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.",
-  "version": "2.2.209",
+  "version": "2.2.210",
  "publisher": "g1nation",
  "license": "MIT",
  "icon": "assets/icon.png",
@@ -142,6 +142,14 @@ export async function activate(context: vscode.ExtensionContext) {
            // _sendModels is best-effort; the provider may not have a webview
            // attached yet during very early activation.
            void provider?._sendModels(touchedUrl);
+            // 모델이 *어디서든*(설정 패널·settings.json·사이드바) 바뀌면 lifecycle 을
+            // 깨워 이전 모델 자동 언로드 → 새 모델 로드. 이게 없으면 설정 패널에서
+            // 전환 시 lifecycle 이 모른 채 추론 시점에 JIT 로드만 돼 VRAM 이 안 비고
+            // 로드 실패가 난다.
+            if (touchedModel) {
+                const newModel = (vscode.workspace.getConfiguration('g1nation').get<string>('defaultModel', '') || '').trim();
+                if (newModel) lifecycle.onModelSelected(newModel);
+            }
        })
    );

@@ -260,24 +260,36 @@ export class ModelLifecycleManager {
        this.cancelLoad();
        this.clearIdleTimer();

-        // ── 1) Unload 이전 모델 (있으면) ──────────────────────────────────────
-        // 의도: 메모리 회수. 실패해도 load 는 *무조건* 진행 — LM Studio 가 unload
-        // 못 한 모델은 보통 그냥 그대로 메모리에 떠 있고, load 가 새 모델로 메모리를
-        // 덮어쓰면서 자연 회수되는 경우가 많다. 여기서 throw 하면 사용자가 모델
-        // 교체 자체를 못 함.
-        // 또한 unload 실패해도 currentModel 은 null 로 정리 — 다음 단계에서 어차피
-        // modelKey 로 덮어쓰지만, 그 사이에 다른 코드가 currentModel 을 읽을 때
-        // "이미 없는 prev" 를 가리키지 않도록.
-        if (this.state === 'loaded' && this.currentModel && this.currentModel !== modelKey) {
-            const prev = this.currentModel;
+        // ── 1) 타깃 외 *로드된 모든 LLM* 언로드 (VRAM 회수) ───────────────────
+        // lifecycle 이 추적하는 currentModel 뿐 아니라, 수동 로드·JIT·이전 세션으로
+        // LM Studio 에 떠 있는 다른 모델까지 모두 내린다. (예: 26b 가 떠 있는 상태에서
+        // 12b 로 전환 시 26b 를 자동 언로드해 VRAM 을 비워야 12b 가 로드된다.)
+        // 보호: ① 타깃 모델 ② 설정된 draft 모델(speculative decoding) ③ 임베딩 모델
+        //       (검색 기능이 의존) 은 언로드하지 않는다.
+        // 실패해도 load 는 무조건 진행 — 한 모델 unload 실패가 전체 전환을 막지 않게.
        this.state = 'unloading';
+        const cfg0 = this.deps.getConfig();
+        const keep = new Set<string>([modelKey, cfg0.draftModel].filter((m): m is string => !!m));
        try {
-                await this.deps.client.unload(prev);
+            const loaded = await this.deps.client.listLoaded();
+            for (const m of loaded) {
+                if (keep.has(m)) continue;
+                if (/embed/i.test(m)) continue; // 임베딩 모델 보호
+                try {
+                    await this.deps.client.unload(m);
+                    logInfo('LM Studio: 전환 전 다른 모델 언로드 (VRAM 회수).', { unloaded: m, target: modelKey });
                } catch (e: any) {
-                logError('LM Studio unload before switch failed — load 진행 강행.', { prev, error: e?.message ?? String(e) });
+                    logError('LM Studio unload before switch failed — 계속 진행.', { model: m, error: e?.message ?? String(e) });
+                }
+            }
+        } catch (e: any) {
+            // listLoaded 실패 시: 추적 중인 currentModel 만이라도 언로드 (기존 동작).
+            logError('listLoaded failed before switch — tracked currentModel 만 언로드 시도.', { error: e?.message ?? String(e) });
+            if (this.currentModel && this.currentModel !== modelKey) {
+                try { await this.deps.client.unload(this.currentModel); } catch { /* noop */ }
+            }
        }
        this.currentModel = null;
-        }

        this.checkMemoryBudget(modelKey);

@@ -33,6 +33,8 @@ class FakeLMStudioClient implements ILMStudioClient {
    public failNextUnload: Error | null = null;
    public loadDelayMs = 0;
    public lastLoadSignal: AbortSignal | undefined;
+    /** 실제 로드 상태 추적 — listLoaded()가 이를 반영해야 lifecycle 의 '전체 언로드'를 검증할 수 있다. */
+    public loaded = new Set<string>();

    setBaseUrl(_: string): void { /* noop */ }

@@ -54,6 +56,7 @@ class FakeLMStudioClient implements ILMStudioClient {
            this.failNextLoad = null;
            throw err;
        }
+        this.loaded.add(modelKey); // 성공 시에만 로드 상태로
    }

    async unload(modelKey: string): Promise<void> {
@@ -61,13 +64,14 @@ class FakeLMStudioClient implements ILMStudioClient {
        if (this.failNextUnload) {
            const err = this.failNextUnload;
            this.failNextUnload = null;
-            throw err;
+            throw err; // 실패 시 로드 상태 유지
        }
+        this.loaded.delete(modelKey);
    }

    async listLoaded(): Promise<string[]> {
        this.listLoadedCalls++;
-        return [];
+        return [...this.loaded];
    }

    async isReachable(): Promise<boolean> {
@@ -75,7 +79,7 @@ class FakeLMStudioClient implements ILMStudioClient {
    }

    async listLoadedCached(): Promise<string[]> {
-        return [];
+        return [...this.loaded];
    }

    async listDownloaded(): Promise<string[]> {