diff --git a/package-lock.json b/package-lock.json
index 57e8b7d..e0ae145 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "astra",
-  "version": "2.2.209",
+  "version": "2.2.210",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "astra",
-      "version": "2.2.209",
+      "version": "2.2.210",
       "license": "MIT",
       "dependencies": {
         "@lmstudio/sdk": "^1.5.0",
diff --git a/package.json b/package.json
index cc125ec..0ee4a4c 100644
--- a/package.json
+++ b/package.json
@@ -2,7 +2,7 @@
   "name": "astra",
   "displayName": "Astra",
   "description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.",
-  "version": "2.2.209",
+  "version": "2.2.210",
   "publisher": "g1nation",
   "license": "MIT",
   "icon": "assets/icon.png",
diff --git a/src/extension.ts b/src/extension.ts
index 2a1635e..1506b3f 100644
--- a/src/extension.ts
+++ b/src/extension.ts
@@ -142,6 +142,14 @@ export async function activate(context: vscode.ExtensionContext) {
             // _sendModels is best-effort; the provider may not have a webview
             // attached yet during very early activation.
             void provider?._sendModels(touchedUrl);
+            // Whenever the model changes from *anywhere* (settings panel, settings.json, sidebar),
+            // notify lifecycle so the previous model is unloaded and the new one is loaded. Without
+            // this, a switch made in the settings panel goes unnoticed by lifecycle and the model is
+            // only JIT-loaded at inference time, so VRAM is not freed and the load fails.
+            if (touchedModel) {
+                const newModel = (vscode.workspace.getConfiguration('g1nation').get<string>('defaultModel', '') || '').trim();
+                if (newModel) lifecycle.onModelSelected(newModel); // NOTE(review): result is not awaited or voided here; confirm onModelSelected cannot reject unhandled
+            }
         })
     );
 
diff --git a/src/lmstudio/lifecycleManager.ts b/src/lmstudio/lifecycleManager.ts
index eebbef1..93b685c 100644
--- a/src/lmstudio/lifecycleManager.ts
+++ b/src/lmstudio/lifecycleManager.ts
@@ -260,24 +260,36 @@ export class ModelLifecycleManager {
         this.cancelLoad();
         this.clearIdleTimer();
 
-        // ── 1) Unload 이전 모델 (있으면) ──────────────────────────────────────
-        // 의도: 메모리 회수. 실패해도 load 는 *무조건* 진행 — LM Studio 가 unload
-        // 못 한 모델은 보통 그냥 그대로 메모리에 떠 있고, load 가 새 모델로 메모리를
-        // 덮어쓰면서 자연 회수되는 경우가 많다. 여기서 throw 하면 사용자가 모델
-        // 교체 자체를 못 함.
-        // 또한 unload 실패해도 currentModel 은 null 로 정리 — 다음 단계에서 어차피
-        // modelKey 로 덮어쓰지만, 그 사이에 다른 코드가 currentModel 을 읽을 때
-        // "이미 없는 prev" 를 가리키지 않도록.
-        if (this.state === 'loaded' && this.currentModel && this.currentModel !== modelKey) {
-            const prev = this.currentModel;
-            this.state = 'unloading';
-            try {
-                await this.deps.client.unload(prev);
-            } catch (e: any) {
-                logError('LM Studio unload before switch failed — load 진행 강행.', { prev, error: e?.message ?? String(e) });
+        // ── 1) Unload *every loaded LLM* except the target (reclaim VRAM) ─────
+        // Besides the currentModel tracked by lifecycle, this also takes down models left in
+        // LM Studio by a manual load, JIT, or an earlier session. (E.g. switching to 12b while
+        // 26b is loaded: 26b must be unloaded automatically to free VRAM so that 12b can load.)
+        // Protected from unload: (1) the target model, (2) the configured draft model
+        //       (speculative decoding), (3) embedding models (the search feature depends on them).
+        // A failure never stops the load: one failed unload must not block the whole switch.
+        this.state = 'unloading';
+        const cfg0 = this.deps.getConfig();
+        const keep = new Set<string>([modelKey, cfg0.draftModel].filter((m): m is string => !!m)); // falsy entries (e.g. no draft model) are dropped
+        try {
+            const loaded = await this.deps.client.listLoaded();
+            for (const m of loaded) {
+                if (keep.has(m)) continue;
+                if (/embed/i.test(m)) continue; // protect embedding models (identifier matches /embed/i)
+                try {
+                    await this.deps.client.unload(m);
+                    logInfo('LM Studio: 전환 전 다른 모델 언로드 (VRAM 회수).', { unloaded: m, target: modelKey });
+                } catch (e: any) {
+                    logError('LM Studio unload before switch failed — 계속 진행.', { model: m, error: e?.message ?? String(e) });
+                }
+            }
+        } catch (e: any) {
+            // If listLoaded fails: fall back to unloading only the tracked currentModel (previous behavior).
+            logError('listLoaded failed before switch — tracked currentModel 만 언로드 시도.', { error: e?.message ?? String(e) });
+            if (this.currentModel && this.currentModel !== modelKey) {
+                try { await this.deps.client.unload(this.currentModel); } catch { /* best-effort: failure is ignored and not logged */ }
             }
-            this.currentModel = null;
         }
+        this.currentModel = null;
 
         this.checkMemoryBudget(modelKey);
 
diff --git a/tests/lmStudioLifecycle.test.ts b/tests/lmStudioLifecycle.test.ts
index 2f16323..af70702 100644
--- a/tests/lmStudioLifecycle.test.ts
+++ b/tests/lmStudioLifecycle.test.ts
@@ -33,6 +33,8 @@ class FakeLMStudioClient implements ILMStudioClient {
     public failNextUnload: Error | null = null;
     public loadDelayMs = 0;
     public lastLoadSignal: AbortSignal | undefined;
+    /** Tracks which models are actually loaded; listLoaded() must reflect this so the lifecycle unload-all behavior can be verified. */
+    public loaded = new Set<string>();
 
     setBaseUrl(_: string): void { /* noop */ }
 
@@ -54,6 +56,7 @@ class FakeLMStudioClient implements ILMStudioClient {
             this.failNextLoad = null;
             throw err;
         }
+        this.loaded.add(modelKey); // recorded as loaded only when the load succeeds
     }
 
     async unload(modelKey: string): Promise<void> {
@@ -61,13 +64,14 @@ class FakeLMStudioClient implements ILMStudioClient {
         if (this.failNextUnload) {
             const err = this.failNextUnload;
             this.failNextUnload = null;
-            throw err;
+            throw err; // on failure the model stays in the loaded set
         }
+        this.loaded.delete(modelKey);
     }
 
     async listLoaded(): Promise<string[]> {
         this.listLoadedCalls++;
-        return [];
+        return [...this.loaded]; // fresh array copy of the tracked loaded set
     }
 
     async isReachable(): Promise<boolean> {
@@ -75,7 +79,7 @@ class FakeLMStudioClient implements ILMStudioClient {
     }
 
     async listLoadedCached(): Promise<string[]> {
-        return [];
+        return [...this.loaded]; // same tracked set as listLoaded(), returned as a new array
     }
 
     async listDownloaded(): Promise<string[]> {