From d9a2ebfeddd47955d135a96f6cef31497a3ce2b3 Mon Sep 17 00:00:00 2001
From: g1nation <koriweb@gmail.com>
Date: Thu, 7 May 2026 15:57:48 +0900
Subject: [PATCH] fix: proactive context compression for LM Studio small
 models - compress BEFORE fetch, not after error

---
 ...d46d2ca2057b05c488be1dcf439166ac5a9a1.json |  2 +-
 ...9f4f39d2bc368f77456c37b5eef9a94a66b5c.json |  2 +-
 ...5c7a44d7661af673b24e3f49551a7a2e50280.json |  2 +-
 ...adc543795e4b427b64540a49c9ab27c7fe213.json |  4 +-
 ...son => stress_conflict_1778137049510.json} | 20 ++---
 package.json                                  |  2 +-
 src/agent.ts                                  | 73 ++++++++++++++++++-
 7 files changed, 86 insertions(+), 19 deletions(-)
 rename .astra/tests/stress/.astra/missions/{stress_conflict_1778136474544.json => stress_conflict_1778137049510.json} (82%)

diff --git a/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json b/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json
index 8a90ed2..98f4535 100644
--- a/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json
+++ b/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json
@@ -1,5 +1,5 @@
 {
   "result": "Final report with inconsistencies. This should be long enough to pass validation.",
-  "createdAt": 1778136474568,
+  "createdAt": 1778137049532,
   "modelVersion": "unknown"
 }
\ No newline at end of file
diff --git a/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json b/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json
index d2e40f0..1b9ee74 100644
--- a/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json
+++ b/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json
@@ -1,5 +1,5 @@
 {
   "result": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.",
-  "createdAt": 1778136474566,
+  "createdAt": 1778137049529,
   "modelVersion": "unknown"
 }
\ No newline at end of file
diff --git a/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json b/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json
index 107b798..fdb7177 100644
--- a/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json
+++ b/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json
@@ -1,5 +1,5 @@
 {
   "result": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.",
-  "createdAt": 1778136474564,
+  "createdAt": 1778137049527,
   "modelVersion": "unknown"
 }
\ No newline at end of file
diff --git a/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json b/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json
index ef8d822..1ec2769 100644
--- a/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json
+++ b/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json
@@ -1,5 +1,5 @@
 {
-  "result": "---\nid: stress_conflict_1778136474544\ndate: 2026-05-07T06:47:54.569Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (18ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (3ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (2ms)\n",
-  "createdAt": 1778136474569,
+  "result": "---\nid: stress_conflict_1778137049510\ndate: 2026-05-07T06:57:29.533Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (16ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (2ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (3ms)\n",
+  "createdAt": 1778137049533,
   "modelVersion": "unknown"
 }
\ No newline at end of file
diff --git a/.astra/tests/stress/.astra/missions/stress_conflict_1778136474544.json b/.astra/tests/stress/.astra/missions/stress_conflict_1778137049510.json
similarity index 82%
rename from .astra/tests/stress/.astra/missions/stress_conflict_1778136474544.json
rename to .astra/tests/stress/.astra/missions/stress_conflict_1778137049510.json
index d112347..d1b6c01 100644
--- a/.astra/tests/stress/.astra/missions/stress_conflict_1778136474544.json
+++ b/.astra/tests/stress/.astra/missions/stress_conflict_1778137049510.json
@@ -1,8 +1,8 @@
 {
-  "missionId": "stress_conflict_1778136474544",
+  "missionId": "stress_conflict_1778137049510",
   "status": "completed",
-  "startTime": "2026-05-07T06:47:54.544Z",
-  "totalElapsedMs": 26,
+  "startTime": "2026-05-07T06:57:29.510Z",
+  "totalElapsedMs": 24,
   "results": {
     "planner": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.",
     "researcher": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.",
@@ -16,30 +16,30 @@
     {
       "from": "idle",
       "to": "planner",
-      "durationMs": 18,
+      "durationMs": 16,
       "message": "전략 수립 중...",
-      "ts": "2026-05-07T06:47:54.562Z"
+      "ts": "2026-05-07T06:57:29.526Z"
     },
     {
       "from": "planner",
       "to": "researcher",
-      "durationMs": 3,
+      "durationMs": 2,
       "message": "핵심 정보 수집 및 분석 중...",
-      "ts": "2026-05-07T06:47:54.565Z"
+      "ts": "2026-05-07T06:57:29.528Z"
     },
     {
       "from": "researcher",
       "to": "writer",
-      "durationMs": 2,
+      "durationMs": 3,
       "message": "최종 리포트 작성 및 편집 중...",
-      "ts": "2026-05-07T06:47:54.567Z"
+      "ts": "2026-05-07T06:57:29.531Z"
     },
     {
       "from": "writer",
       "to": "completed",
       "durationMs": 3,
       "message": "미션 완료",
-      "ts": "2026-05-07T06:47:54.570Z"
+      "ts": "2026-05-07T06:57:29.534Z"
     }
   ],
   "resilienceMetrics": {
diff --git a/package.json b/package.json
index 51458c4..4e8b2e9 100644
--- a/package.json
+++ b/package.json
@@ -2,7 +2,7 @@
   "name": "astra",
   "displayName": "Astra",
   "description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.",
-  "version": "2.80.14",
+  "version": "2.80.15",
   "publisher": "g1nation",
   "license": "MIT",
   "icon": "assets/icon.png",
diff --git a/src/agent.ts b/src/agent.ts
index 09e8669..a07189b 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -2022,16 +2022,83 @@ export class AgentExecutor {
 
             for (const candidateModel of modelCandidates) {
                 for (const variant of messageVariants) {
-                    // 실제 전송할 메시지 (n_ctx 재시도 시 수정됨)
+                    // Messages that will actually be sent
                     let finalMessages = variant.messages;
+
+                    // ── LM Studio proactive context compression ──
+                    // Small models (e.g. 4B) can end up with an n_ctx far below the configured value when GPU memory is short,
+                    // and in that case LM Studio returns 200 OK with an empty stream instead of an error, so a retry is impossible.
+                    // Therefore the messages are proactively compressed to fit n_ctx before the request is sent.
+                    if (engine === 'lmstudio') {
+                        const totalCharsRaw = finalMessages.reduce((acc, m) => acc + String(m.content || '').length, 0);
+                        const estimatedTokensRaw = Math.ceil(totalCharsRaw / 4);
+                        const LM_CTX_SAFE_LIMIT = 3500; // safety margin relative to a 4096-token n_ctx
+
+                        if (estimatedTokensRaw > LM_CTX_SAFE_LIMIT) {
+                            logInfo('LM Studio proactive compression triggered.', {
+                                estimatedTokens: estimatedTokensRaw,
+                                limit: LM_CTX_SAFE_LIMIT,
+                                originalMessageCount: finalMessages.length
+                            });
+
+                            // 1. First drop everything from the [CONTEXT] marker onward in the first system message
+                            const sysIdx = finalMessages.findIndex(m => m.role === 'system');
+                            if (sysIdx >= 0) {
+                                const sysContent = String(finalMessages[sysIdx].content || '');
+                                const contextSplit = sysContent.indexOf('[CONTEXT]');
+                                if (contextSplit > 0) {
+                                    // Keep only the text before [CONTEXT] (presumably the base system prompt and core instructions)
+                                    const trimmedSys = sysContent.slice(0, contextSplit).trimEnd();
+                                    finalMessages = finalMessages.map((m, i) =>
+                                        i === sysIdx ? { ...m, content: trimmedSys + '\n[Context omitted: model context limit]' } : m
+                                    );
+                                }
+                            }
+
+                            // 2. If still too large, hard-truncate the system prompt to maxSysChars characters
+                            const afterTrimChars = finalMessages.reduce((acc, m) => acc + String(m.content || '').length, 0);
+                            const afterTrimTokens = Math.ceil(afterTrimChars / 4);
+                            if (afterTrimTokens > LM_CTX_SAFE_LIMIT && sysIdx >= 0) {
+                                // Estimate the tokens used by all non-system messages (chars / 4)
+                                const nonSysTokens = finalMessages
+                                    .filter((_, i) => i !== sysIdx)
+                                    .reduce((acc, m) => acc + String(m.content || '').length, 0) / 4;
+                                const maxSysChars = Math.max(2000, (LM_CTX_SAFE_LIMIT - Math.ceil(nonSysTokens) - 512)) * 4; // the 2000 floor is applied before the *4, so at least 8000 chars are kept
+                                const sysContent = String(finalMessages[sysIdx].content || '');
+                                if (sysContent.length > maxSysChars) {
+                                    finalMessages = finalMessages.map((m, i) =>
+                                        i === sysIdx ? { ...m, content: sysContent.slice(0, maxSysChars) + '\n[Truncated for model context limit]' } : m
+                                    );
+                                }
+                            }
+
+                            // 3. Prune history: keep only the first system message and the most recent user message
+                            const finalCheck = finalMessages.reduce((acc, m) => acc + String(m.content || '').length, 0) / 4;
+                            if (finalCheck > LM_CTX_SAFE_LIMIT) {
+                                const sysMsg = finalMessages.find(m => m.role === 'system');
+                                const lastUserMsg = [...finalMessages].reverse().find(m => m.role === 'user');
+                                finalMessages = [
+                                    ...(sysMsg ? [sysMsg] : []),
+                                    ...(lastUserMsg ? [lastUserMsg] : [])
+                                ];
+                            }
+
+                            logInfo('LM Studio compression result.', {
+                                originalTokens: estimatedTokensRaw,
+                                compressedTokens: Math.ceil(finalMessages.reduce((a, m) => a + String(m.content || '').length, 0) / 4),
+                                messageCount: finalMessages.length
+                            });
+                        }
+                    }
+
                     const totalChars = finalMessages.reduce((acc, m) => acc + String(m.content || '').length, 0);
                     const estimatedTokens = Math.ceil(totalChars / 4);
                     const streamBody = {
                         model: candidateModel,
-                        messages: finalMessages,
+                        messages: finalMessages.map(m => ({ role: m.role, content: m.content })),
                         stream: true,
                         ...(engine === 'lmstudio'
-                            ? { max_tokens: 4096, temperature }
+                            ? { max_tokens: Math.min(4096, Math.max(256, 3500 - estimatedTokens)), temperature }
                             : { options: { num_ctx: 32768, num_predict: 4096, temperature } }),
                     };
                     logInfo('AI streaming request started.', {