From d9a2ebfeddd47955d135a96f6cef31497a3ce2b3 Mon Sep 17 00:00:00 2001 From: g1nation Date: Thu, 7 May 2026 15:57:48 +0900 Subject: [PATCH] fix: proactive context compression for LM Studio small models - compress BEFORE fetch not after error --- ...d46d2ca2057b05c488be1dcf439166ac5a9a1.json | 2 +- ...9f4f39d2bc368f77456c37b5eef9a94a66b5c.json | 2 +- ...5c7a44d7661af673b24e3f49551a7a2e50280.json | 2 +- ...adc543795e4b427b64540a49c9ab27c7fe213.json | 4 +- ...son => stress_conflict_1778137049510.json} | 20 ++--- package.json | 2 +- src/agent.ts | 73 ++++++++++++++++++- 7 files changed, 86 insertions(+), 19 deletions(-) rename .astra/tests/stress/.astra/missions/{stress_conflict_1778136474544.json => stress_conflict_1778137049510.json} (82%) diff --git a/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json b/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json index 8a90ed2..98f4535 100644 --- a/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json +++ b/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json @@ -1,5 +1,5 @@ { "result": "Final report with inconsistencies. This should be long enough to pass validation.", - "createdAt": 1778136474568, + "createdAt": 1778137049532, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json b/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json index d2e40f0..1b9ee74 100644 --- a/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json +++ b/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json @@ -1,5 +1,5 @@ { "result": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.", - "createdAt": 1778136474566, + "createdAt": 1778137049529, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json b/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json index 107b798..fdb7177 100644 --- a/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json +++ b/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json @@ -1,5 +1,5 @@ { "result": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.", - "createdAt": 1778136474564, + "createdAt": 1778137049527, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json b/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json index ef8d822..1ec2769 100644 --- a/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json +++ b/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json @@ -1,5 +1,5 @@ { - "result": "---\nid: stress_conflict_1778136474544\ndate: 2026-05-07T06:47:54.569Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (18ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (3ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (2ms)\n", - "createdAt": 1778136474569, + "result": "---\nid: stress_conflict_1778137049510\ndate: 2026-05-07T06:57:29.533Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (16ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (2ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (3ms)\n", + "createdAt": 1778137049533, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/missions/stress_conflict_1778136474544.json b/.astra/tests/stress/.astra/missions/stress_conflict_1778137049510.json similarity index 82% rename from .astra/tests/stress/.astra/missions/stress_conflict_1778136474544.json rename to .astra/tests/stress/.astra/missions/stress_conflict_1778137049510.json index d112347..d1b6c01 100644 --- a/.astra/tests/stress/.astra/missions/stress_conflict_1778136474544.json +++ b/.astra/tests/stress/.astra/missions/stress_conflict_1778137049510.json @@ -1,8 +1,8 @@ { - "missionId": "stress_conflict_1778136474544", + "missionId": "stress_conflict_1778137049510", "status": "completed", - "startTime": "2026-05-07T06:47:54.544Z", - "totalElapsedMs": 26, + "startTime": "2026-05-07T06:57:29.510Z", + "totalElapsedMs": 24, "results": { "planner": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.", "researcher": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.", @@ -16,30 +16,30 @@ { "from": "idle", "to": "planner", - "durationMs": 18, + "durationMs": 16, "message": "전략 수립 중...", - "ts": "2026-05-07T06:47:54.562Z" + "ts": "2026-05-07T06:57:29.526Z" }, { "from": "planner", "to": "researcher", - "durationMs": 3, + "durationMs": 2, "message": "핵심 정보 수집 및 분석 중...", - "ts": "2026-05-07T06:47:54.565Z" + "ts": "2026-05-07T06:57:29.528Z" }, { "from": "researcher", "to": "writer", - "durationMs": 2, + "durationMs": 3, "message": "최종 리포트 작성 및 편집 중...", - "ts": "2026-05-07T06:47:54.567Z" + "ts": "2026-05-07T06:57:29.531Z" }, { "from": "writer", "to": "completed", "durationMs": 3, "message": "미션 완료", - "ts": "2026-05-07T06:47:54.570Z" + "ts": "2026-05-07T06:57:29.534Z" } ], "resilienceMetrics": { diff --git a/package.json b/package.json index 51458c4..4e8b2e9 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "astra", "displayName": "Astra", "description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.", - "version": "2.80.14", + "version": "2.80.15", "publisher": "g1nation", "license": "MIT", "icon": "assets/icon.png", diff --git a/src/agent.ts b/src/agent.ts index 09e8669..a07189b 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -2022,16 +2022,83 @@ export class AgentExecutor { for (const candidateModel of modelCandidates) { for (const variant of messageVariants) { - // 실제 전송할 메시지 (n_ctx 재시도 시 수정됨) + // 실제 전송할 메시지 let finalMessages = variant.messages; + + // ── LM Studio 선제적 컨텍스트 압축 ── + // 소형 모델(4B 등)은 GPU 메모리 부족으로 n_ctx가 설정값보다 크게 줄어들 수 있고, + // 이때 LM Studio는 에러 대신 200 OK + 빈 스트림을 반환하여 재시도 불가. + // 따라서 전송 전에 선제적으로 메시지를 n_ctx에 맞게 압축합니다. + if (engine === 'lmstudio') { + const totalCharsRaw = finalMessages.reduce((acc, m) => acc + String(m.content || '').length, 0); + const estimatedTokensRaw = Math.ceil(totalCharsRaw / 4); + const LM_CTX_SAFE_LIMIT = 3500; // 4096 n_ctx 기준 안전 마진 + + if (estimatedTokensRaw > LM_CTX_SAFE_LIMIT) { + logInfo('LM Studio proactive compression triggered.', { + estimatedTokens: estimatedTokensRaw, + limit: LM_CTX_SAFE_LIMIT, + originalMessageCount: finalMessages.length + }); + + // 1. system 메시지에서 [CONTEXT] 이후 부분을 우선 제거 + const sysIdx = finalMessages.findIndex(m => m.role === 'system'); + if (sysIdx >= 0) { + const sysContent = String(finalMessages[sysIdx].content || ''); + const contextSplit = sysContent.indexOf('[CONTEXT]'); + if (contextSplit > 0) { + // [CONTEXT] 이전까지만 유지 (기본 시스템 프롬프트 + 핵심 지시) + const trimmedSys = sysContent.slice(0, contextSplit).trimEnd(); + finalMessages = finalMessages.map((m, i) => + i === sysIdx ? { ...m, content: trimmedSys + '\n[Context omitted: model context limit]' } : m + ); + } + } + + // 2. 그래도 크면 시스템 프롬프트를 max 글자로 강제 잘라냄 + const afterTrimChars = finalMessages.reduce((acc, m) => acc + String(m.content || '').length, 0); + const afterTrimTokens = Math.ceil(afterTrimChars / 4); + if (afterTrimTokens > LM_CTX_SAFE_LIMIT && sysIdx >= 0) { + // 유저 메시지 토큰 계산 + const nonSysTokens = finalMessages + .filter((_, i) => i !== sysIdx) + .reduce((acc, m) => acc + String(m.content || '').length, 0) / 4; + const maxSysChars = Math.max(2000, (LM_CTX_SAFE_LIMIT - Math.ceil(nonSysTokens) - 512)) * 4; + const sysContent = String(finalMessages[sysIdx].content || ''); + if (sysContent.length > maxSysChars) { + finalMessages = finalMessages.map((m, i) => + i === sysIdx ? { ...m, content: sysContent.slice(0, maxSysChars) + '\n[Truncated for model context limit]' } : m + ); + } + } + + // 3. 히스토리 메시지 정리: system + 마지막 user만 유지 + const finalCheck = finalMessages.reduce((acc, m) => acc + String(m.content || '').length, 0) / 4; + if (finalCheck > LM_CTX_SAFE_LIMIT) { + const sysMsg = finalMessages.find(m => m.role === 'system'); + const lastUserMsg = [...finalMessages].reverse().find(m => m.role === 'user'); + finalMessages = [ + ...(sysMsg ? [sysMsg] : []), + ...(lastUserMsg ? [lastUserMsg] : []) + ]; + } + + logInfo('LM Studio compression result.', { + originalTokens: estimatedTokensRaw, + compressedTokens: Math.ceil(finalMessages.reduce((a, m) => a + String(m.content || '').length, 0) / 4), + messageCount: finalMessages.length + }); + } + } + const totalChars = finalMessages.reduce((acc, m) => acc + String(m.content || '').length, 0); const estimatedTokens = Math.ceil(totalChars / 4); const streamBody = { model: candidateModel, - messages: finalMessages, + messages: finalMessages.map(m => ({ role: m.role, content: m.content })), stream: true, ...(engine === 'lmstudio' - ? { max_tokens: 4096, temperature } + ? { max_tokens: Math.min(4096, Math.max(256, 3500 - estimatedTokens)), temperature } : { options: { num_ctx: 32768, num_predict: 4096, temperature } }), }; logInfo('AI streaming request started.', {