From faf3060ae77939c7894e0136f82a5e128cfe33df Mon Sep 17 00:00:00 2001 From: g1nation Date: Thu, 7 May 2026 15:48:13 +0900 Subject: [PATCH] fix: n_ctx retry in createStreamingRequest - compress msgs at API level not handlePrompt --- ...d46d2ca2057b05c488be1dcf439166ac5a9a1.json | 2 +- ...9f4f39d2bc368f77456c37b5eef9a94a66b5c.json | 2 +- ...5c7a44d7661af673b24e3f49551a7a2e50280.json | 2 +- ...adc543795e4b427b64540a49c9ab27c7fe213.json | 4 +- ...son => stress_conflict_1778136474544.json} | 18 +-- package.json | 2 +- src/agent.ts | 108 ++++++++++++------ 7 files changed, 86 insertions(+), 52 deletions(-) rename .astra/tests/stress/.astra/missions/{stress_conflict_1778136180610.json => stress_conflict_1778136474544.json} (81%) diff --git a/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json b/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json index d6d4dd2..8a90ed2 100644 --- a/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json +++ b/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json @@ -1,5 +1,5 @@ { "result": "Final report with inconsistencies. This should be long enough to pass validation.", - "createdAt": 1778136180630, + "createdAt": 1778136474568, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json b/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json index 7047312..d2e40f0 100644 --- a/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json +++ b/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json @@ -1,5 +1,5 @@ { "result": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.", - "createdAt": 1778136180628, + "createdAt": 1778136474566, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json b/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json index 7f67c46..107b798 100644 --- a/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json +++ b/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json @@ -1,5 +1,5 @@ { "result": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.", - "createdAt": 1778136180626, + "createdAt": 1778136474564, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json b/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json index 8e81b08..ef8d822 100644 --- a/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json +++ b/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json @@ -1,5 +1,5 @@ { - "result": "---\nid: stress_conflict_1778136180610\ndate: 2026-05-07T06:43:00.632Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (14ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (3ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (2ms)\n", - "createdAt": 1778136180632, + "result": "---\nid: stress_conflict_1778136474544\ndate: 2026-05-07T06:47:54.569Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (18ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (3ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (2ms)\n", + "createdAt": 1778136474569, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/missions/stress_conflict_1778136180610.json b/.astra/tests/stress/.astra/missions/stress_conflict_1778136474544.json similarity index 81% rename from .astra/tests/stress/.astra/missions/stress_conflict_1778136180610.json rename to .astra/tests/stress/.astra/missions/stress_conflict_1778136474544.json index f0aa6d3..d112347 100644 --- a/.astra/tests/stress/.astra/missions/stress_conflict_1778136180610.json +++ b/.astra/tests/stress/.astra/missions/stress_conflict_1778136474544.json @@ -1,8 +1,8 @@ { - "missionId": "stress_conflict_1778136180610", + "missionId": "stress_conflict_1778136474544", "status": "completed", - "startTime": "2026-05-07T06:43:00.610Z", - "totalElapsedMs": 23, + "startTime": "2026-05-07T06:47:54.544Z", + "totalElapsedMs": 26, "results": { "planner": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.", "researcher": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.", @@ -16,30 +16,30 @@ { "from": "idle", "to": "planner", - "durationMs": 14, + "durationMs": 18, "message": "전략 수립 중...", - "ts": "2026-05-07T06:43:00.624Z" + "ts": "2026-05-07T06:47:54.562Z" }, { "from": "planner", "to": "researcher", "durationMs": 3, "message": "핵심 정보 수집 및 분석 중...", - "ts": "2026-05-07T06:43:00.627Z" + "ts": "2026-05-07T06:47:54.565Z" }, { "from": "researcher", "to": "writer", "durationMs": 2, "message": "최종 리포트 작성 및 편집 중...", - "ts": "2026-05-07T06:43:00.629Z" + "ts": "2026-05-07T06:47:54.567Z" }, { "from": "writer", "to": "completed", - "durationMs": 4, + "durationMs": 3, "message": "미션 완료", - "ts": "2026-05-07T06:43:00.633Z" + "ts": "2026-05-07T06:47:54.570Z" } ], "resilienceMetrics": { diff --git a/package.json b/package.json index 391a016..51458c4 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "astra", "displayName": "Astra", "description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.", - "version": "2.80.13", + "version": "2.80.14", "publisher": "g1nation", "license": "MIT", "icon": "assets/icon.png", diff --git a/src/agent.ts b/src/agent.ts index 4a74868..09e8669 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -526,38 +526,7 @@ export class AgentExecutor { logInfo('Generation aborted by user.'); } else { logError('Stream reading error.', { engine, apiUrl, error: err?.message || String(err) }); - // LM Studio llama.cpp n_keep > n_ctx 에러 감지 → 자동 압축 재시도 - const errMsg = String(err?.message || err || ''); - const nCtxMatch = errMsg.match(/n_keep\s*:\s*(\d+)\s*>=?\s*n_ctx\s*:\s*(\d+)/); - if (nCtxMatch && loopDepth < 1) { - // loopDepth < 1: 재시도는 최초 1회만 (무한루프 방지) - const nCtx = parseInt(nCtxMatch[2], 10); - const nKeep = parseInt(nCtxMatch[1], 10); - logInfo('n_ctx overflow → auto-retrying with compressed system prompt.', { nKeep, nCtx }); - // 시스템 프롬프트를 n_ctx - 응답여유(512토큰) 범위로 강제 압축 - const maxSysChars = Math.max(800, (nCtx - 512)) * 4; - const compressedSysPrompt = systemPrompt.length > maxSysChars - ? systemPrompt.slice(0, maxSysChars) + `\n[System prompt compressed: n_ctx=${nCtx}]` - : systemPrompt; - this.webview?.postMessage({ - type: 'streamChunk', - value: `\n⚠️ *LM Studio n_ctx=${nCtx} 감지. 컨텍스트 압축 후 재시도 중...*\n` - }); - await this.handlePrompt(prompt, modelName, { - ...options, - loopDepth: loopDepth + 1, // 재시도 플래그 - runId, - systemPrompt: compressedSysPrompt - }); - } else if (nCtxMatch) { - // 재시도 이미 했는데도 실패한 경우 - this.webview?.postMessage({ - type: 'error', - value: `LM Studio n_ctx(${nCtxMatch[2]}) 초과: 압축 재시도도 실패했습니다. LM Studio에서 모델을 재로드하세요.` - }); - } else { - this.webview?.postMessage({ type: 'error', value: `Connection lost: ${err.message}` }); - } + this.webview?.postMessage({ type: 'error', value: `Connection lost: ${err.message}` }); } } @@ -2044,6 +2013,7 @@ export class AgentExecutor { const primaryEngine = resolveEngine(baseUrl); const engines = primaryEngine === 'lmstudio' ? ['lmstudio', 'ollama'] as const : ['ollama', 'lmstudio'] as const; let lastError: Error | null = null; + let nCtxRetried = false; // n_ctx 재시도 1회 제한 for (const engine of engines) { const apiUrl = buildApiUrl(baseUrl, engine, 'chat'); @@ -2052,11 +2022,13 @@ export class AgentExecutor { for (const candidateModel of modelCandidates) { for (const variant of messageVariants) { - const totalChars = variant.messages.reduce((acc, m) => acc + String(m.content || '').length, 0); + // 실제 전송할 메시지 (n_ctx 재시도 시 수정됨) + let finalMessages = variant.messages; + const totalChars = finalMessages.reduce((acc, m) => acc + String(m.content || '').length, 0); const estimatedTokens = Math.ceil(totalChars / 4); const streamBody = { model: candidateModel, - messages: variant.messages, + messages: finalMessages, stream: true, ...(engine === 'lmstudio' ? { max_tokens: 4096, temperature } @@ -2065,10 +2037,10 @@ export class AgentExecutor { logInfo('AI streaming request started.', { engine, apiUrl, model: candidateModel, variant: variant.name, - messageCount: variant.messages.length, + messageCount: finalMessages.length, estimatedTokens, - roles: variant.messages.map(message => message.role), - firstUserPreview: summarizeText(String(variant.messages.find(message => message.role === 'user')?.content || ''), 300) + roles: finalMessages.map(message => message.role), + firstUserPreview: summarizeText(String(finalMessages.find(message => message.role === 'user')?.content || ''), 300) }); try { @@ -2086,6 +2058,68 @@ export class AgentExecutor { if (!response.ok) { const errText = await response.text(); + + // ── LM Studio n_keep >= n_ctx 에러 감지 및 자동 재시도 ── + const nCtxMatch = errText.match(/n_keep\s*:\s*(\d+)\s*>=?\s*n_ctx\s*:\s*(\d+)/); + if (nCtxMatch && engine === 'lmstudio' && !nCtxRetried) { + nCtxRetried = true; + const nCtx = parseInt(nCtxMatch[2], 10); + logInfo(`n_ctx overflow detected (n_ctx=${nCtx}). Compressing messages and retrying...`); + + // system 메시지를 n_ctx 크기에 맞게 강제 압축 + const maxResponseTokens = 512; + const maxSysTokens = Math.max(500, nCtx - maxResponseTokens); + const maxSysChars = maxSysTokens * 4; + + // 히스토리는 마지막 user 메시지만 유지 + const sysMsg = finalMessages.find(m => m.role === 'system'); + const lastUserMsg = [...finalMessages].reverse().find(m => m.role === 'user'); + + const compressedMessages: ChatMessage[] = []; + if (sysMsg) { + const sysContent = String(sysMsg.content || ''); + compressedMessages.push({ + role: 'system', + content: sysContent.length > maxSysChars + ? sysContent.slice(0, maxSysChars) + `\n[Compressed for n_ctx=${nCtx}]` + : sysContent, + internal: true + }); + } + if (lastUserMsg) { + compressedMessages.push(lastUserMsg); + } + + // 압축된 메시지로 즉시 재요청 + const retryBody = { + model: candidateModel, + messages: compressedMessages.map(m => ({ role: m.role, content: m.content })), + stream: true, + max_tokens: Math.min(1024, maxResponseTokens), + temperature, + }; + + logInfo('Retrying with compressed context.', { + originalTokens: estimatedTokens, + compressedTokens: Math.ceil(compressedMessages.reduce((a, m) => a + String(m.content || '').length, 0) / 4), + nCtx, + messageCount: compressedMessages.length + }); + + const retryResponse = await fetch(apiUrl, { + method: 'POST', + headers: { 'Content-Type': 'application/json', 'Accept': 'text/event-stream', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive' }, + body: JSON.stringify(retryBody), + signal: this.abortController?.signal + }); + + if (retryResponse.ok) { + logInfo('n_ctx retry succeeded.', { apiUrl }); + return { response: retryResponse, engine, apiUrl }; + } + logError('n_ctx retry also failed.', { status: retryResponse.status }); + } + lastError = new Error(`AI Engine error (${engine}/${variant.name}): ${response.status} - ${summarizeText(errText, 300)}`); logError('AI streaming request returned non-OK status.', { engine, variant: variant.name, apiUrl, status: response.status, body: summarizeText(errText, 500) }); continue;