fix: move the n_ctx retry into createStreamingRequest - compress messages at the API level instead of in handlePrompt

This commit is contained in:
2026-05-07 15:48:13 +09:00
parent f190ea41ff
commit faf3060ae7
7 changed files with 86 additions and 52 deletions
@@ -1,5 +1,5 @@
{ {
"result": "Final report with inconsistencies. This should be long enough to pass validation.", "result": "Final report with inconsistencies. This should be long enough to pass validation.",
"createdAt": 1778136180630, "createdAt": 1778136474568,
"modelVersion": "unknown" "modelVersion": "unknown"
} }
@@ -1,5 +1,5 @@
{ {
"result": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.", "result": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.",
"createdAt": 1778136180628, "createdAt": 1778136474566,
"modelVersion": "unknown" "modelVersion": "unknown"
} }
@@ -1,5 +1,5 @@
{ {
"result": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.", "result": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.",
"createdAt": 1778136180626, "createdAt": 1778136474564,
"modelVersion": "unknown" "modelVersion": "unknown"
} }
@@ -1,5 +1,5 @@
{ {
"result": "---\nid: stress_conflict_1778136180610\ndate: 2026-05-07T06:43:00.632Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (14ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (3ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (2ms)\n", "result": "---\nid: stress_conflict_1778136474544\ndate: 2026-05-07T06:47:54.569Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... 
(18ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (3ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (2ms)\n",
"createdAt": 1778136180632, "createdAt": 1778136474569,
"modelVersion": "unknown" "modelVersion": "unknown"
} }
@@ -1,8 +1,8 @@
{ {
"missionId": "stress_conflict_1778136180610", "missionId": "stress_conflict_1778136474544",
"status": "completed", "status": "completed",
"startTime": "2026-05-07T06:43:00.610Z", "startTime": "2026-05-07T06:47:54.544Z",
"totalElapsedMs": 23, "totalElapsedMs": 26,
"results": { "results": {
"planner": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.", "planner": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.",
"researcher": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.", "researcher": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.",
@@ -16,30 +16,30 @@
{ {
"from": "idle", "from": "idle",
"to": "planner", "to": "planner",
"durationMs": 14, "durationMs": 18,
"message": "전략 수립 중...", "message": "전략 수립 중...",
"ts": "2026-05-07T06:43:00.624Z" "ts": "2026-05-07T06:47:54.562Z"
}, },
{ {
"from": "planner", "from": "planner",
"to": "researcher", "to": "researcher",
"durationMs": 3, "durationMs": 3,
"message": "핵심 정보 수집 및 분석 중...", "message": "핵심 정보 수집 및 분석 중...",
"ts": "2026-05-07T06:43:00.627Z" "ts": "2026-05-07T06:47:54.565Z"
}, },
{ {
"from": "researcher", "from": "researcher",
"to": "writer", "to": "writer",
"durationMs": 2, "durationMs": 2,
"message": "최종 리포트 작성 및 편집 중...", "message": "최종 리포트 작성 및 편집 중...",
"ts": "2026-05-07T06:43:00.629Z" "ts": "2026-05-07T06:47:54.567Z"
}, },
{ {
"from": "writer", "from": "writer",
"to": "completed", "to": "completed",
"durationMs": 4, "durationMs": 3,
"message": "미션 완료", "message": "미션 완료",
"ts": "2026-05-07T06:43:00.633Z" "ts": "2026-05-07T06:47:54.570Z"
} }
], ],
"resilienceMetrics": { "resilienceMetrics": {
+1 -1
View File
@@ -2,7 +2,7 @@
"name": "astra", "name": "astra",
"displayName": "Astra", "displayName": "Astra",
"description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.", "description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.",
"version": "2.80.13", "version": "2.80.14",
"publisher": "g1nation", "publisher": "g1nation",
"license": "MIT", "license": "MIT",
"icon": "assets/icon.png", "icon": "assets/icon.png",
+71 -37
View File
@@ -526,38 +526,7 @@ export class AgentExecutor {
logInfo('Generation aborted by user.'); logInfo('Generation aborted by user.');
} else { } else {
logError('Stream reading error.', { engine, apiUrl, error: err?.message || String(err) }); logError('Stream reading error.', { engine, apiUrl, error: err?.message || String(err) });
// LM Studio llama.cpp n_keep > n_ctx 에러 감지 → 자동 압축 재시도 this.webview?.postMessage({ type: 'error', value: `Connection lost: ${err.message}` });
const errMsg = String(err?.message || err || '');
const nCtxMatch = errMsg.match(/n_keep\s*:\s*(\d+)\s*>=?\s*n_ctx\s*:\s*(\d+)/);
if (nCtxMatch && loopDepth < 1) {
// loopDepth < 1: 재시도는 최초 1회만 (무한루프 방지)
const nCtx = parseInt(nCtxMatch[2], 10);
const nKeep = parseInt(nCtxMatch[1], 10);
logInfo('n_ctx overflow → auto-retrying with compressed system prompt.', { nKeep, nCtx });
// 시스템 프롬프트를 n_ctx - 응답여유(512토큰) 범위로 강제 압축
const maxSysChars = Math.max(800, (nCtx - 512)) * 4;
const compressedSysPrompt = systemPrompt.length > maxSysChars
? systemPrompt.slice(0, maxSysChars) + `\n[System prompt compressed: n_ctx=${nCtx}]`
: systemPrompt;
this.webview?.postMessage({
type: 'streamChunk',
value: `\n⚠️ *LM Studio n_ctx=${nCtx} 감지. 컨텍스트 압축 후 재시도 중...*\n`
});
await this.handlePrompt(prompt, modelName, {
...options,
loopDepth: loopDepth + 1, // 재시도 플래그
runId,
systemPrompt: compressedSysPrompt
});
} else if (nCtxMatch) {
// 재시도 이미 했는데도 실패한 경우
this.webview?.postMessage({
type: 'error',
value: `LM Studio n_ctx(${nCtxMatch[2]}) 초과: 압축 재시도도 실패했습니다. LM Studio에서 모델을 재로드하세요.`
});
} else {
this.webview?.postMessage({ type: 'error', value: `Connection lost: ${err.message}` });
}
} }
} }
@@ -2044,6 +2013,7 @@ export class AgentExecutor {
const primaryEngine = resolveEngine(baseUrl); const primaryEngine = resolveEngine(baseUrl);
const engines = primaryEngine === 'lmstudio' ? ['lmstudio', 'ollama'] as const : ['ollama', 'lmstudio'] as const; const engines = primaryEngine === 'lmstudio' ? ['lmstudio', 'ollama'] as const : ['ollama', 'lmstudio'] as const;
let lastError: Error | null = null; let lastError: Error | null = null;
let nCtxRetried = false; // n_ctx 재시도 1회 제한
for (const engine of engines) { for (const engine of engines) {
const apiUrl = buildApiUrl(baseUrl, engine, 'chat'); const apiUrl = buildApiUrl(baseUrl, engine, 'chat');
@@ -2052,11 +2022,13 @@ export class AgentExecutor {
for (const candidateModel of modelCandidates) { for (const candidateModel of modelCandidates) {
for (const variant of messageVariants) { for (const variant of messageVariants) {
const totalChars = variant.messages.reduce((acc, m) => acc + String(m.content || '').length, 0); // 실제 전송할 메시지 (n_ctx 재시도 시 수정됨)
let finalMessages = variant.messages;
const totalChars = finalMessages.reduce((acc, m) => acc + String(m.content || '').length, 0);
const estimatedTokens = Math.ceil(totalChars / 4); const estimatedTokens = Math.ceil(totalChars / 4);
const streamBody = { const streamBody = {
model: candidateModel, model: candidateModel,
messages: variant.messages, messages: finalMessages,
stream: true, stream: true,
...(engine === 'lmstudio' ...(engine === 'lmstudio'
? { max_tokens: 4096, temperature } ? { max_tokens: 4096, temperature }
@@ -2065,10 +2037,10 @@ export class AgentExecutor {
logInfo('AI streaming request started.', { logInfo('AI streaming request started.', {
engine, apiUrl, model: candidateModel, engine, apiUrl, model: candidateModel,
variant: variant.name, variant: variant.name,
messageCount: variant.messages.length, messageCount: finalMessages.length,
estimatedTokens, estimatedTokens,
roles: variant.messages.map(message => message.role), roles: finalMessages.map(message => message.role),
firstUserPreview: summarizeText(String(variant.messages.find(message => message.role === 'user')?.content || ''), 300) firstUserPreview: summarizeText(String(finalMessages.find(message => message.role === 'user')?.content || ''), 300)
}); });
try { try {
@@ -2086,6 +2058,68 @@ export class AgentExecutor {
if (!response.ok) { if (!response.ok) {
const errText = await response.text(); const errText = await response.text();
// ── LM Studio n_keep >= n_ctx 에러 감지 및 자동 재시도 ──
const nCtxMatch = errText.match(/n_keep\s*:\s*(\d+)\s*>=?\s*n_ctx\s*:\s*(\d+)/);
if (nCtxMatch && engine === 'lmstudio' && !nCtxRetried) {
nCtxRetried = true;
const nCtx = parseInt(nCtxMatch[2], 10);
logInfo(`n_ctx overflow detected (n_ctx=${nCtx}). Compressing messages and retrying...`);
// system 메시지를 n_ctx 크기에 맞게 강제 압축
const maxResponseTokens = 512;
const maxSysTokens = Math.max(500, nCtx - maxResponseTokens);
const maxSysChars = maxSysTokens * 4;
// 히스토리는 마지막 user 메시지만 유지
const sysMsg = finalMessages.find(m => m.role === 'system');
const lastUserMsg = [...finalMessages].reverse().find(m => m.role === 'user');
const compressedMessages: ChatMessage[] = [];
if (sysMsg) {
const sysContent = String(sysMsg.content || '');
compressedMessages.push({
role: 'system',
content: sysContent.length > maxSysChars
? sysContent.slice(0, maxSysChars) + `\n[Compressed for n_ctx=${nCtx}]`
: sysContent,
internal: true
});
}
if (lastUserMsg) {
compressedMessages.push(lastUserMsg);
}
// 압축된 메시지로 즉시 재요청
const retryBody = {
model: candidateModel,
messages: compressedMessages.map(m => ({ role: m.role, content: m.content })),
stream: true,
max_tokens: Math.min(1024, maxResponseTokens),
temperature,
};
logInfo('Retrying with compressed context.', {
originalTokens: estimatedTokens,
compressedTokens: Math.ceil(compressedMessages.reduce((a, m) => a + String(m.content || '').length, 0) / 4),
nCtx,
messageCount: compressedMessages.length
});
const retryResponse = await fetch(apiUrl, {
method: 'POST',
headers: { 'Content-Type': 'application/json', 'Accept': 'text/event-stream', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive' },
body: JSON.stringify(retryBody),
signal: this.abortController?.signal
});
if (retryResponse.ok) {
logInfo('n_ctx retry succeeded.', { apiUrl });
return { response: retryResponse, engine, apiUrl };
}
logError('n_ctx retry also failed.', { status: retryResponse.status });
}
lastError = new Error(`AI Engine error (${engine}/${variant.name}): ${response.status} - ${summarizeText(errText, 300)}`); lastError = new Error(`AI Engine error (${engine}/${variant.name}): ${response.status} - ${summarizeText(errText, 300)}`);
logError('AI streaming request returned non-OK status.', { engine, variant: variant.name, apiUrl, status: response.status, body: summarizeText(errText, 500) }); logError('AI streaming request returned non-OK status.', { engine, variant: variant.name, apiUrl, status: response.status, body: summarizeText(errText, 500) });
continue; continue;