diff --git a/PATCHNOTES.md b/PATCHNOTES.md index f798842..bb42a8b 100644 --- a/PATCHNOTES.md +++ b/PATCHNOTES.md @@ -1,5 +1,11 @@ # Astra Patch Notes +## v2.2.254 (2026-06-18) +### ๐Ÿ”Ž ๋นˆ ์‘๋‹ต(empty response) ์ง„๋‹จ ์ •ํ™•๋„ โ€” MoE ํ™œ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ ์ธ์‹ +- ์ผ๋ฐ˜ ์—์ด์ „ํŠธ ์ฑ„ํŒ…์—์„œ ์•ฝํ•œ ๋ชจ๋ธ์ด ํฐ ์ž…๋ ฅ์— ์ฒซ ํ† ํฐ EOS ๋กœ ๋ฌด๋„ˆ์ ธ **๋นˆ ์‘๋‹ต**์ด ๋‚  ๋•Œ, ๋ชจ๋ธ๋ช… ํŒŒ์„œ๊ฐ€ `gemma-4-26b-a4b` ๋ฅผ "26B ํฐ ๋ชจ๋ธ"๋กœ ์˜คํŒํ•ด ์—‰๋šฑํ•œ ์•ˆ๋‚ด๋ฅผ ํ•˜๋˜ ๋ฌธ์ œ. **ํ™œ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ ์ถ”์ •**(`estimateActiveParamsB`: `a4b`โ†’4, `A3B`โ†’3, `e2b`โ†’2) ์ถ”๊ฐ€ โ†’ MoE ๋ฅผ ์ •ํ™•ํžˆ ์‹๋ณ„. ([contextManager.ts](src/lib/contextManager.ts)) +- ๋นˆ ์‘๋‹ต ์—๋Ÿฌ ๋ฉ”์‹œ์ง€ ๊ฐœ์„ : (1) "**๋‹ต๋ณ€์ด ๊ธธ์–ด์„œ๊ฐ€ ์•„๋‹ˆ๋ผ ์ž…๋ ฅ์ด ๋ชจ๋ธ ์šฉ๋Ÿ‰ ๋Œ€๋น„ ์ปค์„œ**" ๋ฐœ์ƒํ•จ์„ ๋ช…์‹œ, (2) MoE ๋ฉด `์ด ~26B / ํ™œ์„ฑ ~4B` ํ‘œ๊ธฐ + ํ™œ์„ฑ 7B+ ๊ถŒ์žฅ, (3) **LM Studio ๋กœ๋“œ context length ์™€ `g1nation.contextLength` ๋ถˆ์ผ์น˜** ๊ฐ€๋Šฅ์„ฑ์„ 1์ˆœ์œ„ ์ ๊ฒ€ ํ•ญ๋ชฉ์œผ๋กœ ์•ˆ๋‚ด. ([agent.ts](src/agent.ts)) +- ์ฐธ๊ณ : `/meet` ์˜ map-reduce ์ฒญํ‚น์€ ๊ทธ ๋ช…๋ น ์ „์šฉ์ด๋ฉฐ, ์ผ๋ฐ˜ ์ฑ„ํŒ…(์ฝ”๋“œ ๋ฆฌ๋ทฐ ๋“ฑ)์—๋Š” ์ ์šฉ๋˜์ง€ ์•Š๋Š”๋‹ค(๋‹จ์ผ ์˜ˆ์‚ฐ ํ˜ธ์ถœ). ํ…Œ์ŠคํŠธ +6๊ฑด(์ „์ฒด 662 ํ†ต๊ณผ). + ## v2.2.253 (2026-06-17) ### ๐Ÿช“ /meet ์กฐ๊ฐ ์‹คํŒจ ์‹œ ์ ˆ๋ฐ˜ ๋ถ„ํ•  ์žฌ์‹œ๋„ (์•ฝํ•œ ๋ชจ๋ธ ์„ฑ๊ณต๋ฅ โ†‘) - v2.2.252 ์˜ ์žฌ์‹œ๋„(๋ฐ˜๋ณต ์–ต์ œ ๊ฐ•ํ™”)์—๋„ ์กฐ๊ฐ์ด ๊ณ„์† ๋ถ•๊ดดํ•˜๋ฉด, ๊ทธ ์กฐ๊ฐ์„ **์ค„ ๊ฒฝ๊ณ„๋กœ ์ ˆ๋ฐ˜์”ฉ ์ชผ๊ฐœ ์žฌ๊ท€ ์žฌ์‹œ๋„**ํ•œ๋‹ค(12Kโ†’6Kโ†’3.5K). ์ž…๋ ฅ์ด ์ž‘์•„์งˆ์ˆ˜๋ก ์•ฝํ•œ ๋ชจ๋ธ์˜ ์ถœ๋ ฅ ๋ถ•๊ดด ํ™•๋ฅ ์ด ๋–จ์–ด์ง€๋ฏ€๋กœ, **๋ชจ๋ธ ๊ต์ฒด ์—†์ด๋„** ์ถ”์ถœ ์„ฑ๊ณต๋ฅ ์ด ์˜ค๋ฅธ๋‹ค. ์ตœ์†Œ ํฌ๊ธฐ(3.5K) ์ดํ•˜์ธ๋ฐ๋„ ์‹คํŒจํ•˜๋Š” ๊ตฌ๊ฐ„๋งŒ ๊ฑด๋„ˆ๋›ด๋‹ค. ([handlers.ts](src/features/datacollect/handlers.ts)) diff --git a/package.json b/package.json index d83d5b0..6e8e981 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "astra", "displayName": "Astra", "description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.", - "version": "2.2.253", + "version": "2.2.254", "publisher": "g1nation", "license": "MIT", "icon": "assets/icon.png", diff --git a/src/agent.ts b/src/agent.ts index dd748c1..b533959 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -125,6 +125,7 @@ import { truncationNotice, shouldShowTruncationNotice, estimateModelParamsB, + estimateActiveParamsB, type ContextLimits, } from './lib/contextManager'; import { samplingToRestBody, type ChatStreamStats } from './lmstudio/streamer'; @@ -1215,19 +1216,18 @@ export class AgentExecutor { messageCount: messagesForRequest.length, fallbackTried: loopDepth === 0 ? 'yes' : 'no', }); - // Cheap heuristic: parse a parameter-count hint out of the - // model identifier (e.g. "google/gemma-4-e2b", "qwen2-1.5b"). - // Anything <= 3B is small enough that long-context generation - // commonly fails by emitting EOS as the first token even though - // the server log shows prompt-eval succeeded with truncated=0. - const smallModelMatch = actualModel.match(/(? 60000; // ~15k tokens of English/code - const contextLimitHint = - 'LM Studio ๋กœ๊ทธ์— `n_tokens = N, truncated = 0` ์ธ๋ฐ `eval time` ์ด 0ms ๋ผ๋ฉด ๋ชจ๋ธ์ด ์ฒซ ํ† ํฐ๋ถ€ํ„ฐ EOS ๋ฅผ ๋ฑ‰์€ ๊ฒƒ์ž…๋‹ˆ๋‹ค. ๋ณดํ†ต ์ปจํ…์ŠคํŠธ ํ•œ๊ณ„ ์ดˆ๊ณผ ๋˜๋Š” ๋ชจ๋ธ ์šฉ๋Ÿ‰ ๋ถ€์กฑ์ž…๋‹ˆ๋‹ค. ๋” ํฐ ๋ชจ๋ธ(7B+)๋กœ ๊ต์ฒดํ•˜๊ฑฐ๋‚˜ ์ปจํ…์ŠคํŠธ๋ฅผ ์ค„์—ฌ ๋ณด์„ธ์š”.'; + // ๋ชจ๋ธ ์‹๋ณ„์ž์—์„œ "ํ™œ์„ฑ(active) ํŒŒ๋ผ๋ฏธํ„ฐ" ๊ทœ๋ชจ๋ฅผ ์ถ”์ •ํ•œ๋‹ค. MoE ๋ชจ๋ธ์€ + // ์ด ํŒŒ๋ผ๋ฏธํ„ฐ(์˜ˆ: 26b)๊ฐ€ ์ปค๋„ ํ™œ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ(์˜ˆ: a4b=4)๊ฐ€ ์ž‘์•„ ๊ธด ํ”„๋กฌํ”„ํŠธ์—์„œ + // ์ฒซ ํ† ํฐ๋ถ€ํ„ฐ EOS ๋ฅผ ๋ฑ‰๋Š”๋‹ค(๋นˆ ์‘๋‹ต). ์ด ํŒŒ๋ผ๋ฏธํ„ฐ๋งŒ ๋ณด๋ฉด "26b โ†’ ํฐ ๋ชจ๋ธ"๋กœ + // ์˜คํŒํ•˜๋ฏ€๋กœ ํ™œ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ๋กœ ํŒ์ •ํ•œ๋‹ค. + const activeB = estimateActiveParamsB(actualModel); + const totalB = estimateModelParamsB(actualModel); + const isMoE = activeB !== null && totalB !== null && activeB < totalB; + const capacityHint = isMoE + ? `์ด ๋ชจ๋ธ์€ MoE ๋กœ ์ถ”์ •๋ฉ๋‹ˆ๋‹ค (์ด ~${totalB}B, **ํ™œ์„ฑ ~${activeB}B**). ํ™œ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ๊ฐ€ ์ž‘์•„ ๊ธด ์ž…๋ ฅ(ํ˜„์žฌ ~${inputTokens.toLocaleString()} tokens)์—์„œ ์ฒซ ํ† ํฐ๋ถ€ํ„ฐ EOS ๋ฅผ ๋ฑ‰์–ด ๋นˆ ์‘๋‹ต์ด ๋˜๊ธฐ ์‰ฝ์Šต๋‹ˆ๋‹ค. ์ฝ”๋“œ ๋ฆฌ๋ทฐ์ฒ˜๋Ÿผ ์ž…๋ ฅ์ด ํฐ ์ž‘์—…์€ **ํ™œ์„ฑ 7B+ ๋˜๋Š” ํ•œ๊ตญ์–ด ํŠนํ™” ๋ชจ๋ธ(EXAONE/Qwen ๋“ฑ)** ์„ ๊ถŒ์žฅํ•ฉ๋‹ˆ๋‹ค.` + : '์ž…๋ ฅ์ด ํฐ ์ž‘์—…์—์„œ ๋ชจ๋ธ์ด ์ฒซ ํ† ํฐ๋ถ€ํ„ฐ EOS ๋ฅผ ๋ฑ‰์œผ๋ฉด ๋ณดํ†ต ๋ชจ๋ธ ์šฉ๋Ÿ‰ ๋ถ€์กฑ ๋˜๋Š” ์ปจํ…์ŠคํŠธ ์ดˆ๊ณผ์ž…๋‹ˆ๋‹ค. ๋” ํฐ ๋ชจ๋ธ(7B+)๋กœ ๊ต์ฒดํ•˜๊ฑฐ๋‚˜ ์ž…๋ ฅ์„ ์ค„์—ฌ ๋ณด์„ธ์š”.'; + const ctxMismatchHint = + '**LM Studio ์— ๋กœ๋“œ๋œ ์‹ค์ œ context length ๊ฐ€ Astra ์„ค์ •(`g1nation.contextLength`)๋ณด๋‹ค ์ž‘์€์ง€** ํ™•์ธํ•˜์„ธ์š”. ์˜ˆ: ์„ค์ •์€ 32768 ์ธ๋ฐ ๋ชจ๋ธ์€ 8192/16384 ๋กœ ๋กœ๋“œ๋ผ ์žˆ์œผ๋ฉด, Astra ๊ฐ€ ๊ทธ ํ•œ๋„๋ฅผ ๋„˜๊ฒจ ๋ณด๋‚ด ์„œ๋ฒ„๊ฐ€ ์ž˜๋ผ๋‚ด๊ฑฐ๋‚˜ EOS ๋ฅผ ๋ฑ‰์Šต๋‹ˆ๋‹ค. (LM Studio ๋ชจ๋ธ ๋กœ๋“œ ์˜ต์…˜์˜ Context Length ์™€ ์„ค์ •๊ฐ’์„ ์ผ์น˜)'; const looksOverflow = outputBudget.tight || inputTokens > ctxLimits.contextLength - ctxLimits.safetyMargin; this.webview.postMessage({ @@ -1235,18 +1235,19 @@ export class AgentExecutor { value: [ 'AI ์—”์ง„์ด ๋นˆ ์‘๋‹ต์„ ๋ฐ˜ํ™˜ํ–ˆ์Šต๋‹ˆ๋‹ค (์ŠคํŠธ๋ฆฌ๋ฐ + non-streaming ํด๋ฐฑ ๋ชจ๋‘ ์‹คํŒจ).', `Engine: ${engine}`, - `Model: ${actualModel}`, + `Model: ${actualModel}${isMoE ? ` (MoE: ์ด ~${totalB}B / ํ™œ์„ฑ ~${activeB}B)` : ''}`, `Prompt: ~${inputTokens.toLocaleString()} tokens (${promptCharCount.toLocaleString()} chars, ${messagesForRequest.length} messages) / context window ${ctxLimits.contextLength.toLocaleString()} tokens`, `Output budget: ${maxOutputTokens.toLocaleString()} tokens`, ...(finishStopReason ? [`Stop reason: ${finishStopReason}`] : []), '', + 'โš ๏ธ ๋นˆ ์‘๋‹ต์€ *๋‹ต๋ณ€์ด ๊ธธ์–ด์„œ*๊ฐ€ ์•„๋‹ˆ๋ผ *์ž…๋ ฅ์ด ๋ชจ๋ธ ์šฉ๋Ÿ‰์— ๋น„ํ•ด ์ปค์„œ* ๋ฐœ์ƒํ•˜๋Š” ๊ฒฝ์šฐ๊ฐ€ ๋Œ€๋ถ€๋ถ„์ž…๋‹ˆ๋‹ค (์ถœ๋ ฅ์€ ์–ด์ฐจํ”ผ ์œ„ budget ์œผ๋กœ ์ œํ•œ๋จ).', + '', '๋‹ค์Œ์„ ์‹œ๋„ํ•ด๋ณด์„ธ์š”:', - ' โ€ข LM Studio์—์„œ ๋ชจ๋ธ์ด ์‹ค์ œ๋กœ ๋กœ๋“œ๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธ', - looksOverflow - ? ' โ€ข ์ž…๋ ฅ์ด ๋ชจ๋ธ context window ์— ๊ฐ€๊น์Šต๋‹ˆ๋‹ค. `/newChat` ์œผ๋กœ ๋Œ€ํ™”๋ฅผ ์ƒˆ๋กœ ์‹œ์ž‘ํ•˜๊ฑฐ๋‚˜, Skill/Brain ์ปจํ…์ŠคํŠธ๋ฅผ ์ค„์ด๊ฑฐ๋‚˜, Settings ์˜ `g1nation.contextLength` ๋ฅผ ๋ชจ๋ธ ์‹ค์ œ ๊ฐ’์œผ๋กœ ๋งž์ถ”์„ธ์š”.' - : ' โ€ข ๋‹ค๋ฅธ ๋ชจ๋ธ๋กœ ์ „ํ™˜ํ•˜๊ฑฐ๋‚˜ LM Studio ์„œ๋ฒ„๋ฅผ ์žฌ์‹œ์ž‘', - ' โ€ข Settings์—์„œ maxContextSize / memoryLongTermFiles ์ค„์ด๊ธฐ', - ...(looksSmall || promptIsLarge ? [' โ€ข ' + contextLimitHint] : []), + ' โ€ข ' + ctxMismatchHint, + ' โ€ข ' + capacityHint, + ' โ€ข `/newChat` ์œผ๋กœ ๋Œ€ํ™”๋ฅผ ์ƒˆ๋กœ ์‹œ์ž‘ํ•˜๊ฑฐ๋‚˜, Settings ์—์„œ memoryLongTermFiles / BrainยทSkill ์ปจํ…์ŠคํŠธ๋ฅผ ์ค„์—ฌ ์ž…๋ ฅ์„ ์ถ•์†Œ', + ' โ€ข LM Studio ์—์„œ ๋ชจ๋ธ์ด ์‹ค์ œ๋กœ ๋กœ๋“œ๋ผ ์žˆ๋Š”์ง€ / ์„œ๋ฒ„ ์žฌ์‹œ์ž‘', + ...(looksOverflow ? [' โ€ข ์ž…๋ ฅ์ด context window ์— ๋งค์šฐ ๊ฐ€๊น์Šต๋‹ˆ๋‹ค โ€” ์œ„ ์ปจํ…์ŠคํŠธ ์ผ์น˜ ํ™•์ธ์ด ํŠนํžˆ ์ค‘์š”ํ•ฉ๋‹ˆ๋‹ค.'] : []), ].join('\n') }); return; diff --git a/src/lib/contextManager.ts b/src/lib/contextManager.ts index 460c6a9..965e463 100644 --- a/src/lib/contextManager.ts +++ b/src/lib/contextManager.ts @@ -73,6 +73,24 @@ export function estimateModelParamsB(modelId: string | null | undefined): number return Number.isFinite(n) && n > 0 && n < 2000 ? n : null; } +/** + * ์‹ค์ œ ์ƒ์„ฑ ๋Šฅ๋ ฅ์„ ์ขŒ์šฐํ•˜๋Š” "ํ™œ์„ฑ(active) ํŒŒ๋ผ๋ฏธํ„ฐ" ๊ทœ๋ชจ๋ฅผ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค. MoE ๋ชจ๋ธ์€ + * ์ด ํŒŒ๋ผ๋ฏธํ„ฐ๋ณด๋‹ค ํ™œ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ๊ฐ€ ์ž‘์•„ ๊ธด ํ”„๋กฌํ”„ํŠธ์—์„œ ์ฒซ ํ† ํฐ EOS(๋นˆ ์‘๋‹ต)๋กœ + * ๋ฌด๋„ˆ์ง€๊ธฐ ์‰ฝ์Šต๋‹ˆ๋‹ค. ํ™œ์„ฑ ํ‘œ๊ธฐ(์˜ˆ: "...-a4b", "...-A22B", "gemma-3n-e2b")๊ฐ€ ์žˆ์œผ๋ฉด + * ๊ทธ ๊ฐ’์„, ์—†์œผ๋ฉด ์ด ํŒŒ๋ผ๋ฏธํ„ฐ(estimateModelParamsB)๋ฅผ ๋Œ๋ ค์ค๋‹ˆ๋‹ค. + * ์˜ˆ: "gemma-4-26b-a4b-it" โ†’ 4, "qwen3-30b-a3b" โ†’ 3, "llama-3.1-8b" โ†’ 8. + */ +export function estimateActiveParamsB(modelId: string | null | undefined): number | null { + if (!modelId) return null; + // ํ™œ์„ฑ ํ‘œ๊ธฐ: ๊ตฌ๋ถ„์ž ๋’ค a/e + ์ˆซ์ž + b (์˜ˆ: -a4b, _A22B, .e2b) + const moe = String(modelId).match(/[-_/:.\s][ae](\d+(?:\.\d+)?)\s*b(?![a-z0-9])/i); + if (moe) { + const n = Number(moe[1]); + if (Number.isFinite(n) && n > 0 && n < 2000) return n; + } + return estimateModelParamsB(modelId); +} + /** role/๊ตฌ๋ถ„์ž ๋“ฑ ๋ฉ”์‹œ์ง€ 1๊ฐœ๋‹น ๋ฐœ์ƒํ•˜๋Š” ๊ณ ์ • ์˜ค๋ฒ„ํ—ค๋“œ(๋Œ€๋žต). */ const PER_MESSAGE_TOKEN_OVERHEAD = 4; diff --git a/tests/contextManager.test.ts b/tests/contextManager.test.ts index ac3875d..7a7e8c1 100644 --- a/tests/contextManager.test.ts +++ b/tests/contextManager.test.ts @@ -7,6 +7,7 @@ import { classifyStopReason, shouldShowTruncationNotice, estimateModelParamsB, + estimateActiveParamsB, CONTEXT_OPEN_MARKER, CONTEXT_CLOSE_MARKER, type BudgetMessage, @@ -29,6 +30,22 @@ describe('contextManager.estimateModelParamsB', () => { }); }); +describe('contextManager.estimateActiveParamsB', () => { + it('prefers active params for MoE naming (a/e prefix)', () => { + expect(estimateActiveParamsB('gemma-4-26b-a4b-it')).toBe(4); // ํ™œ์„ฑ 4B (์ด 26B ์•„๋‹˜) + expect(estimateActiveParamsB('Qwen3-30B-A3B')).toBe(3); // ํ™œ์„ฑ 3B + expect(estimateActiveParamsB('google/gemma-3n-e2b-it')).toBe(2); + }); + it('falls back to total params when no active hint', () => { + expect(estimateActiveParamsB('llama-3.1-8b')).toBe(8); + expect(estimateActiveParamsB('qwen2.5-7b-instruct')).toBe(7); + }); + it('returns null when there is no parameter hint', () => { + expect(estimateActiveParamsB('phi-3-mini')).toBeNull(); + expect(estimateActiveParamsB('')).toBeNull(); + }); +}); + describe('contextManager.computeOutputBudget', () => { const limits = { contextLength: 32768, maxOutputTokens: 4096, safetyMargin: 2048, minOutputTokens: 512 }; it('caps at maxOutputTokens when there is plenty of room', () => {