1efbe2ec0f
약한 모델이 큰 입력에 첫 토큰 EOS로 빈 응답을 낼 때, 모델명 파서가 gemma-4-26b-a4b를 "26B 큰 모델"로 오판하던 문제 수정. - estimateActiveParamsB 추가: MoE 활성 파라미터 추정(a4b→4, A3B→3, e2b→2) - 빈 응답 에러 메시지 개선: 원인이 답변 길이가 아니라 입력 크기임을 명시, MoE 총/활성 파라미터 표기, LM Studio 로드 context length 불일치 1순위 점검 안내 - 테스트 +6건(전체 662 통과) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
167 lines
8.4 KiB
TypeScript
167 lines
8.4 KiB
TypeScript
// Public surface of the context manager exercised by this test file.
import {
  estimateTokens,
  estimateMessagesTokens,
  computeOutputBudget,
  trimHistoryToBudget,
  truncateSystemPromptContext,
  classifyStopReason,
  shouldShowTruncationNotice,
  estimateModelParamsB,
  estimateActiveParamsB,
  CONTEXT_OPEN_MARKER,
  CONTEXT_CLOSE_MARKER,
  type BudgetMessage,
} from '../src/lib/contextManager';

// estimateModelParamsB: extracts a parameter-count hint (in billions) from a model name,
// or yields null when the name carries no unambiguous hint.
describe('contextManager.estimateModelParamsB', () => {
  it('reads common naming schemes', () => {
    // Plain "<N>b" suffixes.
    expect(estimateModelParamsB('qwen2.5-7b-instruct')).toBe(7);
    expect(estimateModelParamsB('llama-3.1-8b')).toBe(8);
    // "e<N>b" style names, with either '-' or ':' as the separator.
    expect(estimateModelParamsB('google/gemma-3n-e2b-it')).toBe(2);
    expect(estimateModelParamsB('gemma4:e2b')).toBe(2);
    // Name carries both "30B" and "A3B"; this helper is expected to report the 30.
    expect(estimateModelParamsB('Qwen3-30B-A3B')).toBe(30);
  });

  it('returns null when there is no clear parameter hint', () => {
    expect(estimateModelParamsB('phi-3-mini')).toBeNull();
    expect(estimateModelParamsB('gpt-4o')).toBeNull();
    expect(estimateModelParamsB('')).toBeNull();
    expect(estimateModelParamsB('llama-q4bit')).toBeNull(); // quantization, not params
    expect(estimateModelParamsB('mixtral-8x7b')).toBeNull(); // MoE size is ambiguous — don't guess
  });
});

// estimateActiveParamsB: like estimateModelParamsB, but for MoE-style names it is expected
// to report the active parameter count (the "a<N>b" / "e<N>b" part) instead of the total.
describe('contextManager.estimateActiveParamsB', () => {
  it('prefers active params for MoE naming (a/e prefix)', () => {
    expect(estimateActiveParamsB('gemma-4-26b-a4b-it')).toBe(4); // active 4B (not the 26B total)
    expect(estimateActiveParamsB('Qwen3-30B-A3B')).toBe(3); // active 3B
    expect(estimateActiveParamsB('google/gemma-3n-e2b-it')).toBe(2);
  });

  it('falls back to total params when no active hint', () => {
    expect(estimateActiveParamsB('llama-3.1-8b')).toBe(8);
    expect(estimateActiveParamsB('qwen2.5-7b-instruct')).toBe(7);
  });

  it('returns null when there is no parameter hint', () => {
    expect(estimateActiveParamsB('phi-3-mini')).toBeNull();
    expect(estimateActiveParamsB('')).toBeNull();
  });
});

// computeOutputBudget: given the input token count and the model limits, yields the output
// token allowance (`maxOutputTokens`) plus a `tight` flag. The expectations below follow
// the arithmetic contextLength - input - safetyMargin, capped above and floored below.
describe('contextManager.computeOutputBudget', () => {
  // Shared limits for every case in this suite.
  const limits = { contextLength: 32768, maxOutputTokens: 4096, safetyMargin: 2048, minOutputTokens: 512 };

  it('caps at maxOutputTokens when there is plenty of room', () => {
    const r = computeOutputBudget(1000, limits);
    expect(r.maxOutputTokens).toBe(4096);
    expect(r.tight).toBe(false);
  });

  it('shrinks output as input grows', () => {
    const r = computeOutputBudget(30000, limits); // 32768 - 30000 - 2048 = 720
    expect(r.maxOutputTokens).toBe(720);
    expect(r.tight).toBe(false);
  });

  it('flags tight and floors at minOutputTokens when input nearly fills the window', () => {
    const r = computeOutputBudget(31000, limits); // available 32768-31000-2048 = -280 ≤ 512
    expect(r.maxOutputTokens).toBe(512);
    expect(r.tight).toBe(true);
  });
});

// trimHistoryToBudget: fits a message history into a token budget. The result exposes
// `messages`, `droppedCount` and `tokensAfter`; a caller-supplied factory builds the
// marker message that stands in for whatever was dropped.
describe('contextManager.trimHistoryToBudget', () => {
  // v2.2.69: makeMarker now also receives the dropped messages array so callers can build a real summary.
  // Tests don't need the dropped payload — just keep the signature compatible.
  const marker = (n: number, _dropped?: BudgetMessage[]): BudgetMessage => ({ role: 'system', content: `[dropped ${n}]`, internal: true });

  it('keeps everything when under budget', () => {
    const msgs: BudgetMessage[] = [{ role: 'user', content: 'hi' }, { role: 'assistant', content: 'hello' }];
    const r = trimHistoryToBudget(msgs, 10_000, marker);
    expect(r.droppedCount).toBe(0);
    expect(r.messages).toEqual(msgs);
  });

  it('drops oldest messages and prepends a marker when over budget', () => {
    // Ten alternating user/assistant messages of 400 characters each.
    const msgs: BudgetMessage[] = Array.from({ length: 10 }, (_, i) => ({ role: i % 2 ? 'assistant' : 'user', content: 'x'.repeat(400) }));
    const r = trimHistoryToBudget(msgs, 250, marker); // each msg ≈ 400*0.3+4 = 124 tokens
    expect(r.droppedCount).toBeGreaterThan(0);
    expect(r.messages[0].content).toMatch(/^\[dropped \d+\]$/);
    // most recent message survives
    expect(r.messages[r.messages.length - 1]).toEqual(msgs[msgs.length - 1]);
    // The marker itself is allowed on top of the budget, so it is added to the bound.
    expect(r.tokensAfter).toBeLessThanOrEqual(250 + estimateMessagesTokens([marker(1, [])]));
  });

  it('passes the dropped messages array to the marker factory (v2.2.69)', () => {
    const msgs: BudgetMessage[] = Array.from({ length: 6 }, (_, i) => ({
      role: i % 2 ? 'assistant' : 'user',
      content: 'x'.repeat(400),
    }));
    // Captures the second argument handed to the factory so it can be inspected afterwards.
    let observedDropped: BudgetMessage[] | undefined;
    const factory = (n: number, dropped: BudgetMessage[]): BudgetMessage => {
      observedDropped = dropped;
      return { role: 'system', content: `[summary of ${n}: first=${dropped[0]?.role}]`, internal: true };
    };
    const r = trimHistoryToBudget(msgs, 250, factory);
    expect(r.droppedCount).toBeGreaterThan(0);
    expect(observedDropped).toBeDefined();
    // Matcher / optional chaining instead of `!` so a missing callback fails as an assertion.
    expect(observedDropped).toHaveLength(r.droppedCount);
    // Dropped messages are the OLDEST ones, in order.
    expect(observedDropped?.[0]).toEqual(msgs[0]);
    expect(r.messages[0].content).toMatch(/^\[summary of \d+: first=user\]$/);
  });

  it('always keeps at least the last message even if it alone exceeds the budget', () => {
    const msgs: BudgetMessage[] = [{ role: 'user', content: 'short' }, { role: 'user', content: 'y'.repeat(5000) }];
    const r = trimHistoryToBudget(msgs, 10, marker);
    expect(r.messages.some(m => m.content === 'y'.repeat(5000))).toBe(true);
  });
});

// truncateSystemPromptContext: shrinks a system prompt toward a token budget and reports
// the outcome as `{ prompt, truncated }`.
describe('contextManager.truncateSystemPromptContext', () => {
  it('leaves a small prompt untouched', () => {
    const p = 'You are helpful.';
    expect(truncateSystemPromptContext(p, 1000)).toEqual({ prompt: p, truncated: false });
  });

  it('trims only the [CONTEXT]…[/CONTEXT] body, preserving head and tail', () => {
    // head / tail sit outside the markers and must survive; body sits between them.
    const head = 'CORE INSTRUCTIONS that must never be dropped. ' + 'a'.repeat(200);
    const body = 'BIG BRAIN CONTEXT ' + 'b'.repeat(20_000);
    const tail = 'CRITICAL NEGATIVE CONSTRAINTS — also never dropped. ' + 'c'.repeat(200);
    const prompt = head + CONTEXT_OPEN_MARKER + body + CONTEXT_CLOSE_MARKER + tail;
    const out = truncateSystemPromptContext(prompt, 400);
    expect(out.truncated).toBe(true);
    expect(out.prompt).toContain('CORE INSTRUCTIONS');
    expect(out.prompt).toContain('CRITICAL NEGATIVE CONSTRAINTS');
    expect(out.prompt).toContain(CONTEXT_CLOSE_MARKER.trim());
    // The bulk of the body is gone
    expect(out.prompt.length).toBeLessThan(prompt.length / 2);
    // Upper bound leaves room for the preserved tail plus a 64-token slack.
    expect(estimateTokens(out.prompt)).toBeLessThanOrEqual(400 + estimateTokens(tail) + 64);
  });

  it('falls back to a hard tail-cut when there is no [CONTEXT] marker', () => {
    const prompt = 'instructions ' + 'z'.repeat(50_000);
    const out = truncateSystemPromptContext(prompt, 200);
    expect(out.truncated).toBe(true);
    expect(out.prompt.length).toBeLessThan(prompt.length);
    // The start of the prompt is kept; the cut happens at the end.
    expect(out.prompt.startsWith('instructions')).toBe(true);
  });
});

// classifyStopReason: normalizes a raw stop-reason string (or undefined) into one of the
// shared kinds asserted below.
describe('contextManager.classifyStopReason', () => {
  it('maps engine-specific reasons to common kinds', () => {
    // Normal completion.
    expect(classifyStopReason('eosFound')).toBe('complete');
    expect(classifyStopReason('stop')).toBe('complete');
    // Output token cap reached.
    expect(classifyStopReason('length')).toBe('output-limit');
    expect(classifyStopReason('maxPredictedTokensReached')).toBe('output-limit');
    // Remaining categories.
    expect(classifyStopReason('contextLengthReached')).toBe('context-overflow');
    expect(classifyStopReason('userStopped')).toBe('user-stopped');
    expect(classifyStopReason('failed')).toBe('error');
    expect(classifyStopReason(undefined)).toBe('unknown');
  });
});

// shouldShowTruncationNotice: decides whether a truncation notice is surfaced for a given
// stop kind. NOTE(review): the two numeric arguments look like (output tokens produced,
// output token budget) — confirm against the implementation.
describe('contextManager.shouldShowTruncationNotice', () => {
  it('suppresses output-limit notices for visibly short answers', () => {
    expect(shouldShowTruncationNotice('output-limit', 80, 4096)).toBe(false);
  });

  it('shows output-limit notices when output consumed most of the budget', () => {
    expect(shouldShowTruncationNotice('output-limit', 3900, 4096)).toBe(true);
  });

  it('always surfaces context overflow and error stops', () => {
    expect(shouldShowTruncationNotice('context-overflow', 10, 4096)).toBe(true);
    expect(shouldShowTruncationNotice('error', 10, 4096)).toBe(true);
  });
});