/**
 * Spec suite for the helpers exported by `src/lib/contextManager`.
 *
 * Each `describe` block pins the observable contract of one helper:
 * parameter-count parsing from model ids, output-token budgeting,
 * history trimming, system-prompt context truncation, stop-reason
 * classification, and the truncation-notice heuristic.
 */
import {
  estimateTokens,
  estimateMessagesTokens,
  computeOutputBudget,
  trimHistoryToBudget,
  truncateSystemPromptContext,
  classifyStopReason,
  shouldShowTruncationNotice,
  estimateModelParamsB,
  CONTEXT_OPEN_MARKER,
  CONTEXT_CLOSE_MARKER,
  type BudgetMessage,
} from '../src/lib/contextManager';

describe('contextManager.estimateModelParamsB', () => {
  it('reads common naming schemes', () => {
    // [model id, expected parameter count in billions]
    const sizedModels = [
      ['qwen2.5-7b-instruct', 7],
      ['llama-3.1-8b', 8],
      ['google/gemma-3n-e2b-it', 2],
      ['gemma4:e2b', 2],
      ['Qwen3-30B-A3B', 30],
    ] as const;

    for (const [modelId, paramsB] of sizedModels) {
      expect(estimateModelParamsB(modelId)).toBe(paramsB);
    }
  });

  it('returns null when there is no clear parameter hint', () => {
    const unsizedModels = [
      'phi-3-mini',
      'gpt-4o',
      '',
      'llama-q4bit', // quantization, not params
      'mixtral-8x7b', // MoE size is ambiguous — don't guess
    ] as const;

    for (const modelId of unsizedModels) {
      expect(estimateModelParamsB(modelId)).toBeNull();
    }
  });
});

describe('contextManager.computeOutputBudget', () => {
  // Shared limits for every case below; only the input token count varies.
  const limits = {
    contextLength: 32768,
    maxOutputTokens: 4096,
    safetyMargin: 2048,
    minOutputTokens: 512,
  };

  it('caps at maxOutputTokens when there is plenty of room', () => {
    const { maxOutputTokens, tight } = computeOutputBudget(1000, limits);

    expect(maxOutputTokens).toBe(4096);
    expect(tight).toBe(false);
  });

  it('shrinks output as input grows', () => {
    const { maxOutputTokens, tight } = computeOutputBudget(30000, limits);

    // 32768 - 30000 - 2048 = 720
    expect(maxOutputTokens).toBe(720);
    expect(tight).toBe(false);
  });

  it('flags tight and floors at minOutputTokens when input nearly fills the window', () => {
    const { maxOutputTokens, tight } = computeOutputBudget(31000, limits);

    // available 32768-31000-2048 = -280 ≤ 512
    expect(maxOutputTokens).toBe(512);
    expect(tight).toBe(true);
  });
});

describe('contextManager.trimHistoryToBudget', () => {
  /** Builds the placeholder message handed to the trimmer as its marker factory. */
  function marker(n: number): BudgetMessage {
    return { role: 'system', content: `[dropped ${n}]`, internal: true };
  }

  it('keeps everything when under budget', () => {
    const history: BudgetMessage[] = [
      { role: 'user', content: 'hi' },
      { role: 'assistant', content: 'hello' },
    ];

    const trimmed = trimHistoryToBudget(history, 10_000, marker);

    expect(trimmed.droppedCount).toBe(0);
    expect(trimmed.messages).toEqual(history);
  });

  it('drops oldest messages and prepends a marker when over budget', () => {
    // Ten alternating user/assistant turns, each with a 400-character body.
    const history: BudgetMessage[] = [];
    for (let i = 0; i < 10; i++) {
      history.push({ role: i % 2 ? 'assistant' : 'user', content: 'x'.repeat(400) });
    }

    // each msg ≈ 400*0.3+4 = 124 tokens
    const trimmed = trimHistoryToBudget(history, 250, marker);

    expect(trimmed.droppedCount).toBeGreaterThan(0);
    expect(trimmed.messages[0].content).toMatch(/^\[dropped \d+\]$/);

    // most recent message survives
    const lastKept = trimmed.messages[trimmed.messages.length - 1];
    const lastOriginal = history[history.length - 1];
    expect(lastKept).toEqual(lastOriginal);

    // The marker itself is allowed on top of the requested budget.
    const markerCost = estimateMessagesTokens([marker(1)]);
    expect(trimmed.tokensAfter).toBeLessThanOrEqual(250 + markerCost);
  });

  it('always keeps at least the last message even if it alone exceeds the budget', () => {
    const oversized = 'y'.repeat(5000);
    const history: BudgetMessage[] = [
      { role: 'user', content: 'short' },
      { role: 'user', content: oversized },
    ];

    const trimmed = trimHistoryToBudget(history, 10, marker);

    const keptOversized = trimmed.messages.some((m) => m.content === oversized);
    expect(keptOversized).toBe(true);
  });
});

describe('contextManager.truncateSystemPromptContext', () => {
  it('leaves a small prompt untouched', () => {
    const smallPrompt = 'You are helpful.';

    const result = truncateSystemPromptContext(smallPrompt, 1000);

    expect(result).toEqual({ prompt: smallPrompt, truncated: false });
  });

  it('trims only the [CONTEXT]…[/CONTEXT] body, preserving head and tail', () => {
    const head = 'CORE INSTRUCTIONS that must never be dropped. ' + 'a'.repeat(200);
    const body = 'BIG BRAIN CONTEXT ' + 'b'.repeat(20_000);
    const tail = 'CRITICAL NEGATIVE CONSTRAINTS — also never dropped. ' + 'c'.repeat(200);
    const prompt = [head, CONTEXT_OPEN_MARKER, body, CONTEXT_CLOSE_MARKER, tail].join('');

    const out = truncateSystemPromptContext(prompt, 400);

    expect(out.truncated).toBe(true);
    expect(out.prompt).toContain('CORE INSTRUCTIONS');
    expect(out.prompt).toContain('CRITICAL NEGATIVE CONSTRAINTS');
    expect(out.prompt).toContain(CONTEXT_CLOSE_MARKER.trim());

    // The bulk of the body is gone
    expect(out.prompt.length).toBeLessThan(prompt.length / 2);

    const tokenCeiling = 400 + estimateTokens(tail) + 64;
    expect(estimateTokens(out.prompt)).toBeLessThanOrEqual(tokenCeiling);
  });

  it('falls back to a hard tail-cut when there is no [CONTEXT] marker', () => {
    const prompt = 'instructions ' + 'z'.repeat(50_000);

    const out = truncateSystemPromptContext(prompt, 200);

    expect(out.truncated).toBe(true);
    expect(out.prompt.length).toBeLessThan(prompt.length);
    expect(out.prompt.startsWith('instructions')).toBe(true);
  });
});

describe('contextManager.classifyStopReason', () => {
  it('maps engine-specific reasons to common kinds', () => {
    // [raw stop reason passed in, expected classification]
    const mappings = [
      ['eosFound', 'complete'],
      ['stop', 'complete'],
      ['length', 'output-limit'],
      ['maxPredictedTokensReached', 'output-limit'],
      ['contextLengthReached', 'context-overflow'],
      ['userStopped', 'user-stopped'],
      ['failed', 'error'],
      [undefined, 'unknown'],
    ] as const;

    for (const [rawReason, expectedKind] of mappings) {
      expect(classifyStopReason(rawReason)).toBe(expectedKind);
    }
  });
});

describe('contextManager.shouldShowTruncationNotice', () => {
  it('suppresses output-limit notices for visibly short answers', () => {
    const shown = shouldShowTruncationNotice('output-limit', 80, 4096);

    expect(shown).toBe(false);
  });

  it('shows output-limit notices when output consumed most of the budget', () => {
    const shown = shouldShowTruncationNotice('output-limit', 3900, 4096);

    expect(shown).toBe(true);
  });

  it('always surfaces context overflow and error stops', () => {
    const overflowShown = shouldShowTruncationNotice('context-overflow', 10, 4096);
    expect(overflowShown).toBe(true);

    const errorShown = shouldShowTruncationNotice('error', 10, 4096);
    expect(errorShown).toBe(true);
  });
});