/**
 * Phase 1 — context-window alignment.
 *
 * The budgeter must clamp to the model's ACTUALLY-loaded window when it's
 * smaller than the user's `contextLength` setting, so a model loaded with a
 * smaller window than the setting never silently overflows the server.
 */
import { computeBudgetedRequest } from '../src/agent/handlePrompt/computeBudgetedRequest';
import type { ChatMessage } from '../src/agent';

/**
 * Baseline budgeter settings shared by every case in this file. Individual
 * tests layer their own values on top through `run({ config })`.
 */
const baseConfig = {
  contextLength: 32768,
  maxOutputTokens: 4096,
  contextSafetyMargin: 512,
  smallModelContextCap: 0, // disabled
  autoCompactHistory: false,
};

/**
 * Calls `computeBudgetedRequest` with a fixed system prompt and a single
 * user message, so the only inputs that vary between tests are the reported
 * actual window and any config overrides.
 *
 * @param overrides.actualContextLength Window size reported for the loaded
 *   model; left `undefined` when the test wants it to be unknown.
 * @param overrides.config Settings merged over `baseConfig` (later keys win).
 * @returns Whatever `computeBudgetedRequest` returns for that input.
 */
function run(
  overrides: {
    actualContextLength?: number;
    // `Partial` is generic and needs its type argument; tying it to
    // `baseConfig` also rejects misspelled or unknown config keys.
    config?: Partial<typeof baseConfig>;
  } = {},
) {
  const reqMessages: ChatMessage[] = [{ role: 'user', content: 'hello' }];

  return computeBudgetedRequest({
    fullSystemPrompt: 'You are a helpful assistant.',
    reqMessages,
    actualModel: 'some-13b-model',
    config: { ...baseConfig, ...overrides.config },
    imageCount: 0,
    actualContextLength: overrides.actualContextLength,
  });
}

describe('computeBudgetedRequest — real-window alignment', () => {
  test('clamps to the actual loaded window when it is smaller than the setting', () => {
    const r = run({ actualContextLength: 8192 });

    expect(r.windowMismatch).toBe(true);
    expect(r.effectiveContextLength).toBe(8192);
    expect(r.ctxLimits.contextLength).toBe(8192);
  });

  test('keeps the configured window when the actual window is unknown', () => {
    const r = run({ actualContextLength: undefined });

    expect(r.windowMismatch).toBe(false);
    expect(r.effectiveContextLength).toBe(32768);
    expect(r.ctxLimits.contextLength).toBe(32768);
  });

  test('does not raise the window when the actual window is larger than the setting', () => {
    const r = run({ actualContextLength: 131072 });

    expect(r.windowMismatch).toBe(false);
    expect(r.effectiveContextLength).toBe(32768); // setting is the lower bound here
  });

  test('ignores a non-positive / non-finite actual window (falls back to setting)', () => {
    expect(run({ actualContextLength: 0 }).effectiveContextLength).toBe(32768);
    expect(run({ actualContextLength: -5 }).effectiveContextLength).toBe(32768);
    expect(run({ actualContextLength: NaN }).effectiveContextLength).toBe(32768);
    // Infinity is the other non-finite value; the configured window must
    // still be what the budgeter ends up using.
    expect(run({ actualContextLength: Infinity }).effectiveContextLength).toBe(32768);
  });
});