chore: version up to 2.80.34 and package
This commit is contained in:
@@ -0,0 +1,104 @@
|
||||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
import { getBrainTokenIndex, clearBrainTokenIndex } from '../src/retrieval/brainIndex';
|
||||
|
||||
/**
 * Creates a fresh, uniquely named temporary directory to act as a brain root.
 *
 * @returns Path of the new directory (prefix `astra-brain-` under the OS temp dir).
 */
function mkTmpBrain(): string {
  const prefix = path.join(os.tmpdir(), 'astra-brain-');
  return fs.mkdtempSync(prefix);
}
|
||||
/**
 * Writes a UTF-8 text file below the brain root, creating any missing parent
 * directories first.
 *
 * @param brain   Brain root directory.
 * @param rel     File path relative to `brain` (may contain sub-directories).
 * @param content Text to write; an existing file is overwritten.
 * @returns The joined path of the written file.
 */
function writeMd(brain: string, rel: string, content: string): string {
  const target = path.join(brain, rel);
  fs.mkdirSync(path.dirname(target), { recursive: true });
  fs.writeFileSync(target, content, 'utf8');
  return target;
}
|
||||
/**
 * Pushes a file's access and modification times forward so a change is
 * detectable even when writes land within the same millisecond (seen on some
 * CI / fast machines).
 *
 * The new timestamp is 5 s past whichever is later: the current clock or the
 * file's existing mtime. Basing it on the existing mtime means repeated bumps
 * of the same file always advance strictly, instead of landing on (nearly)
 * the same `now + 5s` value.
 *
 * @param file Path of an existing file; throws if it does not exist.
 */
function bumpMtime(file: string): void {
  const currentMs = fs.statSync(file).mtimeMs;
  const bumped = new Date(Math.max(Date.now(), currentMs) + 5000);
  fs.utimesSync(file, bumped, bumped);
}
|
||||
|
||||
/**
 * Tests for `getBrainTokenIndex`, run against a throw-away brain directory
 * that is created before and removed after every test.
 */
describe('brainIndex.getBrainTokenIndex', () => {
  let brain: string;

  /** Returns the index entry for `file`, failing with a readable message when it is absent. */
  function entryFor<T extends { filePath: string }>(docs: readonly T[], file: string): T {
    const hit = docs.find(d => d.filePath === file);
    if (!hit) throw new Error(`no index entry for ${file}`);
    return hit;
  }

  beforeEach(() => { brain = mkTmpBrain(); });

  afterEach(() => {
    clearBrainTokenIndex(brain);
    // Best-effort cleanup: a leftover temp directory must not fail the test.
    try { fs.rmSync(brain, { recursive: true, force: true }); } catch { /* ignore */ }
  });

  it('tokenizes files and returns one entry per file', () => {
    const a = writeMd(brain, 'architecture-overview.md', '# Architecture overview\nThis describes the system architecture and design.');
    const b = writeMd(brain, 'records/bug-report.md', '# Bug report\n이 설계는 기존 구조와 충돌 위험이 있습니다.');

    const out = getBrainTokenIndex(brain, [a, b]);
    expect(out).toHaveLength(2);

    const docA = entryFor(out, a);
    const docB = entryFor(out, b);
    expect(docA.tokens).toContain('architecture');
    expect(docA.tokens).toContain('design');
    expect(docA.titleTokens.length).toBeGreaterThan(0);
    expect(docB.relativePath).toBe(path.join('records', 'bug-report.md'));
    expect(docB.conflictCount).toBeGreaterThan(0); // "충돌" is a conflict indicator
    expect(docA.conflictCount).toBe(0);
  });

  it('reuses cached tokens for unchanged files and re-indexes only changed ones', () => {
    const a = writeMd(brain, 'alpha.md', 'alpha keyword stays the same here');
    const b = writeMd(brain, 'beta.md', 'beta original wording goes here');

    const first = getBrainTokenIndex(brain, [a, b]);
    const aTokensRef = entryFor(first, a).tokens;

    // Re-call without changes — `a` should hand back the *same array reference* (served from cache).
    const second = getBrainTokenIndex(brain, [a, b]);
    expect(entryFor(second, a).tokens).toBe(aTokensRef);

    // Change b, and force a distinct mtime so the change is detectable.
    fs.writeFileSync(b, 'gamma replaced everything delta', 'utf8');
    bumpMtime(b);

    const third = getBrainTokenIndex(brain, [a, b]);
    const bTokens = entryFor(third, b).tokens;
    expect(bTokens).toEqual(expect.arrayContaining(['gamma', 'delta']));
    expect(bTokens).not.toContain('original');
    // a still cached & unchanged.
    expect(entryFor(third, a).tokens).toBe(aTokensRef);
  });

  it('skips files that vanished between listing and reading', () => {
    const a = writeMd(brain, 'present.md', 'present content');
    const ghost = path.join(brain, 'ghost.md'); // never created on disk

    const out = getBrainTokenIndex(brain, [a, ghost]);
    expect(out.map(d => d.filePath)).toEqual([a]);
  });

  it('re-indexes a file that was deleted and recreated (mtime/size mismatch)', () => {
    const a = writeMd(brain, 'one.md', 'one content');
    const two = writeMd(brain, 'two.md', 'two original content');
    getBrainTokenIndex(brain, [a, two]);

    // Index once more while the file is absent, then bring it back with new content.
    fs.rmSync(two);
    getBrainTokenIndex(brain, [a]);
    const recreated = writeMd(brain, 'two.md', 'completely different replacement content');
    bumpMtime(recreated);

    const out = getBrainTokenIndex(brain, [a, recreated]);
    const recreatedTokens = entryFor(out, recreated).tokens;
    expect(recreatedTokens).toContain('completely');
    expect(recreatedTokens).not.toContain('original');
  });

  it('handles empty/invalid input gracefully', () => {
    expect(getBrainTokenIndex('', ['x'])).toEqual([]);
    expect(getBrainTokenIndex(brain, [])).toEqual([]);
  });

  it('persists the index to <brain>/.astra/brain-index.json (debounced) and adds a .gitignore', async () => {
    const a = writeMd(brain, 'persisted.md', 'persist me to disk');
    getBrainTokenIndex(brain, [a]);

    // The write is debounced, so wait before inspecting the disk.
    // NOTE(review): 2200 ms presumably outlasts the debounce interval, which is not visible here — confirm against brainIndex.
    await new Promise<void>(resolve => setTimeout(resolve, 2200));

    const astraDir = path.join(brain, '.astra');
    const indexFile = path.join(astraDir, 'brain-index.json');
    expect(fs.existsSync(indexFile)).toBe(true);
    expect(fs.readFileSync(path.join(astraDir, '.gitignore'), 'utf8')).toContain('*');

    const persisted = JSON.parse(fs.readFileSync(indexFile, 'utf8'));
    expect(persisted.version).toBeGreaterThanOrEqual(1);
    expect(persisted.entries[a].tokens).toContain('persist');
  }, 6000);
});
|
||||
@@ -0,0 +1,113 @@
|
||||
import {
|
||||
estimateTokens,
|
||||
estimateMessagesTokens,
|
||||
computeOutputBudget,
|
||||
trimHistoryToBudget,
|
||||
truncateSystemPromptContext,
|
||||
classifyStopReason,
|
||||
estimateModelParamsB,
|
||||
CONTEXT_OPEN_MARKER,
|
||||
CONTEXT_CLOSE_MARKER,
|
||||
type BudgetMessage,
|
||||
} from '../src/lib/contextManager';
|
||||
|
||||
/** Tests for `estimateModelParamsB`: parameter count (in billions) parsed from a model name. */
describe('contextManager.estimateModelParamsB', () => {
  it('reads common naming schemes', () => {
    const cases: Array<[name: string, paramsB: number]> = [
      ['qwen2.5-7b-instruct', 7],
      ['llama-3.1-8b', 8],
      ['google/gemma-3n-e2b-it', 2],
      ['gemma4:e2b', 2],
      ['Qwen3-30B-A3B', 30],
    ];
    for (const [name, paramsB] of cases) {
      expect(estimateModelParamsB(name)).toBe(paramsB);
    }
  });

  it('returns null when there is no clear parameter hint', () => {
    const names = [
      'phi-3-mini',
      'gpt-4o',
      '',
      'llama-q4bit', // quantization, not params
      'mixtral-8x7b', // MoE size is ambiguous — don't guess
    ];
    for (const name of names) {
      expect(estimateModelParamsB(name)).toBeNull();
    }
  });
});
|
||||
|
||||
/**
 * Tests for `computeOutputBudget`. The expected values follow
 * `contextLength - inputTokens - safetyMargin`, capped at `maxOutputTokens`
 * and floored at `minOutputTokens`.
 */
describe('contextManager.computeOutputBudget', () => {
  const limits = { contextLength: 32768, maxOutputTokens: 4096, safetyMargin: 2048, minOutputTokens: 512 };

  it('caps at maxOutputTokens when there is plenty of room', () => {
    const budget = computeOutputBudget(1000, limits);
    expect(budget.maxOutputTokens).toBe(4096);
    expect(budget.tight).toBe(false);
  });

  it('shrinks output as input grows', () => {
    const budget = computeOutputBudget(30000, limits); // 32768 - 30000 - 2048 = 720
    expect(budget.maxOutputTokens).toBe(720);
    expect(budget.tight).toBe(false);
  });

  it('flags tight and floors at minOutputTokens when input nearly fills the window', () => {
    const budget = computeOutputBudget(31000, limits); // available 32768 - 31000 - 2048 = -280 ≤ 512
    expect(budget.maxOutputTokens).toBe(512);
    expect(budget.tight).toBe(true);
  });
});
|
||||
|
||||
/** Tests for `trimHistoryToBudget`: dropping the oldest messages to fit a token budget. */
describe('contextManager.trimHistoryToBudget', () => {
  /** Builds the placeholder message that stands in for `n` dropped messages. */
  const marker = (n: number): BudgetMessage => ({ role: 'system', content: `[dropped ${n}]`, internal: true });

  it('keeps everything when under budget', () => {
    const msgs: BudgetMessage[] = [{ role: 'user', content: 'hi' }, { role: 'assistant', content: 'hello' }];
    const result = trimHistoryToBudget(msgs, 10_000, marker);
    expect(result.droppedCount).toBe(0);
    expect(result.messages).toEqual(msgs);
  });

  it('drops oldest messages and prepends a marker when over budget', () => {
    const msgs: BudgetMessage[] = Array.from({ length: 10 }, (_, i) => ({ role: i % 2 ? 'assistant' : 'user', content: 'x'.repeat(400) }));
    const budget = 250; // each msg ≈ 400*0.3+4 = 124 tokens
    const result = trimHistoryToBudget(msgs, budget, marker);

    expect(result.droppedCount).toBeGreaterThan(0);
    expect(result.messages[0].content).toMatch(/^\[dropped \d+\]$/);
    // most recent message survives
    expect(result.messages[result.messages.length - 1]).toEqual(msgs[msgs.length - 1]);
    // The marker itself is allowed on top of the budget.
    expect(result.tokensAfter).toBeLessThanOrEqual(budget + estimateMessagesTokens([marker(1)]));
  });

  it('always keeps at least the last message even if it alone exceeds the budget', () => {
    const oversized = 'y'.repeat(5000);
    const msgs: BudgetMessage[] = [{ role: 'user', content: 'short' }, { role: 'user', content: oversized }];
    const result = trimHistoryToBudget(msgs, 10, marker);
    expect(result.messages.some(m => m.content === oversized)).toBe(true);
  });
});
|
||||
|
||||
/** Tests for `truncateSystemPromptContext`: shrinking a system prompt to a token budget. */
describe('contextManager.truncateSystemPromptContext', () => {
  it('leaves a small prompt untouched', () => {
    const prompt = 'You are helpful.';
    expect(truncateSystemPromptContext(prompt, 1000)).toEqual({ prompt, truncated: false });
  });

  it('trims only the [CONTEXT]…[/CONTEXT] body, preserving head and tail', () => {
    const head = 'CORE INSTRUCTIONS that must never be dropped. ' + 'a'.repeat(200);
    const body = 'BIG BRAIN CONTEXT ' + 'b'.repeat(20_000);
    const tail = 'CRITICAL NEGATIVE CONSTRAINTS — also never dropped. ' + 'c'.repeat(200);
    const prompt = head + CONTEXT_OPEN_MARKER + body + CONTEXT_CLOSE_MARKER + tail;
    const budget = 400;

    const out = truncateSystemPromptContext(prompt, budget);

    expect(out.truncated).toBe(true);
    expect(out.prompt).toContain('CORE INSTRUCTIONS');
    expect(out.prompt).toContain('CRITICAL NEGATIVE CONSTRAINTS');
    expect(out.prompt).toContain(CONTEXT_CLOSE_MARKER.trim());
    // The bulk of the body is gone
    expect(out.prompt.length).toBeLessThan(prompt.length / 2);
    // Allowance on top of the budget: the preserved tail plus 64 tokens of slack.
    expect(estimateTokens(out.prompt)).toBeLessThanOrEqual(budget + estimateTokens(tail) + 64);
  });

  it('falls back to a hard tail-cut when there is no [CONTEXT] marker', () => {
    const prompt = 'instructions ' + 'z'.repeat(50_000);
    const out = truncateSystemPromptContext(prompt, 200);

    expect(out.truncated).toBe(true);
    expect(out.prompt.length).toBeLessThan(prompt.length);
    expect(out.prompt.startsWith('instructions')).toBe(true);
  });
});
|
||||
|
||||
/** Tests for `classifyStopReason`: mapping raw engine stop reasons to common kinds. */
describe('contextManager.classifyStopReason', () => {
  it('maps engine-specific reasons to common kinds', () => {
    const cases: Array<[raw: string | undefined, kind: string]> = [
      ['eosFound', 'complete'],
      ['stop', 'complete'],
      ['length', 'output-limit'],
      ['maxPredictedTokensReached', 'output-limit'],
      ['contextLengthReached', 'context-overflow'],
      ['userStopped', 'user-stopped'],
      ['failed', 'error'],
      [undefined, 'unknown'],
    ];
    for (const [raw, kind] of cases) {
      expect(classifyStopReason(raw)).toBe(kind);
    }
  });
});
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
import { LMStudioStreamer } from '../src/lmstudio/streamer';
|
||||
import type { ChatStreamEvent } from '../src/lmstudio/streamer';
|
||||
import type { ILMStudioClient } from '../src/lmstudio/client';
|
||||
|
||||
class FakeModel {
|
||||
@@ -15,14 +16,16 @@ class FakeModel {
|
||||
public failNext: Error | null = null;
|
||||
public chunks: string[] = [];
|
||||
|
||||
constructor(opts: { chunks?: string[]; failAfter?: number; throwOnRespond?: Error } = {}) {
|
||||
constructor(opts: { chunks?: string[]; failAfter?: number; throwOnRespond?: Error; stopReason?: string } = {}) {
|
||||
this.chunks = opts.chunks ?? ['Hel', 'lo, ', 'world'];
|
||||
this._failAfter = opts.failAfter;
|
||||
this._throwOnRespond = opts.throwOnRespond;
|
||||
this.stopReason = opts.stopReason;
|
||||
}
|
||||
|
||||
private _failAfter?: number;
|
||||
private _throwOnRespond?: Error;
|
||||
public stopReason: string | undefined;
|
||||
|
||||
respond(chat: any, opts: any) {
|
||||
if (this._throwOnRespond) {
|
||||
@@ -32,10 +35,15 @@ class FakeModel {
|
||||
this.lastOpts = opts;
|
||||
const chunks = this.chunks;
|
||||
const failAfter = this._failAfter;
|
||||
const stopReason = this.stopReason;
|
||||
let i = 0;
|
||||
const self = this;
|
||||
return {
|
||||
// Real OngoingPrediction is both async-iterable AND a thenable resolving to a
|
||||
// PredictionResult with `.stats.stopReason`. Mirror that shape so the streamer
|
||||
// can read the stop reason after the stream drains.
|
||||
const prediction: any = {
|
||||
cancel: async () => { self.cancelCount++; },
|
||||
then(resolve: (v: any) => void) { resolve({ stats: { stopReason } }); },
|
||||
[Symbol.asyncIterator]() {
|
||||
return {
|
||||
async next() {
|
||||
@@ -54,6 +62,7 @@ class FakeModel {
|
||||
};
|
||||
},
|
||||
};
|
||||
return prediction;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,9 +87,19 @@ class FakeClient implements ILMStudioClient {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Drains a chat stream and returns its content tokens in order.
 *
 * The streamer emits a trailing `{ token: '', stopReason }` event on normal
 * completion; empty tokens are skipped so the result holds just the non-empty
 * content tokens (what every real consumer uses).
 *
 * @param stream Stream of chat events to consume to the end.
 * @returns The non-empty tokens, in arrival order.
 */
async function collect(stream: AsyncIterable<ChatStreamEvent>): Promise<string[]> {
  const tokens: string[] = [];
  for await (const { token } of stream) {
    if (token) tokens.push(token);
  }
  return tokens;
}
|
||||
|
||||
/**
 * Drains a chat stream and returns every event unchanged, including events
 * with an empty token.
 *
 * @param stream Stream of chat events to consume to the end.
 * @returns All events, in arrival order.
 */
async function collectEvents(stream: AsyncIterable<ChatStreamEvent>): Promise<ChatStreamEvent[]> {
  const events: ChatStreamEvent[] = [];
  for await (const event of stream) {
    events.push(event);
  }
  return events;
}
|
||||
|
||||
@@ -98,6 +117,22 @@ describe('LMStudioStreamer', () => {
|
||||
expect(client.model.lastOpts.temperature).toBe(0.4);
|
||||
});
|
||||
|
||||
// Checks that the stop reason from the prediction's stats arrives as one final
// empty-token event, and that the generation limits reach the SDK call options.
test('emits a trailing stopReason event from prediction stats', async () => {
  const model = new FakeModel({ chunks: ['hi'], stopReason: 'maxPredictedTokensReached' });
  const client = new FakeClient(model);
  const streamer = new LMStudioStreamer(client);

  const events = await collectEvents(streamer.stream({
    modelName: 'm1',
    messages: [{ role: 'user', content: 'hi' }],
    temperature: 0.1,
    maxTokens: 64,
  }));

  expect(events.map(e => e.token)).toEqual(['hi', '']);
  expect(events[events.length - 1].stopReason).toBe('maxPredictedTokensReached');
  // maxTokens / contextOverflowPolicy are forwarded to the SDK
  expect(client.model.lastOpts.maxTokens).toBe(64);
  expect(client.model.lastOpts.contextOverflowPolicy).toBe('stopAtLimit');
});
|
||||
|
||||
test('passes signal through to the SDK', async () => {
|
||||
const client = new FakeClient();
|
||||
const streamer = new LMStudioStreamer(client);
|
||||
|
||||
Reference in New Issue
Block a user