108 lines
5.3 KiB
TypeScript
108 lines
5.3 KiB
TypeScript
import * as fs from 'fs';
|
|
import * as os from 'os';
|
|
import * as path from 'path';
|
|
import { getBrainTokenIndex, clearBrainTokenIndex } from '../src/retrieval/brainIndex';
|
|
|
|
/**
 * Creates a fresh, uniquely named directory under the OS temp dir to act as
 * an isolated "brain" root for a single test.
 *
 * @returns Path of the newly created directory (its name starts with `astra-brain-`).
 */
function mkTmpBrain(): string {
  return fs.mkdtempSync(path.join(os.tmpdir(), 'astra-brain-'));
}
|
|
/**
 * Writes a UTF-8 file at `rel` inside `brain`, creating any missing parent
 * directories first.
 *
 * @param brain Root directory the file is placed under.
 * @param rel Path of the file relative to `brain` (may contain sub-directories).
 * @param content Text written to the file; an existing file is overwritten.
 * @returns The joined path of the written file.
 */
function writeMd(brain: string, rel: string, content: string): string {
  const p = path.join(brain, rel);
  fs.mkdirSync(path.dirname(p), { recursive: true });
  fs.writeFileSync(p, content, 'utf8');
  return p;
}
|
|
/**
 * Moves `file`'s access and modification times 5 seconds past the later of
 * "now" and the file's current mtime.
 *
 * @param file Path of an existing file; a missing file makes the underlying
 *   `fs` call throw.
 */
function bumpMtime(file: string): void {
  // Some CI / fast machines write within the same ms — force a distinct mtime.
  // Starting from the file's current mtime (when it is already ahead of the
  // clock) keeps repeated bumps strictly increasing even within one millisecond.
  const base = Math.max(Date.now(), fs.statSync(file).mtimeMs);
  const t = new Date(base + 5000);
  fs.utimesSync(file, t, t);
}
|
|
|
|
/**
 * Tests for `getBrainTokenIndex`: per-file tokenization, cache reuse keyed on
 * file changes, tolerance of missing files, and on-disk persistence.
 * Every test runs against its own temporary brain directory.
 */
describe('brainIndex.getBrainTokenIndex', () => {
  type IndexedDoc = ReturnType<typeof getBrainTokenIndex>[number];

  /** Only the fields of the persisted JSON that the assertions below read. */
  interface PersistedIndex {
    version: number;
    entries: Record<string, { tokens: string[] }>;
  }

  let brain: string;

  /**
   * Returns the index entry for `filePath`, failing the test with a readable
   * message (instead of a TypeError on `undefined`) when it is missing.
   */
  function entryFor(docs: readonly IndexedDoc[], filePath: string): IndexedDoc {
    const entry = docs.find(d => d.filePath === filePath);
    if (entry === undefined) {
      throw new Error(`no index entry returned for ${filePath}`);
    }
    return entry;
  }

  beforeEach(() => {
    brain = mkTmpBrain();
  });

  afterEach(() => {
    clearBrainTokenIndex(brain);
    // Best-effort cleanup: a failure to remove the temp dir must not fail the test.
    try {
      fs.rmSync(brain, { recursive: true, force: true });
    } catch {
      /* ignore */
    }
  });

  it('tokenizes files, returns one entry per file, and tags lesson cards', () => {
    const a = writeMd(brain, 'architecture-overview.md', '# Architecture overview\nThis describes the system architecture and design.');
    const b = writeMd(brain, 'records/bug-report.md', '# Bug report\n이 설계는 기존 구조와 충돌 위험이 있습니다.');
    const c = writeMd(brain, 'lessons/allowlist.md', '# Lesson\n## Prevention Checklist\n- check the allowlist');

    const out = getBrainTokenIndex(brain, [a, b, c]);
    expect(out).toHaveLength(3);

    const docA = entryFor(out, a);
    const docB = entryFor(out, b);
    const docC = entryFor(out, c);

    expect(docA.tokens).toContain('architecture');
    expect(docA.tokens).toContain('design');
    expect(docA.titleTokens.length).toBeGreaterThan(0);
    expect(docA.kind).toBe('');
    expect(docB.relativePath).toBe(path.join('records', 'bug-report.md'));
    expect(docB.conflictCount).toBeGreaterThan(0); // "충돌" is a conflict indicator
    expect(docA.conflictCount).toBe(0);
    expect(docC.kind).toBe('lesson'); // detected from the lessons/ path segment
  });

  it('reuses cached tokens for unchanged files and re-indexes only changed ones', () => {
    const a = writeMd(brain, 'alpha.md', 'alpha keyword stays the same here');
    const b = writeMd(brain, 'beta.md', 'beta original wording goes here');

    const first = getBrainTokenIndex(brain, [a, b]);
    const aTokensRef = entryFor(first, a).tokens;

    // Re-call without changes — `a` should hand back the *same array reference* (served from cache).
    const second = getBrainTokenIndex(brain, [a, b]);
    expect(entryFor(second, a).tokens).toBe(aTokensRef);

    // Change b. The replacement text has the same byte length as the original,
    // so the bumped mtime is the only stat field that differs.
    fs.writeFileSync(b, 'gamma replaced everything delta', 'utf8');
    bumpMtime(b);

    const third = getBrainTokenIndex(brain, [a, b]);
    const bTokens = entryFor(third, b).tokens;
    expect(bTokens).toEqual(expect.arrayContaining(['gamma', 'delta']));
    expect(bTokens).not.toContain('original');

    // a still cached & unchanged.
    expect(entryFor(third, a).tokens).toBe(aTokensRef);
  });

  it('skips files that vanished between listing and reading', () => {
    const a = writeMd(brain, 'present.md', 'present content');
    const ghost = path.join(brain, 'ghost.md'); // listed but never created

    const out = getBrainTokenIndex(brain, [a, ghost]);
    expect(out.map(d => d.filePath)).toEqual([a]);
  });

  it('re-indexes a file that was deleted and recreated (mtime/size mismatch)', () => {
    const a = writeMd(brain, 'one.md', 'one content');
    const two = writeMd(brain, 'two.md', 'two original content');
    getBrainTokenIndex(brain, [a, two]);

    // Index once without the deleted file, then bring it back with new content.
    fs.rmSync(two);
    getBrainTokenIndex(brain, [a]);
    const recreated = writeMd(brain, 'two.md', 'completely different replacement content');
    bumpMtime(recreated);

    const out = getBrainTokenIndex(brain, [a, recreated]);
    const recreatedTokens = entryFor(out, recreated).tokens;
    expect(recreatedTokens).toContain('completely');
    expect(recreatedTokens).not.toContain('original');
  });

  it('handles empty/invalid input gracefully', () => {
    expect(getBrainTokenIndex('', ['x'])).toEqual([]);
    expect(getBrainTokenIndex(brain, [])).toEqual([]);
  });

  it('persists the index to <brain>/.astra/brain-index.json (debounced) and adds a .gitignore', async () => {
    const a = writeMd(brain, 'persisted.md', 'persist me to disk');
    getBrainTokenIndex(brain, [a]);

    const astraDir = path.join(brain, '.astra');
    const indexFile = path.join(astraDir, 'brain-index.json');
    const ignoreFile = path.join(astraDir, '.gitignore');

    // Yields the parsed index once both files are on disk and the JSON is
    // complete; a missing or half-written file simply counts as "not yet".
    const readPersisted = (): PersistedIndex | undefined => {
      try {
        if (!fs.existsSync(ignoreFile)) {
          return undefined;
        }
        return JSON.parse(fs.readFileSync(indexFile, 'utf8')) as PersistedIndex;
      } catch {
        return undefined;
      }
    };

    // Poll rather than sleep a fixed interval: the write happens some time
    // after the call, and polling keeps the test independent of that delay
    // while still finishing inside the 6 s test timeout.
    const deadline = Date.now() + 5000;
    let persisted = readPersisted();
    while (persisted === undefined && Date.now() < deadline) {
      await new Promise<void>(resolve => setTimeout(resolve, 50));
      persisted = readPersisted();
    }

    expect(fs.existsSync(indexFile)).toBe(true);
    expect(fs.readFileSync(ignoreFile, 'utf8')).toContain('*');
    if (persisted === undefined) {
      throw new Error(`index at ${indexFile} was not readable as JSON before the deadline`);
    }
    expect(persisted.version).toBeGreaterThanOrEqual(1);
    expect(persisted.entries[a]?.tokens).toContain('persist');
  }, 6000);
});
|