feat(scoring): completed roadmap Phase 1 & 2 with edge case stability v2.74.0

This commit is contained in:
g1nation
2026-05-05 11:20:44 +09:00
parent e6bc263872
commit 518a5ed317
5 changed files with 86 additions and 10 deletions
+24 -1
View File
@@ -71,6 +71,29 @@ describe('Scoring Engine Unit Tests (v2.72.0)', () => {
expect(excerpt).not.toContain('첫 번째 문장');
});
test('Edge Case Tokenization: should handle extreme mixed strings and symbols', () => {
  // Input interleaves Latin letters, Hangul syllables, digits, punctuation,
  // underscores and an emoji within a single string.
  const tokens = tokenize('A한B글C1!@#$ D.E.F_G 🚀Astra_v2.0');

  // Each of these is expected to come out as its own token: the Latin words
  // in lower case, and the individual Hangul syllables that sit between
  // Latin characters in the input.
  for (const expectedToken of ['astra', 'v2', '한', '글']) {
    expect(tokens).toContain(expectedToken);
  }

  // No token may consist solely of the characters ! @ # $.
  const hasSymbolOnlyToken = tokens.some((token) => /^[!@#$]+$/.test(token));
  expect(hasSymbolOnlyToken).toBe(false);
});
test('Long String Performance: should handle 10k character content', () => {
  // Two space-terminated Korean words, each repeated 2000 times back to back.
  const longContent = `${'성능 '.repeat(2000)}${'최적화 '.repeat(2000)}`;

  // Time only the tokenize call itself, using wall-clock milliseconds.
  const startedAt = Date.now();
  const tokens = tokenize(longContent);
  const elapsedMs = Date.now() - startedAt;

  // The tokenizer must produce at least one token for this input...
  expect(tokens.length).toBeGreaterThan(0);
  // ...and must finish in under 100 ms even for text of this length.
  expect(elapsedMs).toBeLessThan(100);
});
test('Performance Benchmark: should process 100 documents within threshold', () => {
const query = tokenize('performance optimization');
const largeDocs = Array.from({ length: 100 }, (_, i) => ({
@@ -83,6 +106,6 @@ describe('Scoring Engine Unit Tests (v2.72.0)', () => {
const duration = Date.now() - start;
console.log(`[Benchmark] 100 docs processing time: ${duration}ms`);
expect(duration).toBeLessThan(200); // Should be very fast due to caching
expect(duration).toBeLessThan(200);
});
});