feat(scoring): completed roadmap Phase 1 & 2 with edge case stability v2.74.0
This commit is contained in:
+24
-1
@@ -71,6 +71,29 @@ describe('Scoring Engine Unit Tests (v2.72.0)', () => {
|
||||
expect(excerpt).not.toContain('첫 번째 문장');
|
||||
});
|
||||
|
||||
test('Edge Case Tokenization: should handle extreme mixed strings and symbols', () => {
  // The input interleaves Latin letters, Hangul syllables, digits,
  // punctuation, underscores, and an emoji in a single string.
  const tokens = tokenize('A한B글C1!@#$ D.E.F_G 🚀Astra_v2.0');

  // Tokens that must be present after splitting at language boundaries.
  for (const expected of ['astra', 'v2', '한', '글']) {
    expect(tokens).toContain(expected);
  }

  // No token may consist solely of the symbols ! @ # $.
  const symbolOnly = /^[!@#$]+$/;
  expect(tokens.some((token) => symbolOnly.test(token))).toBe(false);
});
|
||||
|
||||
test('Long String Performance: should handle 10k character content', () => {
  // Two Korean words, each repeated 2000 times with a trailing space:
  // 6,000 + 8,000 = 14,000 UTF-16 code units in total.
  const repeatCount = 2000;
  const longContent = `${'성능 '.repeat(repeatCount)}${'최적화 '.repeat(repeatCount)}`;

  // Time a single tokenize() call using the wall clock (millisecond resolution).
  const startedAt = Date.now();
  const tokens = tokenize(longContent);
  const elapsedMs = Date.now() - startedAt;

  expect(tokens.length).toBeGreaterThan(0);
  // Budget: tokenizing the whole string must take less than 100 ms.
  expect(elapsedMs).toBeLessThan(100);
});
|
||||
|
||||
test('Performance Benchmark: should process 100 documents within threshold', () => {
|
||||
const query = tokenize('performance optimization');
|
||||
const largeDocs = Array.from({ length: 100 }, (_, i) => ({
|
||||
@@ -83,6 +106,6 @@ describe('Scoring Engine Unit Tests (v2.72.0)', () => {
|
||||
const duration = Date.now() - start;
|
||||
|
||||
console.log(`[Benchmark] 100 docs processing time: ${duration}ms`);
|
||||
expect(duration).toBeLessThan(200); // Should be very fast due to caching
|
||||
expect(duration).toBeLessThan(200);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user