feat(scoring): complete roadmap Phases 1 & 2 with edge-case stability (v2.74.0)

2026-05-05 11:20:44 +09:00
parent e6bc263872
commit 518a5ed317
5 changed files with 86 additions and 10 deletions
@@ -71,6 +71,29 @@ describe('Scoring Engine Unit Tests (v2.72.0)', () => {
        expect(excerpt).not.toContain('첫 번째 문장');
    });

+    test('Edge Case Tokenization: should handle extreme mixed strings and symbols', () => {
+        const text = 'A한B글C1!@#$ D.E.F_G 🚀Astra_v2.0';
+        const tokens = tokenize(text);
+        
+        // Expected: 'Astra_v2.0' yields the lowercased tokens 'astra' and 'v2', and each Hangul syllable wedged between Latin letters is its own token
+        expect(tokens).toContain('astra');
+        expect(tokens).toContain('v2');
+        expect(tokens).toContain('한');
+        expect(tokens).toContain('글');
+        // No token may consist solely of the characters ! @ # $
+        expect(tokens.some(t => /^[!@#$]+$/.test(t))).toBe(false);
+    });
+
+    test('Long String Performance: should handle 10k character content', () => {
+        const longContent = '성능 '.repeat(2000) + '최적화 '.repeat(2000); // 2000 * 3 + 2000 * 4 = 14,000 characters
+        const start = Date.now();
+        const tokens = tokenize(longContent);
+        const duration = Date.now() - start;
+        
+        expect(tokens.length).toBeGreaterThan(0);
+        expect(duration).toBeLessThan(100); // Tokenizer should be efficient even for long text: one call must finish in under 100 ms (wall clock, via Date.now())
+    });
+
    test('Performance Benchmark: should process 100 documents within threshold', () => {
        const query = tokenize('performance optimization');
        const largeDocs = Array.from({ length: 100 }, (_, i) => ({
@@ -83,6 +106,6 @@ describe('Scoring Engine Unit Tests (v2.72.0)', () => {
        const duration = Date.now() - start;
        
        console.log(`[Benchmark] 100 docs processing time: ${duration}ms`);
-        expect(duration).toBeLessThan(200); // Should be very fast due to caching
+        expect(duration).toBeLessThan(200); 
    });
 });