From 18021bd19d56c1e6fb58cf7247fce49e2e9f8d4f Mon Sep 17 00:00:00 2001 From: g1nation Date: Tue, 5 May 2026 11:30:29 +0900 Subject: [PATCH] feat(scoring): implemented semantic context padding and optimized excerpting v2.76.0 --- package-lock.json | 4 ++-- package.json | 2 +- src/retrieval/scoring.ts | 12 ++++++++++-- tests/scoring.test.ts | 15 +++++++++++++++ 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/package-lock.json b/package-lock.json index 704618a..8d2ac4f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "g1nation", - "version": "2.75.0", + "version": "2.76.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "g1nation", - "version": "2.75.0", + "version": "2.76.0", "license": "MIT", "dependencies": { "marked": "^18.0.2" diff --git a/package.json b/package.json index b2eab97..ed7f04f 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "astra", "displayName": "Astra", "description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.", - "version": "2.75.0", + "version": "2.76.0", "publisher": "g1nation", "license": "MIT", "icon": "assets/icon.png", diff --git a/src/retrieval/scoring.ts b/src/retrieval/scoring.ts index 64f10cd..786adb0 100644 --- a/src/retrieval/scoring.ts +++ b/src/retrieval/scoring.ts @@ -300,7 +300,7 @@ export function extractBestExcerpt( const sentences = content .split(/(?<=[.!?。!?\n])\s*/) .map((s) => s.trim()) - .filter((s) => s.length > 10); + .filter((s) => s.length > 5); if (sentences.length === 0) return content.slice(0, maxLength); @@ -343,8 +343,16 @@ export function extractBestExcerpt( } } + // 4. Result construction with semantic context padding + let finalStart = bestStart; + let finalEnd = bestStart + bestLen; + + // 전후 문맥을 1문장씩 추가하여 의미적 완전성 확보 (예산 허용 시) + if (finalStart > 0) finalStart--; + if (finalEnd < scored.length) finalEnd++; + const excerptSentences = scored - .slice(bestStart, bestStart + bestLen) + .slice(finalStart, finalEnd) .map((s) => s.sentence); const result = excerptSentences.join(' '); diff --git a/tests/scoring.test.ts b/tests/scoring.test.ts index 93792ee..54df796 100644 --- a/tests/scoring.test.ts +++ b/tests/scoring.test.ts @@ -94,6 +94,21 @@ describe('Scoring Engine Unit Tests (v2.72.0)', () => { expect(duration).toBeLessThan(100); // Tokenizer should be efficient even for long text }); + test('Contextual Completeness: should include adjacent sentences for semantic padding', () => { + const content = ` + 도입부 문장입니다. + 핵심 키워드 성능 최적화가 포함된 문장입니다. + 마무리 문장입니다. + `; + const query = ['성능', '최적화']; + const excerpt = extractBestExcerpt(content, query, 200); + + // Should include introduction and conclusion due to padding + expect(excerpt).toContain('도입부 문장'); + expect(excerpt).toContain('핵심 키워드'); + expect(excerpt).toContain('마무리 문장'); + }); + test('Performance Benchmark: should process 100 documents within threshold', () => { const query = tokenize('performance optimization'); const largeDocs = Array.from({ length: 100 }, (_, i) => ({