feat(scoring): implemented semantic context padding and optimized excerpting v2.76.0
This commit is contained in:
Generated
+2
-2
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "g1nation",
|
||||
"version": "2.75.0",
|
||||
"version": "2.76.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "g1nation",
|
||||
"version": "2.75.0",
|
||||
"version": "2.76.0",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"marked": "^18.0.2"
|
||||
|
||||
+1
-1
@@ -2,7 +2,7 @@
|
||||
"name": "astra",
|
||||
"displayName": "Astra",
|
||||
"description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.",
|
||||
"version": "2.75.0",
|
||||
"version": "2.76.0",
|
||||
"publisher": "g1nation",
|
||||
"license": "MIT",
|
||||
"icon": "assets/icon.png",
|
||||
|
||||
@@ -300,7 +300,7 @@ export function extractBestExcerpt(
|
||||
const sentences = content
|
||||
.split(/(?<=[.!?。!?\n])\s*/)
|
||||
.map((s) => s.trim())
|
||||
.filter((s) => s.length > 10);
|
||||
.filter((s) => s.length > 5);
|
||||
|
||||
if (sentences.length === 0) return content.slice(0, maxLength);
|
||||
|
||||
@@ -343,8 +343,16 @@ export function extractBestExcerpt(
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Result construction with semantic context padding
|
||||
let finalStart = bestStart;
|
||||
let finalEnd = bestStart + bestLen;
|
||||
|
||||
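// Add one sentence of context before and after the best window for semantic completeness (intended "budget permitting" — NOTE(review): no maxLength check is visible at this point; confirm the result is truncated downstream)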
|
||||
if (finalStart > 0) finalStart--;
|
||||
if (finalEnd < scored.length) finalEnd++;
|
||||
|
||||
const excerptSentences = scored
|
||||
.slice(bestStart, bestStart + bestLen)
|
||||
.slice(finalStart, finalEnd)
|
||||
.map((s) => s.sentence);
|
||||
|
||||
const result = excerptSentences.join(' ');
|
||||
|
||||
@@ -94,6 +94,21 @@ describe('Scoring Engine Unit Tests (v2.72.0)', () => {
|
||||
expect(duration).toBeLessThan(100); // Tokenizer should be efficient even for long text
|
||||
});
|
||||
|
||||
test('Contextual Completeness: should include adjacent sentences for semantic padding', () => {
|
||||
const content = `
|
||||
도입부 문장입니다.
|
||||
핵심 키워드 성능 최적화가 포함된 문장입니다.
|
||||
마무리 문장입니다.
|
||||
`;
|
||||
const query = ['성능', '최적화'];
|
||||
const excerpt = extractBestExcerpt(content, query, 200);
|
||||
|
||||
// Should include introduction and conclusion due to padding
|
||||
expect(excerpt).toContain('도입부 문장');
|
||||
expect(excerpt).toContain('핵심 키워드');
|
||||
expect(excerpt).toContain('마무리 문장');
|
||||
});
|
||||
|
||||
test('Performance Benchmark: should process 100 documents within threshold', () => {
|
||||
const query = tokenize('performance optimization');
|
||||
const largeDocs = Array.from({ length: 100 }, (_, i) => ({
|
||||
|
||||
Reference in New Issue
Block a user