feat: integrate unified RAG pipeline and bump version to 2.60.0

2026-05-04 11:00:01 +09:00
parent 0515dd625d
commit 445d530b63
16 changed files with 2178 additions and 112 deletions
@@ -1,6 +1,7 @@
 import * as fs from 'fs';
 import * as path from 'path';
 import { findBrainFiles, summarizeText } from '../utils';
+import { expandQuery, scoreTfIdf, extractBestExcerpt, tokenize as scoringTokenize } from '../retrieval/scoring';

 export type SecondBrainSourceType = 'Project Evidence' | 'User Decision' | 'General Knowledge' | 'Reference Only';
 export type SecondBrainQueryIntent = 'technical' | 'ux-business' | 'governance' | 'general';
@@ -547,16 +548,7 @@ function isStructuredKnowledgeRequest(query: string): boolean {
 }

 function tokenize(value: string): string[] {
-    const stopWords = new Set([
-        '그리고', '그런데', '해서', '하는', '있어', '아래', '문제점들을', '해결하기', '위해서',
-        '어떻게', '대응해야할지', '가이드를', '작성해줘', '필요', '지점', '보완',
-        'what', 'how', 'the', 'and', 'for', 'with', 'please', 'write', 'guide', 'recommendations'
-    ]);
-    return value
-        .toLowerCase()
-        .split(/[^a-z0-9가-힣_]+/g)
-        .map((term) => term.trim())
-        .filter((term) => term.length >= 2 && !stopWords.has(term));
+    return scoringTokenize(value);
 }

 function inferTargetProject(query: string): string | undefined {
@@ -588,21 +580,23 @@ function scoreFile(file: string, brainRoot: string, terms: string[], intent: Sec
    if (targetProject) {
        score += projectRelevanceScore(relative, lower, targetProject, documentProject);
    }
-    for (const term of terms) {
-        if (basename.includes(term)) score += 4;
-        const matches = lower.split(term).length - 1;
-        if (matches > 0) score += knowledgeRole === 'routing-hint' ? Math.min(matches, 1) : Math.min(matches, 6);
-    }
+    const expandedTerms = expandQuery(terms);
+    const scoredTfIdf = scoreTfIdf(expandedTerms, [{ title, content, lastModified: Date.now() }])[0];
+    
+    score += scoredTfIdf.score;
+
    if (knowledgeRole === 'routing-hint') {
        score -= 8;
    }

+    const finalExcerpt = extractBestExcerpt(content, expandedTerms, 420);
+
    return {
        title,
        path: relative,
        absolutePath: file,
-        score: Number((Math.max(score, 0) / Math.max(terms.length, 1)).toFixed(2)),
-        excerpt: summarizeText(bestExcerpt(content, terms), 420),
+        score: Number((Math.max(score, 0) / Math.max(expandedTerms.length, 1)).toFixed(2)),
+        excerpt: summarizeText(finalExcerpt, 420),
        sourceType,
        knowledgeRole,
        canSupportProjectClaim,
@@ -705,25 +699,7 @@ function pathPriority(relativePath: string, intent: SecondBrainQueryIntent): num
    return score;
 }

-function bestExcerpt(content: string, terms: string[]): string {
-    const paragraphs = content
-        .split(/\n\s*\n/g)
-        .map((part) => part.replace(/\s+/g, ' ').trim())
-        .filter(Boolean);
-    if (paragraphs.length === 0) return '';
-
-    let best = paragraphs[0];
-    let bestScore = -1;
-    for (const paragraph of paragraphs) {
-        const lower = paragraph.toLowerCase();
-        const score = terms.reduce((sum, term) => sum + (lower.includes(term) ? 1 : 0), 0);
-        if (score > bestScore) {
-            best = paragraph;
-            bestScore = score;
-        }
-    }
-    return best;
-}
+// The local bestExcerpt helper was removed; excerpt selection now uses extractBestExcerpt from '../retrieval/scoring'.

 function inferCollections(docs: SecondBrainTraceDocument[]): string[] {
    const collections = new Set<string>();