feat: integrate unified RAG pipeline and bump version to 2.60.0

This commit is contained in:
g1nation
2026-05-04 11:00:01 +09:00
parent 0515dd625d
commit 445d530b63
16 changed files with 2178 additions and 112 deletions
+12 -36
View File
@@ -1,6 +1,7 @@
import * as fs from 'fs';
import * as path from 'path';
import { findBrainFiles, summarizeText } from '../utils';
import { expandQuery, scoreTfIdf, extractBestExcerpt, tokenize as scoringTokenize } from '../retrieval/scoring';
export type SecondBrainSourceType = 'Project Evidence' | 'User Decision' | 'General Knowledge' | 'Reference Only';
export type SecondBrainQueryIntent = 'technical' | 'ux-business' | 'governance' | 'general';
@@ -547,16 +548,7 @@ function isStructuredKnowledgeRequest(query: string): boolean {
}
/**
 * Splits a query string into search terms.
 *
 * The first return statement lowercases the input, splits it on runs of
 * characters other than a-z, 0-9, Hangul syllables (가-힣) and underscore,
 * trims each piece, and drops pieces shorter than two characters or listed
 * in the local stop-word set.
 */
function tokenize(value: string): string[] {
// Filler words (Korean and English) excluded from the resulting term list.
const stopWords = new Set([
'그리고', '그런데', '해서', '하는', '있어', '아래', '문제점들을', '해결하기', '위해서',
'어떻게', '대응해야할지', '가이드를', '작성해줘', '필요', '지점', '보완',
'what', 'how', 'the', 'and', 'for', 'with', 'please', 'write', 'guide', 'recommendations'
]);
return value
.toLowerCase()
.split(/[^a-z0-9가-힣_]+/g)
.map((term) => term.trim())
.filter((term) => term.length >= 2 && !stopWords.has(term));
// NOTE(review): the statement below follows an unconditional return, so it is
// unreachable as this listing is written. The listing looks like a diff shown
// without +/- markers, in which case this line is presumably the replacement
// for everything above it in the body — confirm against the committed file.
return scoringTokenize(value);
}
function inferTargetProject(query: string): string | undefined {
@@ -588,21 +580,23 @@ function scoreFile(file: string, brainRoot: string, terms: string[], intent: Sec
if (targetProject) {
score += projectRelevanceScore(relative, lower, targetProject, documentProject);
}
for (const term of terms) {
if (basename.includes(term)) score += 4;
const matches = lower.split(term).length - 1;
if (matches > 0) score += knowledgeRole === 'routing-hint' ? Math.min(matches, 1) : Math.min(matches, 6);
}
const expandedTerms = expandQuery(terms);
const scoredTfIdf = scoreTfIdf(expandedTerms, [{ title, content, lastModified: Date.now() }])[0];
score += scoredTfIdf.score;
if (knowledgeRole === 'routing-hint') {
score -= 8;
}
const finalExcerpt = extractBestExcerpt(content, expandedTerms, 420);
return {
title,
path: relative,
absolutePath: file,
score: Number((Math.max(score, 0) / Math.max(terms.length, 1)).toFixed(2)),
excerpt: summarizeText(bestExcerpt(content, terms), 420),
score: Number((Math.max(score, 0) / Math.max(expandedTerms.length, 1)).toFixed(2)),
excerpt: summarizeText(finalExcerpt, 420),
sourceType,
knowledgeRole,
canSupportProjectClaim,
@@ -705,25 +699,7 @@ function pathPriority(relativePath: string, intent: SecondBrainQueryIntent): num
return score;
}
/**
 * Picks the paragraph of `content` that contains the most query terms.
 *
 * Paragraphs are separated by blank lines; inside each paragraph every run of
 * whitespace is collapsed to a single space and the result is trimmed.
 * Paragraphs that end up empty are ignored.
 *
 * @param content Text to search.
 * @param terms Terms to look for. Each term is matched as-is against the
 *   lowercased paragraph and counts at most once per paragraph.
 * @returns The highest-scoring paragraph (the earliest one on a tie), or an
 *   empty string when there is no non-empty paragraph.
 */
function bestExcerpt(content: string, terms: string[]): string {
  let winner = '';
  // Starts below zero so the first non-empty paragraph is always accepted.
  let winnerHits = -1;

  for (const chunk of content.split(/\n\s*\n/g)) {
    const flattened = chunk.replace(/\s+/g, ' ').trim();
    if (flattened.length === 0) {
      continue;
    }

    const haystack = flattened.toLowerCase();
    let hits = 0;
    for (const term of terms) {
      if (haystack.includes(term)) {
        hits += 1;
      }
    }

    // Strict comparison keeps the earliest paragraph when scores are equal.
    if (hits > winnerHits) {
      winner = flattened;
      winnerHits = hits;
    }
  }

  return winner;
}
// bestExcerpt is replaced by extractBestExcerpt from scoring.ts
function inferCollections(docs: SecondBrainTraceDocument[]): string[] {
const collections = new Set<string>();