feat: integrate unified RAG pipeline and bump version to 2.60.0
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { findBrainFiles, summarizeText } from '../utils';
|
||||
import { expandQuery, scoreTfIdf, extractBestExcerpt, tokenize as scoringTokenize } from '../retrieval/scoring';
|
||||
|
||||
export type SecondBrainSourceType = 'Project Evidence' | 'User Decision' | 'General Knowledge' | 'Reference Only';
|
||||
export type SecondBrainQueryIntent = 'technical' | 'ux-business' | 'governance' | 'general';
|
||||
@@ -547,16 +548,7 @@ function isStructuredKnowledgeRequest(query: string): boolean {
|
||||
}
|
||||
|
||||
function tokenize(value: string): string[] {
|
||||
const stopWords = new Set([
|
||||
'그리고', '그런데', '해서', '하는', '있어', '아래', '문제점들을', '해결하기', '위해서',
|
||||
'어떻게', '대응해야할지', '가이드를', '작성해줘', '필요', '지점', '보완',
|
||||
'what', 'how', 'the', 'and', 'for', 'with', 'please', 'write', 'guide', 'recommendations'
|
||||
]);
|
||||
return value
|
||||
.toLowerCase()
|
||||
.split(/[^a-z0-9가-힣_]+/g)
|
||||
.map((term) => term.trim())
|
||||
.filter((term) => term.length >= 2 && !stopWords.has(term));
|
||||
return scoringTokenize(value);
|
||||
}
|
||||
|
||||
function inferTargetProject(query: string): string | undefined {
|
||||
@@ -588,21 +580,23 @@ function scoreFile(file: string, brainRoot: string, terms: string[], intent: Sec
|
||||
if (targetProject) {
|
||||
score += projectRelevanceScore(relative, lower, targetProject, documentProject);
|
||||
}
|
||||
for (const term of terms) {
|
||||
if (basename.includes(term)) score += 4;
|
||||
const matches = lower.split(term).length - 1;
|
||||
if (matches > 0) score += knowledgeRole === 'routing-hint' ? Math.min(matches, 1) : Math.min(matches, 6);
|
||||
}
|
||||
const expandedTerms = expandQuery(terms);
|
||||
const scoredTfIdf = scoreTfIdf(expandedTerms, [{ title, content, lastModified: Date.now() }])[0];
|
||||
|
||||
score += scoredTfIdf.score;
|
||||
|
||||
if (knowledgeRole === 'routing-hint') {
|
||||
score -= 8;
|
||||
}
|
||||
|
||||
const finalExcerpt = extractBestExcerpt(content, expandedTerms, 420);
|
||||
|
||||
return {
|
||||
title,
|
||||
path: relative,
|
||||
absolutePath: file,
|
||||
score: Number((Math.max(score, 0) / Math.max(terms.length, 1)).toFixed(2)),
|
||||
excerpt: summarizeText(bestExcerpt(content, terms), 420),
|
||||
score: Number((Math.max(score, 0) / Math.max(expandedTerms.length, 1)).toFixed(2)),
|
||||
excerpt: summarizeText(finalExcerpt, 420),
|
||||
sourceType,
|
||||
knowledgeRole,
|
||||
canSupportProjectClaim,
|
||||
@@ -705,25 +699,7 @@ function pathPriority(relativePath: string, intent: SecondBrainQueryIntent): num
|
||||
return score;
|
||||
}
|
||||
|
||||
function bestExcerpt(content: string, terms: string[]): string {
|
||||
const paragraphs = content
|
||||
.split(/\n\s*\n/g)
|
||||
.map((part) => part.replace(/\s+/g, ' ').trim())
|
||||
.filter(Boolean);
|
||||
if (paragraphs.length === 0) return '';
|
||||
|
||||
let best = paragraphs[0];
|
||||
let bestScore = -1;
|
||||
for (const paragraph of paragraphs) {
|
||||
const lower = paragraph.toLowerCase();
|
||||
const score = terms.reduce((sum, term) => sum + (lower.includes(term) ? 1 : 0), 0);
|
||||
if (score > bestScore) {
|
||||
best = paragraph;
|
||||
bestScore = score;
|
||||
}
|
||||
}
|
||||
return best;
|
||||
}
|
||||
// bestExcerpt is replaced by extractBestExcerpt from scoring.ts
|
||||
|
||||
function inferCollections(docs: SecondBrainTraceDocument[]): string[] {
|
||||
const collections = new Set<string>();
|
||||
|
||||
Reference in New Issue
Block a user