feat(scoring): integrate conflict detection and information density metrics (v2.71.0)
This commit is contained in:
@@ -51,7 +51,11 @@ const SCORING_CONFIG = {
|
||||
] as [string, string[]][],
|
||||
DENSITY_THRESHOLD: 0.15, // 발췌문 추출 시 최소 키워드 밀도
|
||||
TITLE_MULTIPLIER: 3.0, // 제목 일치 가중치
|
||||
GLOBAL_CACHE_LIMIT: 2000
|
||||
GLOBAL_CACHE_LIMIT: 2000,
|
||||
CONFLICT_INDICATORS: new Set([
|
||||
'반대', '충돌', '오류', '논란', '반박', '차이', '대조',
|
||||
'conflict', 'contradict', 'dispute', 'controversy', 'error', 'mismatch', 'vs'
|
||||
])
|
||||
};
|
||||
|
||||
// ─── Global Search State & Cache ───
|
||||
@@ -168,6 +172,8 @@ export interface ScoredDocument {
|
||||
titleBoost: number;
|
||||
recencyBoost: number;
|
||||
matchedTerms: string[];
|
||||
conflictDetected: boolean;
|
||||
informationDensity: number;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -208,6 +214,9 @@ export function scoreTfIdf(
|
||||
let score = 0;
|
||||
const matchedTerms: string[] = [];
|
||||
|
||||
// Conflict Detection: 문서 내 상충 지표 확인
|
||||
const conflictDetected = docTokens.some(t => SCORING_CONFIG.CONFLICT_INDICATORS.has(t));
|
||||
|
||||
for (const term of expandedQuery) {
|
||||
const tf = termFrequency(term, docTokens);
|
||||
const idf = idfCache.get(term) || 1;
|
||||
@@ -217,12 +226,15 @@ export function scoreTfIdf(
|
||||
matchedTerms.push(term);
|
||||
}
|
||||
|
||||
// Title match bonus (3x)
|
||||
const titleMultiplier = titleTokens.has(term) ? 3.0 : 1.0;
|
||||
// Title match bonus
|
||||
const titleMultiplier = titleTokens.has(term) ? SCORING_CONFIG.TITLE_MULTIPLIER : 1.0;
|
||||
score += tfidf * titleMultiplier;
|
||||
}
|
||||
|
||||
// Recency boost: documents modified recently get a boost
|
||||
// Information Density: 쿼리 관련 토큰의 밀도 측정
|
||||
const informationDensity = docTokens.length > 0 ? matchedTerms.length / docTokens.length : 0;
|
||||
|
||||
// Recency boost
|
||||
let recencyBoost = 0;
|
||||
if (doc.lastModified) {
|
||||
const daysAgo = (now - doc.lastModified) / (1000 * 60 * 60 * 24);
|
||||
@@ -239,7 +251,9 @@ export function scoreTfIdf(
|
||||
score: score + recencyBoost + titleBoost,
|
||||
titleBoost,
|
||||
recencyBoost,
|
||||
matchedTerms: [...new Set(matchedTerms)]
|
||||
matchedTerms: [...new Set(matchedTerms)],
|
||||
conflictDetected,
|
||||
informationDensity
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user