feat(scoring): complete roadmap Phases 1 & 2 with edge-case stability (v2.74.0)

This commit is contained in:
g1nation
2026-05-05 11:20:44 +09:00
parent e6bc263872
commit 518a5ed317
5 changed files with 86 additions and 10 deletions
+16 -6
View File
@@ -55,7 +55,12 @@ const SCORING_CONFIG = {
CONFLICT_INDICATORS: new Set([
'반대', '충돌', '오류', '논란', '반박', '차이', '대조',
'conflict', 'contradict', 'dispute', 'controversy', 'error', 'mismatch', 'vs'
])
]),
CONFLICT_THRESHOLDS: {
HIGH: 4,
MEDIUM: 2,
LOW: 1
}
};
// ─── Global Search State & Cache ───
@@ -86,9 +91,14 @@ export function tokenize(text: string): string[] {
const splitText = normalized.replace(/([a-z0-9]+)([가-힣]+)/gi, '$1 $2').replace(/([가-힣]+)([a-z0-9]+)/gi, '$1 $2');
const tokens = splitText
.split(/[^a-z0-9가-힣_]+/g)
.split(/[^a-z0-9가-힣]+/g)
.map((t) => t.trim())
.filter((t) => t.length >= 2)
.filter((t) => {
if (!t) return false;
// Tokens containing Hangul are kept even when only one character long; all other tokens (Latin letters/digits) must be at least 2 characters
if (/[가-힣]/.test(t)) return t.length >= 1;
return t.length >= 2;
})
.filter((t) => !SCORING_CONFIG.STOP_WORDS_EN.has(t) && !SCORING_CONFIG.STOP_WORDS_KO.has(t));
if (TOKEN_CACHE.size >= SCORING_CONFIG.GLOBAL_CACHE_LIMIT) TOKEN_CACHE.clear();
@@ -228,9 +238,9 @@ export function scoreTfIdf(
const conflictDetected = conflictMatches.length > 0;
let conflictSeverity: ConflictSeverity = 'NONE';
if (conflictMatches.length >= 4) conflictSeverity = 'HIGH';
else if (conflictMatches.length >= 2) conflictSeverity = 'MEDIUM';
else if (conflictMatches.length === 1) conflictSeverity = 'LOW';
if (conflictMatches.length >= SCORING_CONFIG.CONFLICT_THRESHOLDS.HIGH) conflictSeverity = 'HIGH';
else if (conflictMatches.length >= SCORING_CONFIG.CONFLICT_THRESHOLDS.MEDIUM) conflictSeverity = 'MEDIUM';
else if (conflictMatches.length >= SCORING_CONFIG.CONFLICT_THRESHOLDS.LOW) conflictSeverity = 'LOW';
for (const term of expandedQuery) {
const tf = termFrequency(term, docTokens);