feat(scoring): complete roadmap Phases 1 & 2 with edge-case stability fixes (v2.74.0)
This commit is contained in:
@@ -55,7 +55,12 @@ const SCORING_CONFIG = {
|
||||
CONFLICT_INDICATORS: new Set([
|
||||
'반대', '충돌', '오류', '논란', '반박', '차이', '대조',
|
||||
'conflict', 'contradict', 'dispute', 'controversy', 'error', 'mismatch', 'vs'
|
||||
])
|
||||
]),
|
||||
CONFLICT_THRESHOLDS: {
|
||||
HIGH: 4,
|
||||
MEDIUM: 2,
|
||||
LOW: 1
|
||||
}
|
||||
};
|
||||
|
||||
// ─── Global Search State & Cache ───
|
||||
@@ -86,9 +91,14 @@ export function tokenize(text: string): string[] {
|
||||
const splitText = normalized.replace(/([a-z0-9]+)([가-힣]+)/gi, '$1 $2').replace(/([가-힣]+)([a-z0-9]+)/gi, '$1 $2');
|
||||
|
||||
const tokens = splitText
|
||||
.split(/[^a-z0-9가-힣_]+/g)
|
||||
.split(/[^a-z0-9가-힣]+/g)
|
||||
.map((t) => t.trim())
|
||||
.filter((t) => t.length >= 2)
|
||||
.filter((t) => {
|
||||
if (!t) return false;
|
||||
// Tokens containing Hangul are allowed even at one character; all others (Latin letters/digits) must be at least 2 characters
|
||||
if (/[가-힣]/.test(t)) return t.length >= 1;
|
||||
return t.length >= 2;
|
||||
})
|
||||
.filter((t) => !SCORING_CONFIG.STOP_WORDS_EN.has(t) && !SCORING_CONFIG.STOP_WORDS_KO.has(t));
|
||||
|
||||
if (TOKEN_CACHE.size >= SCORING_CONFIG.GLOBAL_CACHE_LIMIT) TOKEN_CACHE.clear();
|
||||
@@ -228,9 +238,9 @@ export function scoreTfIdf(
|
||||
const conflictDetected = conflictMatches.length > 0;
|
||||
let conflictSeverity: ConflictSeverity = 'NONE';
|
||||
|
||||
if (conflictMatches.length >= 4) conflictSeverity = 'HIGH';
|
||||
else if (conflictMatches.length >= 2) conflictSeverity = 'MEDIUM';
|
||||
else if (conflictMatches.length === 1) conflictSeverity = 'LOW';
|
||||
if (conflictMatches.length >= SCORING_CONFIG.CONFLICT_THRESHOLDS.HIGH) conflictSeverity = 'HIGH';
|
||||
else if (conflictMatches.length >= SCORING_CONFIG.CONFLICT_THRESHOLDS.MEDIUM) conflictSeverity = 'MEDIUM';
|
||||
else if (conflictMatches.length >= SCORING_CONFIG.CONFLICT_THRESHOLDS.LOW) conflictSeverity = 'LOW';
|
||||
|
||||
for (const term of expandedQuery) {
|
||||
const tf = termFrequency(term, docTokens);
|
||||
|
||||
Reference in New Issue
Block a user