/**
 * Unit tests for the Confidence Engine and Escalation Engine (Self-Evolving OS Phase 2).
 * Only pure functions are exercised — no vscode dependency.
 */
import {
  extractAnswerSignals,
  computeConfidence,
  formatConfidenceFooter,
  toBand,
  RetrievalConfidenceSignals,
} from '../src/intelligence/confidenceEngine';
import { decideEscalation, formatEscalationFooter } from '../src/intelligence/escalationEngine';
import { buildEpistemicGuardBlock } from '../src/intelligence/epistemicGuardBlock';
import { buildCitationTraceBlock } from '../src/retrieval/citationTrace';
import type { RetrievalChunk } from '../src/retrieval/types';

// Retrieval fixture: five chunks, a top score of 0.82, no conflicts, no ambiguity.
const strongRetrieval: RetrievalConfidenceSignals = {
  chunkCount: 5,
  topScore: 0.82,
  conflictCount: 0,
  ambiguityDetected: false,
};

// Retrieval fixture: nothing was retrieved at all.
const noRetrieval: RetrievalConfidenceSignals = {
  chunkCount: 0,
  topScore: 0,
  conflictCount: 0,
  ambiguityDetected: false,
};

// Builds a fresh answer-signal object with no hedges, a citation, and no coverage gaps.
// A new object is returned on every call so tests never share a mutable instance.
const groundedAnswer = () => ({
  hedgeCount: 0,
  hasCitation: true,
  modelKnowledgeOnly: false,
  coverageMissing: 0,
});

describe('extractAnswerSignals', () => {
  it('헤지 마커와 출처 인용을 추출한다', () => {
    const signals = extractAnswerSignals(
      '시장 규모는 5조원으로 추정됩니다. (확인 필요)\n\n*출처:* `시장조사.md`',
      0,
    );

    expect(signals.hedgeCount).toBe(2);
    expect(signals.hasCitation).toBe(true);
    expect(signals.modelKnowledgeOnly).toBe(false);
  });

  it('모델 지식만 사용 표기를 구분한다', () => {
    const signals = extractAnswerSignals(
      '일반적인 설명입니다.\n\n*출처: 모델 지식 (검색 출처 미사용)*',
      null,
    );

    expect(signals.hasCitation).toBe(false);
    expect(signals.modelKnowledgeOnly).toBe(true);
  });
});

describe('computeConfidence', () => {
  it('강한 그라운딩 + 출처 인용 + 커버리지 충족 → 높음(90+)', () => {
    const result = computeConfidence(strongRetrieval, groundedAnswer());

    expect(result.score).toBeGreaterThanOrEqual(90);
    expect(result.band).toBe('high');
  });

  it('근거 없음 + 모델 지식만 → 매우 낮음(<50)', () => {
    const result = computeConfidence(noRetrieval, {
      hedgeCount: 2,
      hasCitation: false,
      modelKnowledgeOnly: true,
      coverageMissing: null,
    });

    expect(result.score).toBeLessThan(50);
    expect(result.band).toBe('very-low');
  });

  it('충돌·모호성·커버리지 누락이 점수를 깎는다', () => {
    const baseline = computeConfidence(strongRetrieval, groundedAnswer());

    // Same inputs as the baseline except for two conflicts, an ambiguity flag,
    // and three missing coverage items.
    const degradedRetrieval = { ...strongRetrieval, conflictCount: 2, ambiguityDetected: true };
    const degradedAnswer = { ...groundedAnswer(), coverageMissing: 3 };
    const degraded = computeConfidence(degradedRetrieval, degradedAnswer);

    expect(degraded.score).toBeLessThan(baseline.score);
    expect(degraded.factors.some((factor) => factor.label.includes('충돌'))).toBe(true);
  });

  it('점수는 0~100 으로 clamp 된다', () => {
    const result = computeConfidence(noRetrieval, {
      hedgeCount: 99,
      hasCitation: false,
      modelKnowledgeOnly: true,
      coverageMissing: 99,
    });

    expect(result.score).toBeGreaterThanOrEqual(0);
    expect(result.score).toBeLessThanOrEqual(100);
  });

  it('구간 경계 — 90/70/50', () => {
    // Each threshold is probed at the boundary value and one point below it.
    expect(toBand(90)).toBe('high');
    expect(toBand(89)).toBe('medium');
    expect(toBand(70)).toBe('medium');
    expect(toBand(69)).toBe('low');
    expect(toBand(50)).toBe('low');
    expect(toBand(49)).toBe('very-low');
  });
});

describe('decideEscalation', () => {
  // Coverage result for a detected meeting-minutes task with nothing missing.
  const coverageOk = {
    ran: true,
    taskId: 'meeting-minutes',
    taskLabel: '회의록',
    covered: ['참석자'],
    missing: [] as string[],
  };

  // Coverage result for a turn where the coverage check did not run.
  const noTask = {
    ran: false,
    covered: [] as string[],
    missing: [] as string[],
  };

  // Minimal confidence result for a given score; the band is derived through toBand.
  function confidenceAt(score: number) {
    return { score, band: toBand(score), bandLabel: '', factors: [] };
  }

  it('확신도 <50 이면 무조건 에스컬레이션', () => {
    const decision = decideEscalation({
      confidence: confidenceAt(40),
      coverage: noTask,
      conflictCount: 0,
    });

    expect(decision.escalate).toBe(true);
    expect(decision.reasons[0]).toContain('매우 낮음');
  });

  it('고영향 업무(회의록) + 확신도 <70 → 검토 권장', () => {
    const decision = decideEscalation({
      confidence: confidenceAt(60),
      coverage: coverageOk,
      conflictCount: 0,
    });

    expect(decision.escalate).toBe(true);
    expect(decision.reasons.some((reason) => reason.includes('회의록'))).toBe(true);
  });

  it('시장조사에서 출처 누락 → 단독 에스컬레이션', () => {
    const marketResearchCoverage = {
      ran: true,
      taskId: 'market-research',
      taskLabel: '시장조사',
      covered: [],
      missing: ['출처'],
    };
    const decision = decideEscalation({
      confidence: confidenceAt(85),
      coverage: marketResearchCoverage,
      conflictCount: 0,
    });

    expect(decision.escalate).toBe(true);
    expect(decision.reasons.some((reason) => reason.includes('출처'))).toBe(true);
  });

  it('출처 충돌 + 확신도 <90 → 에스컬레이션', () => {
    const decision = decideEscalation({
      confidence: confidenceAt(80),
      coverage: noTask,
      conflictCount: 1,
    });

    expect(decision.escalate).toBe(true);
  });

  it('확신도 높음 + 충돌 없음 + 커버리지 충족 → 에스컬레이션 없음', () => {
    const decision = decideEscalation({
      confidence: confidenceAt(95),
      coverage: coverageOk,
      conflictCount: 0,
    });

    expect(decision.escalate).toBe(false);
    // With no escalation the footer is expected to be the empty string.
    expect(formatEscalationFooter(decision)).toBe('');
  });
});

describe('formatConfidenceFooter', () => {
  it('점수·구간·상위 요인을 표시한다', () => {
    const result = computeConfidence(strongRetrieval, groundedAnswer());
    const footer = formatConfidenceFooter(result);

    expect(footer).toContain(`확신도 ${result.score}/100`);
    expect(footer).toContain('높음');
  });
});

describe('buildEpistemicGuardBlock', () => {
  it('근거 없는 업무 turn 에 역질문 우선 지시가 들어간다', () => {
    const block = buildEpistemicGuardBlock({ chunkCount: 0, taskDetected: true });

    expect(block).toContain('검색 근거가 없음');
    expect(block).toContain('질문');
  });

  it('근거 있는 turn 은 3분류 규칙만', () => {
    const block = buildEpistemicGuardBlock({ chunkCount: 4, taskDetected: false });

    expect(block).toContain('확인 필요');
    expect(block).not.toContain('검색 근거가 없음');
  });
});

describe('citationTrace Provenance 확장', () => {
  const DAY_MS = 24 * 3600 * 1000;

  // Fixed reference instant so the age calculations in these tests are deterministic.
  const NOW = Date.parse('2026-06-11T00:00:00Z');

  // Builds a chunk whose id and title are both `title`; `lastUpdated` is optional metadata.
  const makeChunk = (title: string, lastUpdated?: number): RetrievalChunk => ({
    id: title,
    // NOTE(review): `as any` bypasses type checking on `source` — confirm whether
    // 'brain-memory' is a valid value of the declared source type.
    source: 'brain-memory' as any,
    title,
    content: 'body',
    score: 0.8,
    tokenEstimate: 1,
    metadata: { lastUpdated },
  });

  it('수정일 메타데이터가 있으면 Provenance 섹션 표시 + 오래된 출처 경고', () => {
    const recentChunk = makeChunk('최근문서', NOW - 10 * DAY_MS);
    const oldChunk = makeChunk('옛문서', NOW - 400 * DAY_MS);

    const block = buildCitationTraceBlock([recentChunk, oldChunk], { nowMs: NOW });

    expect(block).toContain('Provenance');
    expect(block).toContain('최근문서');
    expect(block).toContain('⚠️오래됨');
    expect(block).toContain('현재와 다를 수 있음');
  });

  it('메타데이터 없으면 기존 블록과 동일 (Provenance 섹션 없음)', () => {
    const block = buildCitationTraceBlock([makeChunk('문서')], { nowMs: NOW });

    expect(block).toContain('[CITATION TRACE]');
    expect(block).not.toContain('Provenance');
  });
});