/**
 * Hybrid (sparse + dense) retrieval measurement — chunk TF-IDF vs. chunk + embedding (alpha sweep).
 *
 * Skipped in ordinary test runs (needs a real brain and a local embedding server). Manual run:
 *
 *   ASTRA_EVAL_BRAIN="E:/Wiki/2nd/10_Wiki/Topics" \
 *   ASTRA_EVAL_EMBED_MODEL="text-embedding-nomic-embed-text-v1.5" \
 *   npx jest tests/retrievalEvalEmbedding.test.ts --verbose
 *
 * (The server URL comes from ASTRA_EVAL_EMBED_URL, default http://127.0.0.1:1234 — LM Studio.)
 *
 * Before measuring, chunk embeddings for the whole brain are backfilled — the resulting vectors
 * are persisted in the brain-index cache, so a single run of this test doubles as the runtime's
 * initial indexing.
 */
import * as fs from 'fs';
import { RetrievalOrchestrator } from '../src/retrieval';
import { loadGoldenSet, runRetrievalEval, type EvalReport } from '../src/retrieval/evalHarness';
import { findBrainFiles } from '../src/utils';
import { getBrainTokenIndex, backfillBrainChunkEmbeddings } from '../src/retrieval/brainIndex';
import { embedTexts, embedQuery } from '../src/retrieval/embeddings';

const BRAIN = (process.env.ASTRA_EVAL_BRAIN || '').trim();
const EMBED_MODEL = (process.env.ASTRA_EVAL_EMBED_MODEL || '').trim();
// Trim BEFORE applying the default so a blank / whitespace-only variable still falls back
// to the local server instead of becoming an empty base URL.
const EMBED_URL = (process.env.ASTRA_EVAL_EMBED_URL || '').trim() || 'http://127.0.0.1:1234';

/** Cut-offs (k) at which recall@k is reported. */
const KS = [1, 3, 5];
/** Blend weights swept for the hybrid runs; alpha = 0 is run separately as the TF-IDF-only baseline. */
const ALPHAS = [0.3, 0.5, 0.7];
/** Value passed as the chunk target size to both the backfill and the ranker. */
const CHUNK_TARGET = 1200;

// Only run when a brain path and an embedding model are configured and the brain exists on disk.
const maybe = BRAIN && EMBED_MODEL && fs.existsSync(BRAIN) ? describe : describe.skip;

maybe('retrieval A/B — chunk TF-IDF vs chunk+embedding', () => {
  // Backfilling a whole brain can be slow; allow up to 40 minutes.
  jest.setTimeout(40 * 60_000);

  test('golden set hybrid comparison', async () => {
    const { entries, parseErrors } = loadGoldenSet(BRAIN);
    expect(entries.length).toBeGreaterThan(0);

    const allFiles = findBrainFiles(BRAIN);
    // NOTE(review): called only for its side effect — presumably builds/warms the token index
    // that the backfill and ranker rely on; confirm against brainIndex.
    getBrainTokenIndex(BRAIN, allFiles);

    // ── Backfill embeddings for every chunk (chunks that already have a vector are skipped,
    //    so re-runs are cheap) ──
    const embed = (texts: string[]) => embedTexts(texts, { baseUrl: EMBED_URL, model: EMBED_MODEL });
    const SLICE = 300;
    let embedded = 0;
    for (let i = 0; i < allFiles.length; i += SLICE) {
      embedded += await backfillBrainChunkEmbeddings(
        BRAIN,
        allFiles.slice(i, i + SLICE),
        EMBED_MODEL,
        embed,
        CHUNK_TARGET,
      );
      // eslint-disable-next-line no-console
      console.log(`백필 진행 ${Math.min(i + SLICE, allFiles.length)}/${allFiles.length} 파일 · 신규 벡터 ${embedded}`);
    }

    // Minimal stand-in for a brain record; `as any` is kept because the real type is not
    // visible from this file.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const brain = { id: 'eval', name: 'EvalBrain', localBrainPath: BRAIN } as any;
    const orchestrator = new RetrievalOrchestrator();

    // Query embeddings do not depend on alpha — compute each one once and reuse it in every run.
    // The value type is derived from embedQuery itself so the cache stays in sync with its signature.
    const queryVecs = new Map<string, Awaited<ReturnType<typeof embedQuery>>>();
    for (const e of entries) {
      queryVecs.set(e.query, await embedQuery(e.query, { baseUrl: EMBED_URL, model: EMBED_MODEL }));
    }

    /**
     * Runs the golden-set evaluation with the given blend weight.
     * alpha = 0 passes no query embedding and no embedding model (TF-IDF-only baseline).
     */
    const run = (alpha: number): Promise<EvalReport> =>
      runRetrievalEval({
        entries,
        ks: KS,
        ranker: async (query: string) =>
          orchestrator
            .rankBrainForEval(query, brain, {
              limit: Math.max(...KS) + 5,
              chunkLevelRetrieval: true,
              chunkTargetChars: CHUNK_TARGET,
              queryEmbedding: alpha > 0 ? queryVecs.get(query) : undefined,
              embeddingModel: alpha > 0 ? EMBED_MODEL : undefined,
              embeddingBlendAlpha: alpha,
            })
            .map(r => r.relativePath),
      });

    const base = await run(0);
    const hybrids: Array<{ alpha: number; report: EvalReport }> = [];
    for (const a of ALPHAS) hybrids.push({ alpha: a, report: await run(a) });

    const pct = (x: number) => (x * 100).toFixed(1) + '%';
    const lines: string[] = [];
    lines.push('');
    lines.push(`══ 하이브리드 검색 측정 (질의 ${entries.length}건, 파싱오류 ${parseErrors}, 신규 벡터 ${embedded}) ══`);
    lines.push(`지표 | TF-IDF만 | ${ALPHAS.map(a => `α=${a}`.padStart(7)).join(' | ')}`);
    for (const k of KS) {
      lines.push(`recall@${k} | ${pct(base.recallAtK[k]).padStart(7)} | ${hybrids.map(h => pct(h.report.recallAtK[k]).padStart(7)).join(' | ')}`);
    }
    lines.push(`MRR | ${base.mrr.toFixed(3).padStart(7)} | ${hybrids.map(h => h.report.mrr.toFixed(3).padStart(7)).join(' | ')}`);

    // Miss / rank-change diagnostics for the best alpha (highest MRR; the earliest alpha wins ties).
    const best = hybrids.reduce((p, c) => (c.report.mrr > p.report.mrr ? c : p));
    lines.push(`-- α=${best.alpha} 기준 순위 변동 --`);
    // Reports are paired by index — assumes both runs list perQuery in the same order (TODO confirm).
    base.perQuery.forEach((bq, i) => {
      const hq = best.report.perQuery[i];
      // A plain inequality already covers hit↔miss flips, since null never equals a rank.
      if (bq.firstHitRank !== hq.firstHitRank) {
        lines.push(` · "${bq.query.slice(0, 38)}" sparse=#${bq.firstHitRank ?? 'miss'} → hybrid=#${hq.firstHitRank ?? 'miss'}`);
      }
    });

    const misses = best.report.perQuery.filter(q => q.firstHitRank === null);
    for (const m of misses) lines.push(` ✗ miss "${m.query.slice(0, 38)}" → 상위: ${m.topPaths.slice(0, 3).join(' · ')}`);

    // eslint-disable-next-line no-console
    console.log(lines.join('\n'));

    expect(base.total).toBe(entries.length);
  });
});