feat(retrieval): 청킹/평가 하니스 + 검색 인덱스 개선

- src/retrieval/chunker.ts: 문서 청킹 로직 추가
- src/retrieval/evalHarness.ts + src/extension/evalCommands.ts: 검색 품질 평가 하니스
- brainIndex.ts / retrieval/index.ts / memoryContext.ts: 인덱싱·컨텍스트 빌더 개선
- config.ts / extension.ts / sidebarProvider.ts / package.json 갱신
- ADR-0030~0032 및 개발 기록, .astra 런타임 상태 동기화

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-08 19:27:10 +09:00
parent b94e6ad1da
commit d39eb27c90
26 changed files with 1471 additions and 208 deletions
@@ -60,6 +60,14 @@ export interface IAgentConfig {
     * Default 0.5 = equal weight, a reasonable starting point.
     */
    embeddingBlendAlpha: number;
+    /**
+     * Section-level chunking (Phase 1-가). true 면 brain 검색이 파일 단위가 아니라
+     * `##` 헤딩 기준 *섹션 청크* 단위로 색인·스코어링한다. 긴 다주제 문서의 recall 을
+     * 올린다. 기본 false (= 기존 파일 단위) — 평가 하니스로 A/B 비교 후 켜기 위함.
+     */
+    chunkLevelRetrieval: boolean;
+    /** 섹션 청크 목표 길이(문자). 이보다 길면 문단 경계로 더 쪼갠다. */
+    chunkTargetChars: number;
    /**
     * Conflict Surface — 검색된 출처의 conflictSeverity 신호를 [CONFLICT WARNINGS] 블록
     * 으로 시스템 프롬프트에 노출. v4 정책 텍스트(buildAstraModeSystemPrompt) 가 이미
@@ -436,6 +444,8 @@ export function getConfig(): IAgentConfig {
        finalOnlyRetryOnThoughtLeak: cfg.get<boolean>('finalOnlyRetryOnThoughtLeak', true),
        embeddingModel: (cfg.get<string>('embeddingModel', '') || '').trim(),
        embeddingBlendAlpha: Math.max(0, Math.min(1, cfg.get<number>('embeddingBlendAlpha', 0.5))),
+        chunkLevelRetrieval: cfg.get<boolean>('chunkLevelRetrieval', false),
+        chunkTargetChars: Math.max(400, Math.min(4000, cfg.get<number>('chunkTargetChars', 1200))),
        conflictHighlightingEnabled: cfg.get<boolean>('conflictHighlightingEnabled', true),
        conflictSeverityThreshold: (cfg.get<string>('conflictSeverityThreshold', 'medium') as 'low' | 'medium' | 'high') || 'medium',
        conflictCrossDocEnabled: cfg.get<boolean>('conflictCrossDocEnabled', true),
@@ -42,6 +42,7 @@ import { startStocksWatcher } from './features/stocks';
 import { registerProviderCommands } from './extension/providerCommands';
 import { registerScaffoldCommand } from './extension/scaffoldCommand';
 import { registerLessonCommands } from './extension/lessonCommands';
+import { registerEvalCommands } from './extension/evalCommands';
 import { registerTelegramCommands, TELEGRAM_TOKEN_SECRET_KEY, type TelegramTokenStore } from './extension/telegramCommands';
 import { setupSettingsPanel } from './extension/settingsSetup';
 import { createTelegramBot } from './integrations/telegram/telegramSetup';
@@ -267,6 +268,8 @@ export async function activate(context: vscode.ExtensionContext) {
        ...registerTelegramCommands(context, { telegramBot, telegramClient, tokenStore }),
        // knowledge map + lesson cards → `src/extension/lessonCommands.ts`
        ...registerLessonCommands({ getAgent: () => agent }),
+        // 검색 평가 하니스 (recall@k / MRR) → `src/extension/evalCommands.ts`
+        ...registerEvalCommands(),
        // architecture / company / calendar / devil commands → `src/extension/providerCommands.ts`
        ...registerProviderCommands(context, { getProvider: () => provider }),
    );
@@ -0,0 +1,228 @@
+import * as fs from 'fs';
+import * as path from 'path';
+import * as vscode from 'vscode';
+import { getConfig } from '../config';
+import { getActiveBrainProfile, findBrainFiles, logInfo, logError } from '../utils';
+import { RetrievalOrchestrator } from '../retrieval';
+import { getBrainTokenIndex, backfillBrainEmbeddings, backfillBrainChunkEmbeddings } from '../retrieval/brainIndex';
+import { embedQuery, embedTexts } from '../retrieval/embeddings';
+import {
+    loadGoldenSet,
+    runRetrievalEval,
+    formatReportMarkdown,
+    GOLDEN_TEMPLATE,
+    GOLDEN_REL_JSONL,
+} from '../retrieval/evalHarness';
+
+/**
+ * Registers the retrieval-evaluation command set (Phase 1-B).
+ *
+ * - `g1nation.eval.retrieval` — measures recall@k / MRR of the active brain's search
+ *   against its golden set and writes a markdown report; when the golden set has no
+ *   valid entries a template is scaffolded (or the existing file re-opened) instead.
+ *   The point is to run the same golden set before and after a retrieval change
+ *   (e.g. section chunking) and prove the improvement with numbers.
+ * - `g1nation.embeddings.backfill` — fills the embedding index for the whole brain.
+ *
+ * @returns One disposable per registered command, in the order listed above.
+ */
+export function registerEvalCommands(): vscode.Disposable[] {
+    const handlers: Array<[string, () => Promise<void>]> = [
+        ['g1nation.eval.retrieval', runRetrievalEvalCommand],
+        ['g1nation.embeddings.backfill', backfillEmbeddingsCommand],
+    ];
+    return handlers.map(([commandId, handler]) => vscode.commands.registerCommand(commandId, handler));
+}
+
+/** Cut-offs `k` for which recall@k is computed; the log line and notification below read the k = 5 entry. */
+const EVAL_KS = [1, 3, 5, 10];
+
+/**
+ * Handler for `g1nation.eval.retrieval`: scores the active brain's retrieval against its golden set.
+ *
+ * Steps: load the golden set (scaffolding or re-opening it when it has no valid entries),
+ * warm the token index, optionally backfill embeddings, rank every golden query through
+ * `RetrievalOrchestrator.rankBrainForEval`, then write a markdown report under
+ * `<brain>/.astra/eval/` and open it.
+ *
+ * Errors raised along the way are caught, logged and surfaced as an error notification.
+ */
+async function runRetrievalEvalCommand(): Promise<void> {
+    // Upper bound for a single query-embedding call; on expiry the query is ranked without a dense vector.
+    const QUERY_EMBED_TIMEOUT_MS = 4000;
+    try {
+        const brain = getActiveBrainProfile();
+        if (!brain?.localBrainPath || !fs.existsSync(brain.localBrainPath)) {
+            vscode.window.showErrorMessage('활성 두뇌 폴더를 찾을 수 없습니다. 먼저 두뇌를 추가/선택하세요.');
+            return;
+        }
+
+        // 1) Load the golden set — with no valid entries, scaffold (or re-open) it and stop.
+        const { entries, sourcePath, parseErrors } = loadGoldenSet(brain.localBrainPath);
+        if (entries.length === 0) {
+            const goldenPath = path.join(brain.localBrainPath, GOLDEN_REL_JSONL);
+            const created = await scaffoldGoldenSet(goldenPath, sourcePath, parseErrors);
+            if (created) {
+                const doc = await vscode.workspace.openTextDocument(vscode.Uri.file(goldenPath));
+                await vscode.window.showTextDocument(doc);
+                vscode.window.showInformationMessage(
+                    '골든셋 템플릿을 만들었습니다. 질문→기대문서 쌍을 채운 뒤 다시 "Astra: 검색 평가 실행"을 실행하세요.',
+                );
+            }
+            return;
+        }
+
+        const config = getConfig();
+
+        await vscode.window.withProgress(
+            { location: vscode.ProgressLocation.Notification, title: 'Astra 검색 평가', cancellable: false },
+            async (progress) => {
+                // 2) Warm the index — load every brain file into the token index (prerequisite for backfill).
+                progress.report({ message: '인덱스 로드 중…' });
+                const allFiles = findBrainFiles(brain.localBrainPath);
+                getBrainTokenIndex(brain.localBrainPath, allFiles);
+
+                // 3) Embedding backfill — when configured, fill vectors for all files so the dense term is evaluated fairly.
+                const useEmbeddings = !!config.embeddingModel && (config.embeddingBlendAlpha ?? 0) > 0;
+                if (useEmbeddings) {
+                    progress.report({ message: `임베딩 채우는 중 (${config.embeddingModel})…` });
+                    const embed = (texts: string[]) => embedTexts(texts, { baseUrl: config.ollamaUrl, model: config.embeddingModel });
+                    try {
+                        if (config.chunkLevelRetrieval === true) {
+                            await backfillBrainChunkEmbeddings(brain.localBrainPath, allFiles, config.embeddingModel, embed, config.chunkTargetChars);
+                        } else {
+                            await backfillBrainEmbeddings(brain.localBrainPath, allFiles, config.embeddingModel, embed);
+                        }
+                    } catch (e: any) {
+                        // Best effort: a failed backfill only removes the dense term from this run.
+                        logInfo('Eval embedding backfill failed — continuing TF-IDF only.', { error: e?.message ?? String(e) });
+                    }
+                }
+
+                // 4) Run the evaluation. Per the original note, the ranker follows the production scoring
+                //    path but inspects the ranking before budgeting is applied.
+                const orchestrator = new RetrievalOrchestrator();
+                let done = 0;
+                const ranker = async (query: string): Promise<string[]> => {
+                    done++;
+                    progress.report({ message: `질의 ${done}/${entries.length} 평가 중…` });
+                    let queryEmbedding: number[] | undefined;
+                    if (useEmbeddings) {
+                        let timer: ReturnType<typeof setTimeout> | undefined;
+                        try {
+                            queryEmbedding = await Promise.race([
+                                embedQuery(query, { baseUrl: config.ollamaUrl, model: config.embeddingModel }),
+                                new Promise<undefined>((resolve) => {
+                                    timer = setTimeout(() => resolve(undefined), QUERY_EMBED_TIMEOUT_MS);
+                                }),
+                            ]);
+                        } catch {
+                            queryEmbedding = undefined;
+                        } finally {
+                            // Cancel the timeout once the race settles so no timer outlives the query.
+                            if (timer !== undefined) clearTimeout(timer);
+                        }
+                    }
+                    return orchestrator
+                        .rankBrainForEval(query, brain, {
+                            limit: Math.max(...EVAL_KS) + 5,
+                            queryEmbedding,
+                            embeddingModel: config.embeddingModel || undefined,
+                            embeddingBlendAlpha: config.embeddingBlendAlpha,
+                            chunkLevelRetrieval: config.chunkLevelRetrieval === true,
+                            chunkTargetChars: config.chunkTargetChars,
+                        })
+                        .map((r) => r.relativePath);
+                };
+
+                const report = await runRetrievalEval({ entries, ks: EVAL_KS, ranker });
+
+                // 5) Save the report and open it.
+                const now = new Date();
+                // ISO timestamp with ':' and '.' replaced so it is safe inside a file name, cut to seconds.
+                const stamp = now.toISOString().replace(/[:.]/g, '-').slice(0, 19);
+                const mode = config.chunkLevelRetrieval === true
+                    ? `섹션 청크 (target=${config.chunkTargetChars}자)`
+                    : '파일 단위 (baseline)';
+                const noteParts = [`검색 모드: ${mode}`];
+                if (parseErrors > 0) noteParts.push(`골든셋 파싱 실패 ${parseErrors}줄 (무시됨)`);
+                const md = formatReportMarkdown(report, {
+                    brainName: brain.name,
+                    dateStr: now.toLocaleString(),
+                    embeddingModel: useEmbeddings ? config.embeddingModel : '',
+                    alpha: config.embeddingBlendAlpha ?? 0,
+                    notes: noteParts.join(' · '),
+                });
+                const reportPath = path.join(brain.localBrainPath, '.astra', 'eval', `report-${stamp}.md`);
+                fs.mkdirSync(path.dirname(reportPath), { recursive: true });
+                fs.writeFileSync(reportPath, md, 'utf8');
+                logInfo('Retrieval eval complete.', {
+                    queries: report.total,
+                    recallAt5: report.recallAtK[5],
+                    mrr: report.mrr,
+                    reportPath,
+                });
+
+                const doc = await vscode.workspace.openTextDocument(vscode.Uri.file(reportPath));
+                await vscode.window.showTextDocument(doc, { preview: false });
+                vscode.window.showInformationMessage(
+                    `검색 평가 완료 · recall@5 ${(report.recallAtK[5] * 100).toFixed(0)}% · MRR ${report.mrr.toFixed(2)} (질의 ${report.total}개)`,
+                );
+            },
+        );
+    } catch (err: any) {
+        logError('Retrieval eval command failed.', { error: err?.message || String(err) });
+        vscode.window.showErrorMessage(`검색 평가 실패: ${err?.message ?? err}`);
+    }
+}
+
+/**
+ * Handler for `g1nation.embeddings.backfill`: fills the embedding index for the whole active brain.
+ *
+ * During normal turns only the *retrieved* files are backfilled lazily, so it takes a long
+ * time before dense retrieval becomes effective; this command fills everything in one go.
+ * In chunk mode chunk-level vectors are filled, otherwise file-level vectors. Files are
+ * processed in batches to bound the size of each engine call, and the loop stops between
+ * batches when the user cancels the progress notification.
+ *
+ * Errors are caught, logged and surfaced as an error notification.
+ */
+async function backfillEmbeddingsCommand(): Promise<void> {
+    try {
+        const brain = getActiveBrainProfile();
+        if (!brain?.localBrainPath || !fs.existsSync(brain.localBrainPath)) {
+            vscode.window.showErrorMessage('활성 두뇌 폴더를 찾을 수 없습니다.');
+            return;
+        }
+        const config = getConfig();
+        if (!config.embeddingModel) {
+            vscode.window.showWarningMessage(
+                '임베딩 모델이 설정되지 않았습니다. 엔진(Ollama/LM Studio)에 임베딩 모델을 로드한 뒤 ' +
+                'g1nation.embeddingModel 에 그 모델명을 입력하세요. (없어도 TF-IDF 검색은 동작합니다.)',
+            );
+            return;
+        }
+
+        const chunkMode = config.chunkLevelRetrieval === true;
+        await vscode.window.withProgress(
+            { location: vscode.ProgressLocation.Notification, title: `Astra 임베딩 색인 (${config.embeddingModel})`, cancellable: true },
+            async (progress, token) => {
+                const allFiles = findBrainFiles(brain.localBrainPath);
+                // Make sure every file has a token-index entry before vectors are attached to it.
+                getBrainTokenIndex(brain.localBrainPath, allFiles);
+                const embed = (texts: string[]) => embedTexts(texts, { baseUrl: config.ollamaUrl, model: config.embeddingModel });
+
+                const BATCH = 40;
+                let embedded = 0;
+                let scanned = 0;
+                let cancelled = false;
+                for (let i = 0; i < allFiles.length; i += BATCH) {
+                    if (token.isCancellationRequested) {
+                        cancelled = true;
+                        break;
+                    }
+                    const slice = allFiles.slice(i, i + BATCH);
+                    progress.report({
+                        message: `${i + slice.length}/${allFiles.length} 파일 · 임베딩 ${embedded}개`,
+                        // Use the real slice size so the increments add up to exactly 100
+                        // even when the last batch is shorter than BATCH.
+                        increment: (slice.length / allFiles.length) * 100,
+                    });
+                    try {
+                        embedded += chunkMode
+                            ? await backfillBrainChunkEmbeddings(brain.localBrainPath, slice, config.embeddingModel, embed, config.chunkTargetChars)
+                            : await backfillBrainEmbeddings(brain.localBrainPath, slice, config.embeddingModel, embed);
+                    } catch (e: any) {
+                        // Best effort: one failed batch must not abort the remaining ones.
+                        logInfo('Embedding batch failed — continuing.', { batchStart: i, error: e?.message ?? String(e) });
+                    }
+                    scanned += slice.length;
+                }
+                logInfo('Full-brain embedding backfill done.', { mode: chunkMode ? 'chunk' : 'file', files: allFiles.length, embedded, scanned, cancelled });
+                if (cancelled) {
+                    // Do not claim completion for a run the user interrupted.
+                    vscode.window.showWarningMessage(
+                        `임베딩 색인 취소됨 · ${chunkMode ? '청크' : '파일'} 단위 · 신규 ${embedded}개 (${scanned}/${allFiles.length} 파일 스캔).`,
+                    );
+                    return;
+                }
+                vscode.window.showInformationMessage(
+                    `임베딩 색인 완료 · ${chunkMode ? '청크' : '파일'} 단위 · 신규 ${embedded}개 (${allFiles.length} 파일 스캔). ` +
+                    `이제 '검색 평가 실행'으로 dense 효과를 측정해 보세요.`,
+                );
+            },
+        );
+    } catch (err: any) {
+        logError('Embedding backfill command failed.', { error: err?.message || String(err) });
+        vscode.window.showErrorMessage(`임베딩 색인 실패: ${err?.message ?? err}`);
+    }
+}
+
+/**
+ * Writes the golden-set template to `goldenPath` when no golden-set file exists yet.
+ *
+ * When `existingSource` points at a file that is present on disk (but yielded no valid
+ * entries), nothing is overwritten: a warning is shown and that file is opened instead.
+ *
+ * @param goldenPath Destination of the template file.
+ * @param existingSource Path of the golden-set file the loader found, or `null`.
+ * @param parseErrors Number of lines that failed to parse; mentioned in the warning when non-zero.
+ * @returns `true` only when a new template file was written.
+ */
+async function scaffoldGoldenSet(goldenPath: string, existingSource: string | null, parseErrors: number): Promise<boolean> {
+    if (!existingSource || !fs.existsSync(existingSource)) {
+        // Nothing on disk yet — create the directory and drop the template in.
+        try {
+            fs.mkdirSync(path.dirname(goldenPath), { recursive: true });
+            fs.writeFileSync(goldenPath, GOLDEN_TEMPLATE, 'utf8');
+        } catch (e: any) {
+            vscode.window.showErrorMessage(`골든셋 템플릿 생성 실패: ${e?.message ?? e}`);
+            return false;
+        }
+        return true;
+    }
+
+    // The file exists but holds zero valid entries — the user is mid-edit or made a typo.
+    // Leave it untouched and only point them at it.
+    const parseNote = parseErrors ? ` (파싱 실패 ${parseErrors}줄)` : '';
+    const fileLabel = path.basename(existingSource);
+    vscode.window.showWarningMessage(
+        `골든셋(${fileLabel})에 유효한 항목이 없습니다${parseNote}. ` +
+        '각 줄을 {"query": "...", "expected": ["파일명.md"]} 형식으로 작성하세요.',
+    );
+    const existingDoc = await vscode.workspace.openTextDocument(vscode.Uri.file(existingSource));
+    await vscode.window.showTextDocument(existingDoc);
+    return false;
+}
@@ -7,7 +7,7 @@ import type { MemoryManager } from '../../memory';
 import type { RetrievalOrchestrator } from '../../retrieval';
 import { buildLessonChecklistBlock } from '../../retrieval/lessonHelpers';
 import { embedQuery, embedTexts } from '../../retrieval/embeddings';
-import { backfillBrainEmbeddings } from '../../retrieval/brainIndex';
+import { backfillBrainEmbeddings, backfillBrainChunkEmbeddings } from '../../retrieval/brainIndex';
 import { resolveScopeForAgent } from '../../skills/agentKnowledgeMap';
 import {
    resolveKnowledgeMix,
@@ -207,6 +207,8 @@ export async function buildMemoryContext(deps: MemoryContextDeps): Promise<strin
        embeddingBlendAlpha: config.embeddingBlendAlpha,
        workStateSignals,
        hierarchicalReweightEnabled: config.hierarchicalReweightEnabled !== false,
+        chunkLevelRetrieval: config.chunkLevelRetrieval === true,
+        chunkTargetChars: config.chunkTargetChars,
    });

    // Semantic Re-rank (LLM, async) — selectedChunks 의 *순서* 만 재배치. 토큰 예산을
@@ -236,12 +238,13 @@ export async function buildMemoryContext(deps: MemoryContextDeps): Promise<strin
            .map((c) => c.metadata.filePath!)
            .filter((p, i, arr) => arr.indexOf(p) === i);
        if (scoredFilePaths.length > 0) {
-            void backfillBrainEmbeddings(
-                deps.activeBrain.localBrainPath,
-                scoredFilePaths,
-                config.embeddingModel,
-                (texts) => embedTexts(texts, { baseUrl: config.ollamaUrl, model: config.embeddingModel }),
-            );
+            const embed = (texts: string[]) => embedTexts(texts, { baseUrl: config.ollamaUrl, model: config.embeddingModel });
+            // 청크 모드면 청크 단위 벡터를, 아니면 파일 단위 벡터를 채운다 (불필요한 작업 회피).
+            if (config.chunkLevelRetrieval === true) {
+                void backfillBrainChunkEmbeddings(deps.activeBrain.localBrainPath, scoredFilePaths, config.embeddingModel, embed, config.chunkTargetChars);
+            } else {
+                void backfillBrainEmbeddings(deps.activeBrain.localBrainPath, scoredFilePaths, config.embeddingModel, embed);
+            }
        }
    }

@@ -15,12 +15,13 @@ import * as fs from 'fs';
 import * as path from 'path';
 import { tokenize, countConflictIndicators } from './scoring';
 import { detectLessonKind } from './lessonHelpers';
+import { splitIntoSections } from './chunker';
 import { logInfo } from '../utils';

-// v4 adds optional per-file `embedding` for hybrid (sparse+dense) retrieval.
-// Older v3 indexes are auto-rebuilt on first load — no migration needed because
-// the cache is derivable from the brain itself.
-const INDEX_VERSION = 4;
+// v5 adds optional per-file `chunks` (section-level index, Phase 1-가) alongside the
+// v4 per-file `embedding`. Older indexes are auto-rebuilt on first load — no migration
+// needed because the cache is fully derivable from the brain itself.
+const INDEX_VERSION = 5;
 const INDEX_DIR = '.astra';
 const INDEX_FILE = 'brain-index.json';
 /** 인덱스가 이 개수를 넘으면 이번 스캔에서 못 본 항목을 정리합니다 (삭제된 파일 누적 방지). */
@@ -45,11 +46,32 @@ interface IndexEntry {
    embedding?: number[];
    /** Embedding model the vector was produced with — invalidates the vector when the user switches models. */
    embeddingModel?: string;
+    /**
+     * Section-level chunks (Phase 1-가). 지연 계산 — chunk 모드 검색이 처음 요청할 때
+     * `getBrainChunkIndex` 가 채운다. 파일이 바뀌면 (재색인 시 entry 가 새로 만들어져)
+     * 자동으로 사라지므로 stale chunk 가 남지 않는다.
+     */
+    chunks?: ChunkEntry[];
+}
+
+interface ChunkEntry {
+    heading: string;
+    headingPath: string[];
+    tokens: string[];        // tokenize(`${title} ${headingPath} ${sectionText}`) — the document title contributes to every chunk
+    headingTokens: string[]; // tokenize(`${title} ${headingPath}`)
+    charStart: number;
+    charEnd: number;
+    /** Chunk-level dense vector (Phase 1-A follow-up). More precise than the file-level vector. Backfilled lazily. */
+    embedding?: number[];
+    /** Embedding model that produced this vector — a vector from a different model is treated as missing. */
+    embeddingModel?: string;
+}

 interface PersistedIndex {
    version: number;
    entries: Record<string, IndexEntry>; // keyed by absolute file path
+    /** The `targetChars` the cached chunks were built with — when the setting changes the chunk layer is regenerated. */
+    chunkTargetChars?: number;
 }

 export interface IndexedBrainDoc {
@@ -64,6 +86,23 @@ export interface IndexedBrainDoc {
    kind: string;
 }

+/** Flat chunk view returned by `getBrainChunkIndex` — one file is expanded into several chunks. */
+export interface IndexedChunk {
+    filePath: string;
+    relativePath: string;
+    title: string;
+    /** Position of the chunk within its file (0-based). */
+    chunkIndex: number;
+    heading: string;
+    headingPath: string[];
+    tokens: string[];
+    headingTokens: string[];
+    charStart: number;
+    charEnd: number;
+    mtimeMs: number;
+    kind: string;
+}
+
 interface BrainState {
    index: PersistedIndex;
    dirty: boolean;
@@ -223,6 +262,99 @@ export function getBrainTokenIndex(brainPath: string, files: string[]): IndexedB
    return out;
 }

+/**
+ * Builds the chunk layer for one indexed file.
+ *
+ * Files with a non-empty `kind` (per the original note: lesson / playbook / qa-finding
+ * cards) stay a single whole-file chunk so that section splitting cannot break essence
+ * extraction. Ordinary notes are split with `splitIntoSections`. The document title is
+ * prepended to every chunk's token text so the title-match strength of file mode
+ * (`titleTokens`) is preserved.
+ *
+ * @param entry File-level index entry (title, tokens, kind).
+ * @param content Full text of the file.
+ * @param targetChars Target chunk length; min/max lengths are derived from it.
+ * @returns At least one chunk entry.
+ */
+function buildChunkEntries(entry: IndexEntry, content: string, targetChars: number): ChunkEntry[] {
+    // Fallback representation: the whole file as one chunk, reusing the file-level tokens.
+    const singleChunk = (): ChunkEntry[] => {
+        const only: ChunkEntry = {
+            heading: entry.title,
+            headingPath: [entry.title],
+            tokens: entry.tokens,
+            headingTokens: entry.titleTokens,
+            charStart: 0,
+            charEnd: content.length,
+        };
+        return [only];
+    };
+
+    if (entry.kind) return singleChunk();
+
+    const minChars = Math.min(200, Math.floor(targetChars / 4));
+    const sections = splitIntoSections(content, { targetChars, minChars, maxChars: targetChars * 2 });
+    if (sections.length === 0) return singleChunk();
+
+    const chunks: ChunkEntry[] = [];
+    for (const section of sections) {
+        // Title + heading breadcrumb; feeds both the chunk tokens and the heading tokens.
+        const breadcrumb = [entry.title, ...section.headingPath].join(' ');
+        chunks.push({
+            heading: section.heading || entry.title,
+            headingPath: section.headingPath.length ? section.headingPath : [entry.title],
+            tokens: tokenize(`${breadcrumb} ${section.text}`),
+            headingTokens: tokenize(breadcrumb),
+            charStart: section.charStart,
+            charEnd: section.charEnd,
+        });
+    }
+    return chunks;
+}
+
+/**
+ * Section-level chunk view (Phase 1-A).
+ *
+ * Refreshes the file entries through `getBrainTokenIndex`, then computes and caches the
+ * chunk layer of every requested file that does not have one yet, and returns all chunks
+ * flattened into a single list. When `targetChars` differs from the value the cache was
+ * built with, the whole chunk cache is discarded and rebuilt. Files whose content cannot
+ * be read are skipped.
+ *
+ * @param brainPath Root folder of the brain.
+ * @param files Absolute paths of the files to cover.
+ * @param targetChars Target chunk length used when building chunks.
+ * @returns Chunks of all requested files, in file order then chunk order; `[]` for empty input.
+ */
+export function getBrainChunkIndex(brainPath: string, files: string[], targetChars: number): IndexedChunk[] {
+    const hasInput = !!brainPath && Array.isArray(files) && files.length > 0;
+    if (!hasInput) return [];
+
+    // 1) Bring the file entries up to date first.
+    getBrainTokenIndex(brainPath, files);
+    const state = loadState(brainPath);
+
+    // 2) A different targetChars invalidates every cached chunk layer.
+    if (state.index.chunkTargetChars !== targetChars) {
+        for (const cached of Object.values(state.index.entries)) {
+            if (cached) cached.chunks = undefined;
+        }
+        state.index.chunkTargetChars = targetChars;
+        state.dirty = true;
+    }
+
+    const flat: IndexedChunk[] = [];
+    let filesChunked = 0;
+    for (const filePath of files) {
+        const entry = state.index.entries[filePath];
+        if (!entry) continue;
+
+        if (!entry.chunks) {
+            let content: string;
+            try {
+                content = fs.readFileSync(filePath, 'utf8');
+            } catch {
+                continue;
+            }
+            entry.chunks = buildChunkEntries(entry, content, targetChars);
+            state.dirty = true;
+            filesChunked += 1;
+        }
+
+        const { relativePath, title, mtimeMs } = entry;
+        const kind = entry.kind || '';
+        entry.chunks.forEach((chunk, chunkIndex) => {
+            flat.push({
+                filePath,
+                relativePath,
+                title,
+                chunkIndex,
+                heading: chunk.heading,
+                headingPath: chunk.headingPath,
+                tokens: chunk.tokens,
+                headingTokens: chunk.headingTokens,
+                charStart: chunk.charStart,
+                charEnd: chunk.charEnd,
+                mtimeMs,
+                kind,
+            });
+        });
+    }
+
+    if (filesChunked > 0) {
+        logInfo('Brain chunk index built.', { brainPath, files: files.length, filesChunked, totalChunks: flat.length, targetChars });
+    }
+    if (state.dirty) scheduleWrite(state, brainPath);
+    return flat;
+}
+
 /**
 * Pull (filePath, embedding) for every file in `filePaths` that has a current
 * cached vector under `model`. Caller uses this to rank top TF-IDF candidates
@@ -310,6 +442,85 @@ export async function backfillBrainEmbeddings(
    }
 }

+/**
+ * Looks up chunk-level embeddings from the in-memory index of `brainPath`.
+ *
+ * Keys have the form `${filePath}#${chunkIndex}`. Chunks without a vector, with an empty
+ * vector, or with a vector produced by another model are left out. Returns an empty map
+ * when the brain has no loaded state or `model` is blank.
+ *
+ * @param brainPath Root folder of the brain.
+ * @param model Embedding model the vectors must have been produced with.
+ */
+export function getBrainChunkEmbeddings(brainPath: string, model: string): Map<string, number[]> {
+    const vectors = new Map<string, number[]>();
+    if (!brainPath || !model.trim()) return vectors;
+
+    const state = _states.get(brainPath);
+    if (!state) return vectors;
+
+    for (const filePath of Object.keys(state.index.entries)) {
+        const chunks = state.index.entries[filePath]?.chunks;
+        if (!chunks) continue;
+        chunks.forEach((chunk, chunkIndex) => {
+            const vec = chunk.embedding;
+            if (chunk.embeddingModel === model && Array.isArray(vec) && vec.length > 0) {
+                vectors.set(`${filePath}#${chunkIndex}`, vec);
+            }
+        });
+    }
+    return vectors;
+}
+
+/**
+ * Background fill — embeds only those chunks of `files` that have no vector for `model` yet.
+ *
+ * The text sent to the embedder is rebuilt from the cached chunk tokens (same strategy as
+ * the file-level backfill), so files are not re-read here. Intended for fire-and-forget
+ * use: embedder failures are logged and reported as `0` rather than thrown.
+ *
+ * Because `embedFn` is awaited, the index can change in the meantime (a file is
+ * re-indexed, or the chunk layer is rebuilt for a new `targetChars`). Each vector is
+ * therefore written back only if the chunk object it was computed for is still the one
+ * stored at that position; otherwise it is dropped and a later backfill picks the chunk up.
+ *
+ * @param brainPath Root folder of the brain.
+ * @param files Absolute paths of the files whose chunks should be embedded.
+ * @param model Embedding model name; stored next to each vector.
+ * @param embedFn Embeds a batch of texts; expected to return one vector per text, in order.
+ * @param targetChars Target chunk length, used to make sure the chunk layer exists.
+ * @returns Number of chunks that received a new vector.
+ */
+export async function backfillBrainChunkEmbeddings(
+    brainPath: string,
+    files: string[],
+    model: string,
+    embedFn: (texts: string[]) => Promise<number[][]>,
+    targetChars: number,
+): Promise<number> {
+    if (!brainPath || !model.trim() || !Array.isArray(files) || files.length === 0) return 0;
+    // Make sure the chunk layer exists (built on demand).
+    getBrainChunkIndex(brainPath, files, targetChars);
+    const st = _states.get(brainPath);
+    if (!st) return 0;
+
+    const texts: string[] = [];
+    // Keep the chunk object itself so a stale position can be detected after the await.
+    const refs: Array<{ fp: string; ci: number; chunk: ChunkEntry }> = [];
+    for (const fp of files) {
+        const entry = st.index.entries[fp];
+        if (!entry?.chunks) continue;
+        for (let ci = 0; ci < entry.chunks.length; ci++) {
+            const ch = entry.chunks[ci];
+            if (ch.embedding && ch.embeddingModel === model) continue;
+            const text = Array.isArray(ch.tokens) && ch.tokens.length > 0 ? ch.tokens.join(' ') : '';
+            if (!text.trim()) continue;
+            texts.push(text);
+            refs.push({ fp, ci, chunk: ch });
+        }
+    }
+    if (texts.length === 0) return 0;
+    try {
+        const vectors = await embedFn(texts);
+        // The in-memory state for this brain was dropped or replaced while embedding —
+        // writing into the detached copy would be pointless, so give up for this round.
+        if (_states.get(brainPath) !== st) return 0;
+        let n = 0;
+        for (let i = 0; i < vectors.length && i < refs.length; i++) {
+            const v = vectors[i];
+            if (!Array.isArray(v) || v.length === 0) continue;
+            const ref = refs[i];
+            const current = st.index.entries[ref.fp]?.chunks?.[ref.ci];
+            // Skip when the slot now holds a different chunk than the one that was embedded.
+            if (!current || current !== ref.chunk) continue;
+            current.embedding = v;
+            current.embeddingModel = model;
+            st.dirty = true;
+            n++;
+        }
+        if (n > 0) {
+            logInfo('Brain chunk embeddings backfilled.', { brainPath, model, embedded: n });
+            scheduleWrite(st, brainPath);
+        }
+        return n;
+    } catch (e: any) {
+        logInfo('Brain chunk embedding backfill failed (TF-IDF still works).', { brainPath, model, error: e?.message ?? String(e) });
+        return 0;
+    }
+}
+
 /** Drop the in-memory index (and pending write) for one brain, or all brains. The disk file is left as-is. */
 export function clearBrainTokenIndex(brainPath?: string): void {
    if (brainPath === undefined) {
@@ -0,0 +1,174 @@
+/**
+ * ============================================================
+ * Markdown Section Chunker (Phase 1-가)
+ *
+ * 긴 노트를 `#`~`######` 헤딩 경계로 *섹션 청크* 로 나눈다. 파일 단위 색인은 5000자
+ * 다주제 문서를 하나의 흐릿한 단위로 만들어 검색 정밀도를 떨어뜨린다 — 섹션 단위로
+ * 쪼개면 질의가 정확히 해당 섹션에 매치된다 (제2뇌의 "문서 청킹 전략" 지식 그대로).
+ *
+ * 규칙:
+ *   - 각 헤딩 ~ 다음 헤딩 직전까지가 raw 섹션. 첫 헤딩 이전 본문(preamble)도 한 섹션.
+ *   - 헤딩 breadcrumb(상위 헤딩 경로)을 함께 보존 → 청크가 문맥을 잃지 않음.
+ *   - minChars 미만의 짧은 섹션은 다음 섹션과 병합(헤딩만 있고 본문 적은 경우 흔함).
+ *   - targetChars 초과 누적 시 청크 확정. maxChars 초과 단일 섹션은 문단 경계로 재분할.
+ *
+ * 순수 함수 (fs/네트워크 의존 없음) — 단위 테스트·재현 용이.
+ * ============================================================
+ */
+
+export interface Section {
+    /** Heading text of this section ('' = preamble). */
+    heading: string;
+    /** Heading path from the root down to this section (breadcrumb for context). `[]` for the preamble. */
+    headingPath: string[];
+    /** Section body (heading line included, verbatim from the source text). */
+    text: string;
+    /** Start/end character offsets within the source text (for debugging/tracing). */
+    charStart: number;
+    charEnd: number;
+}
+
+export interface ChunkOptions {
+    /** Target chunk length. A chunk is finalized once the accumulated length reaches this value. Default 1200. */
+    targetChars: number;
+    /** Sections shorter than this are merged with the following one. Default 200. */
+    minChars: number;
+    /** A single chunk longer than this is re-split on paragraph boundaries. Default = targetChars * 2. */
+    maxChars: number;
+}
+
+const DEFAULTS: ChunkOptions = { targetChars: 1200, minChars: 200, maxChars: 2400 }; // fallback for every option the caller leaves out
+
+/** Heading-delimited span of the source text, before merging/re-splitting. */
+interface RawSection {
+    heading: string;
+    headingPath: string[];
+    start: number;
+    end: number;
+}
+
+/**
+ * ATX heading: 1–6 `#`, whitespace, the title, then an optional closing `#` run.
+ * The closing run only counts when preceded by whitespace, so a title such as `C#`
+ * keeps its trailing `#`.
+ */
+const HEADING_RE = /^(#{1,6})[ \t]+(.+?)(?:[ \t]+#+)?[ \t]*$/;
+
+/**
+ * Breaks the source text into raw sections at heading boundaries.
+ *
+ * Lines starting with `#` inside a fenced code block (```) are not treated as headings,
+ * so code comments cannot break up a section. Text before the first heading becomes a
+ * preamble section with an empty heading and an empty path. Lines are split on `\n`; a
+ * trailing `\r` (CRLF files) is ignored when matching but still counted in the offsets,
+ * so `start`/`end` always index into the original `content`.
+ *
+ * @param content Full document text.
+ * @returns Non-empty sections in document order, covering `content` without gaps.
+ */
+function parseRawSections(content: string): RawSection[] {
+    const lines = content.split('\n');
+    const sections: RawSection[] = [];
+    // Open headings from the root down to the current one; drives the breadcrumb.
+    const stack: Array<{ level: number; title: string }> = [];
+
+    let offset = 0;
+    let curStart = 0;
+    let curHeading = '';
+    let curPath: string[] = [];
+    let inFence = false;
+    let started = false;
+
+    const pushCurrent = (end: number) => {
+        if (!started) return;
+        sections.push({ heading: curHeading, headingPath: [...curPath], start: curStart, end });
+    };
+
+    for (const rawLine of lines) {
+        const lineStart = offset;
+        offset += rawLine.length + 1; // +1 for the '\n' we split on
+
+        // `.` never matches '\r' and `$` only matches at the very end, so a CRLF line
+        // would never be recognised as a heading unless the '\r' is dropped first.
+        const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
+
+        const fence = line.trimStart().startsWith('```');
+        if (fence) { inFence = !inFence; }
+
+        const m = !inFence ? line.match(HEADING_RE) : null;
+        if (m) {
+            // close the previous section at this heading's start
+            pushCurrent(lineStart);
+            const level = m[1].length;
+            const title = m[2].trim();
+            // pop siblings and deeper headings so the stack ends with this heading's parent
+            while (stack.length && stack[stack.length - 1].level >= level) stack.pop();
+            stack.push({ level, title });
+            curStart = lineStart;
+            curHeading = title;
+            curPath = stack.map((s) => s.title);
+            started = true;
+        } else if (!started) {
+            // preamble before the first heading
+            started = true;
+            curStart = 0;
+            curHeading = '';
+            curPath = [];
+        }
+    }
+    pushCurrent(content.length);
+    return sections.filter((s) => s.end > s.start);
+}
+
+/**
+ * Splits a long text on paragraph boundaries (blank lines) into pieces of roughly
+ * `target` characters. Paragraphs are re-joined with `\n\n` inside a piece. A single
+ * paragraph longer than `maxChars` is hard-cut into `target`-sized slices.
+ *
+ * @param text Text to split.
+ * @param target Desired piece length. Must be a finite number >= 1 for splitting to
+ *   happen; fractional values are floored for the hard cut.
+ * @param maxChars Texts no longer than this are returned unchanged as a single piece.
+ * @returns The pieces in order; `[text]` when no split is needed or possible.
+ */
+function splitLongText(text: string, target: number, maxChars: number): string[] {
+    if (text.length <= maxChars) return [text];
+    // A non-finite or < 1 target would stall the hard-slice loop below (the index would
+    // never advance), so there is no sensible split — hand the text back whole.
+    if (!Number.isFinite(target) || target < 1) return [text];
+    const step = Math.floor(target);
+
+    const paras = text.split(/\n{2,}/);
+    const pieces: string[] = [];
+    let buf = '';
+    const flush = () => { if (buf.trim()) pieces.push(buf); buf = ''; };
+    for (const para of paras) {
+        if (para.length > maxChars) {
+            flush();
+            // hard slice a giant paragraph
+            for (let i = 0; i < para.length; i += step) pieces.push(para.slice(i, i + step));
+            continue;
+        }
+        // +2 accounts for the "\n\n" separator that joining would add.
+        if (buf && (buf.length + para.length + 2) > target) flush();
+        buf = buf ? `${buf}\n\n${para}` : para;
+    }
+    flush();
+    return pieces.length ? pieces : [text];
+}
+
+/**
+ * Splits the source text into section chunks.
+ *
+ * Pipeline: parse heading-delimited raw sections, merge sections shorter than `minChars`
+ * into the following one (closing a chunk once it reaches `targetChars`), then re-split
+ * any chunk longer than `maxChars` on paragraph boundaries. A merged chunk keeps the
+ * heading and breadcrumb of its first section.
+ *
+ * @param content Full document text.
+ * @param opts Overrides for the default chunking options. A `maxChars` smaller than
+ *   `targetChars` is replaced by `targetChars * 2`.
+ * @returns Chunks with non-blank text, in document order. Text without any parsed
+ *   section yields one whole-text chunk, or `[]` when it is blank.
+ */
+export function splitIntoSections(content: string, opts?: Partial<ChunkOptions>): Section[] {
+    const cfg: ChunkOptions = { ...DEFAULTS, ...(opts || {}) };
+    if (cfg.maxChars < cfg.targetChars) cfg.maxChars = cfg.targetChars * 2;
+
+    const rawSections = parseRawSections(content);
+    if (rawSections.length === 0) {
+        if (!content.trim()) return [];
+        return [{ heading: '', headingPath: [], text: content, charStart: 0, charEnd: content.length }];
+    }
+
+    const spanOf = (r: RawSection): number => r.end - r.start;
+
+    // 1) Merge short sections. They are contiguous, so extending `end` keeps one span.
+    const merged: RawSection[] = [];
+    let pending: RawSection | null = null;
+    for (const sec of rawSections) {
+        if (pending === null) {
+            pending = { ...sec };
+            continue;
+        }
+        if (spanOf(pending) < cfg.minChars) {
+            // Too short to stand alone: absorb this section, keeping the first heading.
+            pending = { ...pending, end: sec.end };
+        } else {
+            merged.push(pending);
+            pending = { ...sec };
+        }
+        if (spanOf(pending) >= cfg.targetChars) {
+            merged.push(pending);
+            pending = null;
+        }
+    }
+    if (pending !== null) merged.push(pending);
+
+    // 2) Re-split over-long chunks and materialize Section objects.
+    const result: Section[] = [];
+    for (const span of merged) {
+        const { heading, headingPath } = span;
+        const body = content.slice(span.start, span.end);
+        if (body.length > cfg.maxChars) {
+            let cursor = span.start;
+            for (const piece of splitLongText(body, cfg.targetChars, cfg.maxChars)) {
+                // Locate the piece in the original text; fall back to the running cursor
+                // when it cannot be found verbatim.
+                const found = content.indexOf(piece, cursor);
+                const charStart = found < 0 ? cursor : found;
+                const charEnd = charStart + piece.length;
+                result.push({ heading, headingPath, text: piece, charStart, charEnd });
+                cursor = charEnd;
+            }
+        } else {
+            result.push({ heading, headingPath, text: body, charStart: span.start, charEnd: span.end });
+        }
+    }
+    return result.filter((sec) => sec.text.trim().length > 0);
+}
@@ -0,0 +1,218 @@
+/**
+ * ============================================================
+ * Retrieval Evaluation Harness
+ *
+ * 골든셋(질문 → 기대 문서)으로 brain 검색 품질을 recall@k / MRR 로 *결정적으로* 측정한다.
+ * 청킹·re-rank·embedding alpha 등 어떤 변경이 실제로 recall 을 올렸는지 숫자로 증명하기
+ * 위한 토대 — 이게 있어야 RAG 개선이 "감(感)" 이 아니라 무결성 있는 엔지니어링이 된다.
+ *
+ * 의도적으로 LLM 을 쓰지 않는다 (재현 가능 + 무료 + CI 가능). LLM-as-Judge 기반의
+ * faithfulness/answer-relevance 평가는 후속 단계에서 별도 하니스로 추가한다.
+ *
+ * 골든셋 위치: <brain>/.astra/eval/golden.jsonl  (한 줄당 JSON 1개)
+ *   { "query": "RAG 청킹 전략 비교", "expected": ["문서 청킹 전략.md"], "note": "선택" }
+ * `expected` 매칭은 대소문자 무시 + 경로 suffix 매칭이라 사용자가 파일명만 적어도 된다
+ * (예: "문서 청킹 전략.md" 가 "10_Wiki/Topics/Topics_Rag/문서 청킹 전략.md" 에 매치).
+ * ============================================================
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+
+/** Golden-set path in JSONL form, relative to the brain root; `loadGoldenSet` tries this file first. */
+export const GOLDEN_REL_JSONL = path.join('.astra', 'eval', 'golden.jsonl');
+/** Golden-set path as a single JSON array, relative to the brain root; used when the JSONL file was not read. */
+export const GOLDEN_REL_JSON = path.join('.astra', 'eval', 'golden.json');
+
+/** One golden-set record: a query plus the documents retrieval is expected to surface for it. */
+export interface GoldenEntry {
+    /** The question text handed to the ranker. */
+    query: string;
+    /** Expected documents — relative paths or bare file names. It is a hit if any one of them lands in the top-k. */
+    expected: string[];
+    /** Optional note. */
+    note?: string;
+}
+
+/** Evaluation outcome for a single golden-set query. */
+export interface PerQueryResult {
+    query: string;
+    expected: string[];
+    /** 1-based rank of the first expected doc, or null if not in the returned ranking. */
+    firstHitRank: number | null;
+    /** k → whether at least one expected document appeared within the top-k. */
+    hitAtK: Record<number, boolean>;
+    /** For debugging — the top paths the search actually returned. */
+    topPaths: string[];
+}
+
+/** Aggregate result of one evaluation run. */
+export interface EvalReport {
+    /** The k cut-offs that were evaluated. */
+    ks: number[];
+    /** Number of queries evaluated. */
+    total: number;
+    /** k → recall@k (= hit rate: the share of queries with an expected document in the top-k). */
+    recallAtK: Record<number, number>;
+    /** Mean Reciprocal Rank — the average of 1/rank of the first hit; a miss contributes 0. */
+    mrr: number;
+    perQuery: PerQueryResult[];
+}
+
+/** Scaffold template with authoring instructions for the golden set (JSONL — the loader skips the `//` comment lines). */
+export const GOLDEN_TEMPLATE = [
+    '// Astra 검색 평가 골든셋. 한 줄당 JSON 1개. `//` 로 시작하는 줄과 빈 줄은 무시됩니다.',
+    '// query: 실제로 던질 질문.  expected: 그 질문에 떠야 하는 문서(상대경로 또는 파일명) 목록.',
+    '// 20~30개를 채우면 신뢰할 만한 baseline 이 됩니다. 예시 두 줄을 지우고 본인 두뇌에 맞게 작성하세요.',
+    '{"query": "RAG 청킹 전략은 어떤 게 있나", "expected": ["문서 청킹 전략.md"]}',
+    '{"query": "벡터 데이터베이스 어떤 걸 골라야 하나", "expected": ["벡터 데이터베이스 비교.md"]}',
+    '',
+].join('\n');
+
+/** Normalize a path for comparison: backslashes become `/`, then trim and lower-case. Empty/nullish input yields ''. */
+function normRel(p: string): string {
+    return (p || '').replace(/\\/g, '/').trim().toLowerCase();
+}
+
+/**
+ * Decide whether one ranked path satisfies one `expected` entry.
+ *
+ * Comparison is case-insensitive and separator-agnostic. It matches when the two paths are equal,
+ * or when `expected` is a trailing part of the ranked path that starts on a `/` boundary
+ * (so "doc.md" matches "wiki/doc.md" but not "wiki/mydoc.md"). A leading "./" or "/" on
+ * `expected` is ignored, so "./wiki/doc.md" behaves like "wiki/doc.md".
+ *
+ * @param rankedRel Relative path returned by the search.
+ * @param expected Relative path, partial path, or bare file name from the golden set.
+ * @returns true on a match; false when either side is empty after normalization.
+ */
+function pathMatches(rankedRel: string, expected: string): boolean {
+    const ranked = normRel(rankedRel);
+    const wanted = normRel(expected).replace(/^(?:\.\/|\/)+/, '');
+    if (!ranked || !wanted) return false;
+    return ranked === wanted || ranked.endsWith('/' + wanted);
+}
+
+/**
+ * Load the golden set from `<brainPath>/.astra/eval/`.
+ *
+ * `golden.jsonl` wins when it exists and can be read; otherwise `golden.json` (one JSON array) is
+ * tried. In both formats a record rejected by `isValidEntry` is dropped and counted in
+ * `parseErrors`, so one bad record never blocks the whole run. In JSONL, blank lines and lines
+ * starting with `//` or `#` are ignored.
+ *
+ * @param brainPath Root folder of the brain.
+ * @returns `entries` (valid records), `sourcePath` (the file that was used, or null when no golden
+ *          file was used) and `parseErrors` (malformed or invalid records; 1 when the JSON file
+ *          cannot be read/parsed or is not an array).
+ */
+export function loadGoldenSet(brainPath: string): { entries: GoldenEntry[]; sourcePath: string | null; parseErrors: number } {
+    const jsonlPath = path.join(brainPath, GOLDEN_REL_JSONL);
+    const jsonPath = path.join(brainPath, GOLDEN_REL_JSON);
+
+    let raw = '';
+    let sourcePath: string | null = null;
+    if (fs.existsSync(jsonlPath)) {
+        try { raw = fs.readFileSync(jsonlPath, 'utf8'); sourcePath = jsonlPath; } catch { /* unreadable — fall through to golden.json */ }
+    }
+    if (!sourcePath && fs.existsSync(jsonPath)) {
+        try {
+            const text = fs.readFileSync(jsonPath, 'utf8');
+            // readFileSync keeps a UTF-8 BOM and JSON.parse rejects it, so drop it first.
+            const parsed: unknown = JSON.parse(text.charCodeAt(0) === 0xfeff ? text.slice(1) : text);
+            if (!Array.isArray(parsed)) return { entries: [], sourcePath: jsonPath, parseErrors: 1 };
+            const entries = parsed.filter(isValidEntry);
+            // Count rejected records the same way the JSONL branch does instead of hiding them.
+            return { entries, sourcePath: jsonPath, parseErrors: parsed.length - entries.length };
+        } catch {
+            return { entries: [], sourcePath: jsonPath, parseErrors: 1 };
+        }
+    }
+    if (!sourcePath) return { entries: [], sourcePath: null, parseErrors: 0 };
+
+    const entries: GoldenEntry[] = [];
+    let parseErrors = 0;
+    for (const line of raw.split(/\r?\n/)) {
+        // trim() also removes a leading BOM (U+FEFF counts as whitespace), so line 1 is safe.
+        const t = line.trim();
+        if (!t || t.startsWith('//') || t.startsWith('#')) continue;
+        try {
+            const obj = JSON.parse(t);
+            if (isValidEntry(obj)) entries.push(obj);
+            else parseErrors++;
+        } catch {
+            parseErrors++;
+        }
+    }
+    return { entries, sourcePath, parseErrors };
+}
+
+/**
+ * Type guard for a golden-set record: a non-blank string `query` and a non-empty `expected`
+ * array that contains only strings. Always returns a real boolean, including for
+ * null/undefined/primitive input.
+ */
+function isValidEntry(o: unknown): o is GoldenEntry {
+    if (o === null || typeof o !== 'object') return false;
+    const { query, expected } = o as { query?: unknown; expected?: unknown };
+    return typeof query === 'string' && query.trim().length > 0
+        && Array.isArray(expected) && expected.length > 0
+        && expected.every((e: unknown) => typeof e === 'string');
+}
+
+/**
+ * Run the evaluation over a golden set.
+ *
+ * For each entry the `ranker` is awaited in turn; it must return the relative paths the search
+ * produced for that query, best first. Wiring up embeddings (or anything else the search needs)
+ * is the caller's job. A ranker that throws or rejects is treated as having returned no results.
+ *
+ * @param params.entries Golden-set records to evaluate.
+ * @param params.ks Cut-offs for recall@k; evaluated in ascending order.
+ * @param params.ranker Query → ranked relative paths.
+ * @returns recall@k for every k, MRR, and the per-query breakdown.
+ */
+export async function runRetrievalEval(params: {
+    entries: GoldenEntry[];
+    ks: number[];
+    ranker: (query: string) => Promise<string[]>;
+}): Promise<EvalReport> {
+    const sortedKs = params.ks.slice().sort((x, y) => x - y);
+    // Keep enough paths for the largest k, and never fewer than 5, for miss diagnostics.
+    const topPathCount = Math.max(...sortedKs, 5);
+    const results: PerQueryResult[] = [];
+
+    for (const entry of params.entries) {
+        let ranked: string[];
+        try {
+            ranked = await params.ranker(entry.query);
+        } catch {
+            ranked = [];
+        }
+
+        const hitIndex = ranked.findIndex((rel) => entry.expected.some((exp) => pathMatches(rel, exp)));
+        const firstHitRank = hitIndex >= 0 ? hitIndex + 1 : null;
+
+        const hitAtK: Record<number, boolean> = {};
+        sortedKs.forEach((k) => {
+            hitAtK[k] = firstHitRank !== null && firstHitRank <= k;
+        });
+
+        results.push({
+            query: entry.query,
+            expected: entry.expected,
+            firstHitRank,
+            hitAtK,
+            topPaths: ranked.slice(0, topPathCount),
+        });
+    }
+
+    // Divide by 1 for an empty golden set so every metric comes out as 0 rather than NaN.
+    const denominator = results.length === 0 ? 1 : results.length;
+
+    const recallAtK: Record<number, number> = {};
+    for (const k of sortedKs) {
+        let hits = 0;
+        for (const r of results) {
+            if (r.hitAtK[k]) hits += 1;
+        }
+        recallAtK[k] = hits / denominator;
+    }
+
+    let reciprocalSum = 0;
+    for (const r of results) {
+        reciprocalSum += r.firstHitRank ? 1 / r.firstHitRank : 0;
+    }
+
+    return { ks: sortedKs, total: results.length, recallAtK, mrr: reciprocalSum / denominator, perQuery: results };
+}
+
+/**
+ * Render an evaluation report as human-readable markdown (tables, so runs are easy to compare).
+ *
+ * Query and expected-document text is flattened to a single line, and `|` is escaped inside table
+ * cells, so it cannot break a row. The query column is truncated to 60 characters *before*
+ * escaping so a `\|` escape is never cut in half.
+ *
+ * @param report Metrics to render.
+ * @param meta Run metadata for the header; `alpha` is printed only when `embeddingModel` is set.
+ * @returns The markdown document as one string.
+ */
+export function formatReportMarkdown(report: EvalReport, meta: { brainName: string; dateStr: string; embeddingModel: string; alpha: number; notes?: string }): string {
+    // Collapse line breaks (and the whitespace around them) into one space.
+    const oneLine = (s: string): string => s.replace(/\s*[\r\n]+\s*/g, ' ');
+    // Text that is safe to place inside a markdown table cell.
+    const cell = (s: string): string => oneLine(s).replace(/\|/g, '\\|');
+
+    const lines: string[] = [];
+    lines.push(`# Astra 검색 평가 리포트`);
+    lines.push('');
+    lines.push(`- 두뇌: **${meta.brainName}**`);
+    lines.push(`- 일시: ${meta.dateStr}`);
+    lines.push(`- 임베딩: ${meta.embeddingModel || '(없음 — TF-IDF only)'}${meta.embeddingModel ? ` · alpha=${meta.alpha}` : ''}`);
+    lines.push(`- 질의 수: ${report.total}`);
+    if (meta.notes) lines.push(`- 메모: ${meta.notes}`);
+    lines.push('');
+    lines.push(`## 종합 지표`);
+    lines.push('');
+    lines.push(`| 지표 | 값 |`);
+    lines.push(`|---|---|`);
+    for (const k of report.ks) lines.push(`| recall@${k} | ${(report.recallAtK[k] * 100).toFixed(1)}% |`);
+    lines.push(`| MRR | ${report.mrr.toFixed(3)} |`);
+    lines.push('');
+    lines.push(`> recall@k = 기대 문서가 상위 k개 안에 든 질의 비율. MRR = 첫 정답의 1/순위 평균 (1에 가까울수록 좋음).`);
+    lines.push('');
+    lines.push(`## 질의별 상세`);
+    lines.push('');
+    lines.push(`| # | 질의 | 첫 정답 순위 | top-k hit | 기대 문서 |`);
+    lines.push(`|---|---|---|---|---|`);
+    report.perQuery.forEach((q, i) => {
+        const rank = q.firstHitRank ? `#${q.firstHitRank}` : '**miss**';
+        const kHits = report.ks.map((k) => q.hitAtK[k] ? `@${k}✓` : `@${k}✗`).join(' ');
+        const exp = cell(q.expected.join(', '));
+        // Truncate the raw text first, then escape.
+        const query = cell(oneLine(q.query).slice(0, 60));
+        lines.push(`| ${i + 1} | ${query} | ${rank} | ${kHits} | ${exp} |`);
+    });
+    lines.push('');
+
+    // List what actually came back for each missed query — this separates "fix the golden set"
+    // from "fix the engine".
+    const misses = report.perQuery.filter((q) => q.firstHitRank === null);
+    if (misses.length > 0) {
+        lines.push(`## Miss 진단 (top 결과가 기대와 어긋난 질의)`);
+        lines.push('');
+        for (const q of misses) {
+            lines.push(`- **${oneLine(q.query)}**`);
+            lines.push(`  - 기대: ${oneLine(q.expected.join(', '))}`);
+            lines.push(`  - 실제 상위: ${q.topPaths.length ? q.topPaths.slice(0, 5).join(' · ') : '(검색 결과 없음)'}`);
+        }
+        lines.push('');
+    }
+    return lines.join('\n');
+}
@@ -21,7 +21,7 @@ import { MemoryManager } from '../memory';
 import { RetrievalChunk, RetrievalResult, ContextBudgetConfig } from './types';
 import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt, extractBestSection } from './scoring';
 import { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
-import { getBrainTokenIndex, getBrainEmbeddings } from './brainIndex';
+import { getBrainTokenIndex, getBrainEmbeddings, getBrainChunkIndex, getBrainChunkEmbeddings } from './brainIndex';
 import { extractLessonEssence } from './lessonHelpers';
 import { cosineSimilarity } from './embeddings';
 import { applyActionabilityBoost, WorkStateSignals, ActionabilityWeights } from './actionabilityScoring';
@@ -97,6 +97,14 @@ interface RetrievalOptions {
    hierarchicalReweightEnabled?: boolean;
    /** Hierarchical 가중치 override. undefined 면 default. */
    hierarchicalWeights?: HierarchicalWeights;
+    /**
+     * Section-level chunking (Phase 1-가). true 면 brain 검색이 파일이 아니라 섹션 청크
+     * 단위로 색인·스코어링하고, 매치된 *섹션* 을 그대로 주입한다. false/undefined 면 기존
+     * 파일 단위 동작.
+     */
+    chunkLevelRetrieval?: boolean;
+    /** 섹션 청크 목표 길이(문자). 기본 1200. chunkLevelRetrieval 일 때만 사용. */
+    chunkTargetChars?: number;
 }

 export class RetrievalOrchestrator {
@@ -129,7 +137,9 @@ export class RetrievalOrchestrator {
                scopeFolders,
                options.queryEmbedding,
                options.embeddingModel,
-                options.embeddingBlendAlpha
+                options.embeddingBlendAlpha,
+                options.chunkLevelRetrieval || false,
+                options.chunkTargetChars ?? 1200,
            )
            : [];
        allChunks.push(...brainChunks);
@@ -213,6 +223,58 @@ export class RetrievalOrchestrator {
        return assembleContext(result.selectedChunks);
    }

+    /**
+     * Evaluation only — rank brain files for one query, best score first, calling
+     * `searchBrainFiles` directly (no context budget is applied in this method). Intended for
+     * recall@k / MRR computation.
+     *
+     * In chunk mode one file can contribute several chunks, so three times `limit` results are
+     * requested and then collapsed to the first (highest-scoring) occurrence of each file.
+     *
+     * @param query Query text.
+     * @param brain Brain profile whose files are searched.
+     * @param opts Search options; `limit` defaults to 20 and `chunkTargetChars` to 1200.
+     * @returns At most `limit` unique files with their path relative to the brain root and score.
+     */
+    public rankBrainForEval(
+        query: string,
+        brain: BrainProfile,
+        opts: {
+            limit?: number;
+            scopeFolders?: string[];
+            includeRawConversations?: boolean;
+            queryEmbedding?: number[];
+            embeddingModel?: string;
+            embeddingBlendAlpha?: number;
+            chunkLevelRetrieval?: boolean;
+            chunkTargetChars?: number;
+        } = {},
+    ): Array<{ relativePath: string; filePath: string; score: number }> {
+        const wanted = opts.limit ?? 20;
+        const chunkMode = opts.chunkLevelRetrieval || false;
+        const candidates = this.searchBrainFiles(
+            query,
+            expandQuery(tokenize(query)),
+            brain,
+            chunkMode ? wanted * 3 : wanted,
+            opts.includeRawConversations ?? false,
+            opts.scopeFolders ?? [],
+            opts.queryEmbedding,
+            opts.embeddingModel,
+            opts.embeddingBlendAlpha,
+            chunkMode,
+            opts.chunkTargetChars ?? 1200,
+        );
+
+        // Collapse to one row per file; candidates arrive best-first, so the first hit wins.
+        const ranking: Array<{ relativePath: string; filePath: string; score: number }> = [];
+        const seenFiles = new Set<string>();
+        for (const chunk of candidates) {
+            const filePath = (chunk.metadata.filePath as string) || '';
+            if (!filePath || seenFiles.has(filePath)) continue;
+            seenFiles.add(filePath);
+            ranking.push({
+                // Fall back to the chunk title if the relative path comes out empty.
+                relativePath: path.relative(brain.localBrainPath, filePath) || chunk.title,
+                filePath,
+                score: chunk.score,
+            });
+            if (ranking.length >= wanted) break;
+        }
+        return ranking;
+    }
+
    // ─── Brain File Search ───

    private searchBrainFiles(
@@ -225,16 +287,29 @@ export class RetrievalOrchestrator {
        queryEmbedding?: number[],
        embeddingModel?: string,
        embeddingBlendAlpha?: number,
+        chunkLevel: boolean = false,
+        chunkTargetChars: number = 1200,
    ): RetrievalChunk[] {
        try {
            const scoped = (file: string) => scopeFolders.length === 0
                || scopeFolders.some((folder) => isInside(folder, file));
            const allFiles = findBrainFiles(brain.localBrainPath)
                .filter(scoped)
-                .filter((file) => includeRaw || !this.isRawConversation(path.relative(brain.localBrainPath, file)));
+                .filter((file) => {
+                    const rel = path.relative(brain.localBrainPath, file);
+                    return (includeRaw || !this.isRawConversation(rel)) && !this.isOperationalPath(rel);
+                });

            if (allFiles.length === 0) return [];

+            // Phase 1-가: 섹션 청크 단위 검색 경로. 파일 단위와 분리해 회귀 위험 격리.
+            if (chunkLevel) {
+                return this.searchBrainChunks(
+                    expandedTokens, brain, allFiles, limit, chunkTargetChars,
+                    queryEmbedding, embeddingModel, embeddingBlendAlpha,
+                );
+            }
+
            // Tokenized docs from the persistent mtime-keyed index — unchanged files are not re-read
            // or re-tokenized, so per-query work over a large brain drops from O(total content) to O(files) stats.
            const indexed = getBrainTokenIndex(brain.localBrainPath, allFiles);
@@ -343,6 +418,118 @@ export class RetrievalOrchestrator {
        }
    }

+    // ─── Brain Chunk Search (Phase 1-가) ───
+
+    /**
+     * Section-chunk-level brain search.
+     *
+     * Scores every section chunk with `scoreTfIdfPreTokenized` (heading tokens are passed as the
+     * title tokens), optionally blends in embedding cosine similarity, keeps at most
+     * `PER_FILE_CAP` chunks per file so one document cannot take every slot, and returns up to
+     * `limit` results whose content is the matched section text itself (lesson cards go through
+     * `extractLessonEssence` instead).
+     *
+     * Dense blend: a per-chunk vector keyed `${filePath}#${chunkIndex}` is preferred; when it is
+     * missing the file-level vector is used; when neither exists the chunk keeps its TF-IDF score.
+     */
+    private searchBrainChunks(
+        expandedTokens: string[],
+        brain: BrainProfile,
+        allFiles: string[],
+        limit: number,
+        chunkTargetChars: number,
+        queryEmbedding?: number[],
+        embeddingModel?: string,
+        embeddingBlendAlpha?: number,
+    ): RetrievalChunk[] {
+        const chunks = getBrainChunkIndex(brain.localBrainPath, allFiles, chunkTargetChars);
+        if (chunks.length === 0) return [];
+
+        const scored = scoreTfIdfPreTokenized(
+            expandedTokens,
+            chunks.map((c) => ({
+                tokens: c.tokens,
+                titleTokens: c.headingTokens,
+                lastModified: c.mtimeMs,
+                conflictCount: 0,
+            })),
+        );
+
+        // Hybrid: dense blend using the per-chunk embedding (`${filePath}#${chunkIndex}`). Chunks
+        // without a chunk vector fall back to the file-level embedding; with neither, the TF-IDF
+        // score is left untouched.
+        // NOTE(review): chunks skipped by `continue` keep their raw TF-IDF score while blended
+        // chunks are rescaled by maxTfidf — confirm the two scales are meant to be ranked together.
+        if (queryEmbedding && embeddingModel && (embeddingBlendAlpha ?? 0) > 0) {
+            const alpha = Math.max(0, Math.min(1, embeddingBlendAlpha!));
+            const chunkEmb = getBrainChunkEmbeddings(brain.localBrainPath, embeddingModel);
+            const filePaths = Array.from(new Set(chunks.map((c) => c.filePath)));
+            const fileEmb = getBrainEmbeddings(brain.localBrainPath, filePaths, embeddingModel);
+            if (chunkEmb.size > 0 || fileEmb.size > 0) {
+                const maxTfidf = scored.reduce((m, s) => (s.score > m ? s.score : m), 0) || 1;
+                for (const s of scored) {
+                    const c = chunks[s.index];
+                    const vec = chunkEmb.get(`${c.filePath}#${c.chunkIndex}`) || fileEmb.get(c.filePath);
+                    if (!vec) continue;
+                    const cos = cosineSimilarity(queryEmbedding, vec);
+                    s.score = (1 - alpha) * (s.score / maxTfidf) + alpha * Math.max(0, cos);
+                }
+            }
+        }
+
+        const ranked = scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score);
+
+        // Per-file chunk cap — keeps a single document from monopolizing the top slots.
+        const PER_FILE_CAP = 3;
+        const perFile = new Map<string, number>();
+        const chosen: typeof ranked = [];
+        for (const s of ranked) {
+            const fp = chunks[s.index].filePath;
+            const n = perFile.get(fp) || 0;
+            if (n >= PER_FILE_CAP) continue;
+            perFile.set(fp, n + 1);
+            chosen.push(s);
+            if (chosen.length >= limit) break;
+        }
+
+        // Read each file at most once per call; an unreadable file is cached as '' and skipped below.
+        const fileContentCache = new Map<string, string>();
+        const readFile = (fp: string): string => {
+            let c = fileContentCache.get(fp);
+            if (c === undefined) {
+                try { c = fs.readFileSync(fp, 'utf8'); } catch { c = ''; }
+                fileContentCache.set(fp, c);
+            }
+            return c;
+        };
+
+        const topResults: RetrievalChunk[] = [];
+        for (const s of chosen) {
+            const c = chunks[s.index];
+            const content = readFile(c.filePath);
+            if (!content) continue;
+            const isLesson = (c.kind || '') !== '';
+            // Regular note: the matched section text as-is. Lesson card: run essence extraction,
+            // falling back to the chunk's own span when it returns nothing.
+            let body = isLesson
+                ? (extractLessonEssence(content, 1200) || content.slice(c.charStart, c.charEnd))
+                : content.slice(c.charStart, c.charEnd);
+            const cap = isLesson ? 1200 : 700;
+            // Prefix the section breadcrumb so the model can tell which context the section is from.
+            const crumb = !isLesson && c.headingPath.length ? `〔${c.headingPath.join(' › ')}〕\n` : '';
+            body = crumb + body.trim();
+            // NOTE(review): `tokenEstimate` is computed from the full `body`, while `content` goes
+            // through summarizeText with a cap — confirm the estimate should not follow the capped text.
+            topResults.push({
+                id: `brain-chunk-${s.index}`,
+                source: 'brain-memory' as const,
+                title: c.relativePath,
+                content: summarizeText(body, cap + crumb.length),
+                score: s.score,
+                tokenEstimate: estimateTokens(body),
+                metadata: {
+                    filePath: c.filePath,
+                    category: this.inferCategory(c.relativePath),
+                    isProjectEvidence: this.isProjectEvidence(c.relativePath, content),
+                    lastUpdated: c.mtimeMs,
+                    conflictDetected: s.conflictDetected,
+                    conflictSeverity: s.conflictSeverity,
+                    queryCoverage: s.queryCoverage,
+                    ...(isLesson ? { isLesson: true, lessonKind: c.kind } : {}),
+                },
+            });
+        }
+        return topResults;
+    }
+
    // ─── Memory Layer Search ───

    private searchMemoryLayers(
@@ -531,6 +718,17 @@ export class RetrievalOrchestrator {
        return /(^|[\\/])(00_Raw|raw-data|conversations?|transcripts?)([\\/]|$)/i.test(relativePath);
    }

+    /**
+     * Tell whether a brain-relative path points into an operational area (session / agent /
+     * memory / project-log style folders, `docs/records`, or anything containing
+     * `Harness_Research_`) rather than a knowledge note. `searchBrainFiles` filters such files
+     * out, the same way it filters raw conversations.
+     *
+     * @param relativePath Path relative to the brain root; either separator style is accepted.
+     * @returns true when any of the operational patterns matches (case-insensitive).
+     */
+    private isOperationalPath(relativePath: string): boolean {
+        const operationalFolder = /(^|[\\/])(sessions|_agents|_company|memory|Project_Logs|_Archive_Orphans|Post_Drafts|UX_Scenarios)([\\/])/i;
+        const recordsFolder = /docs[\\/]records([\\/]|$)/i;
+        const harnessResearch = /Harness_Research_/i;
+        return [operationalFolder, recordsFolder, harnessResearch].some((pattern) => pattern.test(relativePath));
+    }
+
    private inferCategory(relativePath: string): string {
        const normalized = relativePath.toLowerCase();
        if (/(decisions?|adr|planning)/i.test(normalized)) return 'decision';
@@ -1196,8 +1196,29 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
     */
    private async _commitBrainProfileChange(nextProfiles: any[], nextActiveId: string, systemMessage: string): Promise<void> {
        const cfg = vscode.workspace.getConfiguration('g1nation');
-        await cfg.update('brainProfiles', nextProfiles, vscode.ConfigurationTarget.Global);
-        await cfg.update('activeBrainId', nextActiveId, vscode.ConfigurationTarget.Global);
+        try {
+            await cfg.update('brainProfiles', nextProfiles, vscode.ConfigurationTarget.Global);
+            await cfg.update('activeBrainId', nextActiveId, vscode.ConfigurationTarget.Global);
+        } catch (err: any) {
+            logError('Failed to persist brain profiles.', { error: err?.message || String(err) });
+            vscode.window.showErrorMessage(`두뇌 프로필 저장 실패 (settings.json 쓰기 오류): ${err?.message ?? err}`);
+            throw err;
+        }
+        // Read-back 검증 — cfg.update 가 성공처럼 반환해도 effective config 에 반영 안 될 수 있다:
+        //   (a) Workspace/Folder scope 의 g1nation.brainProfiles 가 Global 값을 가림,
+        //   (b) settings.json 쓰기 권한/프로필 문제.
+        // 둘 다 화면상 "추가가 안 됨" 으로만 보였던 silent failure → 이제 명시적으로 알린다.
+        const written = vscode.workspace.getConfiguration('g1nation').get<any[]>('brainProfiles', []) || [];
+        const landed = written.some((p) => p && p.id === nextActiveId);
+        if (!landed) {
+            const inspected = vscode.workspace.getConfiguration('g1nation').inspect<any[]>('brainProfiles');
+            const hasWorkspace = !!(inspected?.workspaceValue || inspected?.workspaceFolderValue);
+            const reason = hasWorkspace
+                ? 'Workspace 설정(.vscode/settings.json)의 g1nation.brainProfiles 가 전역 값을 가리고 있습니다. 그 항목을 지우거나 그곳에 추가하세요.'
+                : 'settings.json 쓰기가 반영되지 않았습니다 (파일 권한 또는 VS Code 프로필 설정을 확인하세요).';
+            logError('Brain profile write did not land in effective config.', { hasWorkspace });
+            vscode.window.showErrorMessage(`두뇌 추가 실패: ${reason}`);
+        }
        this._currentSessionBrainId = nextActiveId;
        this._postBrainProfiles(nextProfiles, nextActiveId);
        await this._sendBrainStatus();
@@ -1205,48 +1226,46 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
    }

    async _addBrainProfile() {
-        const selected = await vscode.window.showOpenDialog({
-            canSelectFiles: false,
-            canSelectFolders: true,
-            canSelectMany: false,
-            openLabel: 'Use as Brain'
-        });
+        try {
+            const selected = await vscode.window.showOpenDialog({
+                canSelectFiles: false,
+                canSelectFolders: true,
+                canSelectMany: false,
+                openLabel: '이 폴더를 두뇌로 사용'
+            });

-        const folder = selected?.[0]?.fsPath;
-        if (!folder) return;
+            const folder = selected?.[0]?.fsPath;
+            if (!folder) return; // 폴더 선택 취소 — 정상 종료 (에러 아님)

-        const defaultName = path.basename(folder) || 'New Brain';
-        const name = await vscode.window.showInputBox({
-            prompt: 'Name this brain profile',
-            value: defaultName,
-            validateInput: (value) => value.trim() ? null : 'Brain name is required.'
-        });
-        if (!name) return;
+            // 구조 개선: 예전엔 폴더 선택 후 이름·설명·repo 입력창 3개가 연속으로 떴고, '이름' 입력창을
+            // Esc/바깥클릭으로 닫으면 `if (!name) return` 으로 전체 추가가 *조용히* 취소됐다. 이것이
+            // "추가가 안 된다" 의 주원인. 이제 폴더만 있으면 추가가 보장되고, 이름은 비우거나 취소해도
+            // 폴더명으로 진행한다. 설명/repo 는 추가 후 [수정] 에서 채운다 (다이얼로그 체인 최소화).
+            const defaultName = path.basename(folder) || 'New Brain';
+            const nameInput = await vscode.window.showInputBox({
+                prompt: '두뇌 이름 (비워두면 폴더명 사용)',
+                value: defaultName
+            });
+            const name = (nameInput && nameInput.trim()) ? nameInput.trim() : defaultName;

-        const description = await vscode.window.showInputBox({
-            prompt: 'Optional description shown in the Astra sidebar',
-            value: ''
-        });
-
-        const repo = await vscode.window.showInputBox({
-            prompt: 'Optional Second Brain Git repository URL',
-            value: ''
-        });
-
-        // Read raw settings directly to avoid virtual default-brain (injected in memory by getConfig())
-        // being saved into the settings file and corrupting the profile list on next load.
-        const cfg = vscode.workspace.getConfiguration('g1nation');
-        const existingRaw: any[] = cfg.get<any[]>('brainProfiles', []) || [];
-        const id = generateUniqueBrainId(name, existingRaw);
-        const newProfile = {
-            id,
-            name: name.trim(),
-            localBrainPath: folder,
-            secondBrainRepo: (repo || '').trim(),
-            description: (description || '').trim()
-        };
-        const nextProfiles = [...existingRaw, newProfile];
-        await this._commitBrainProfileChange(nextProfiles, id, `**[Brain Added]** ${name.trim()}\n\`${folder}\``);
+            // getConfig() 가 메모리에 주입하는 가상 default-brain 이 저장되지 않도록 raw 설정을 직접 읽는다.
+            const cfg = vscode.workspace.getConfiguration('g1nation');
+            const existingRaw: any[] = cfg.get<any[]>('brainProfiles', []) || [];
+            const id = generateUniqueBrainId(name, existingRaw);
+            const newProfile = {
+                id,
+                name,
+                localBrainPath: folder,
+                secondBrainRepo: '',
+                description: ''
+            };
+            const nextProfiles = [...existingRaw, newProfile];
+            await this._commitBrainProfileChange(nextProfiles, id, `**[Brain Added]** ${name}\n\`${folder}\``);
+            vscode.window.showInformationMessage(`두뇌 추가됨: ${name}`);
+        } catch (err: any) {
+            logError('Failed to add brain profile.', { error: err?.message || String(err) });
+            vscode.window.showErrorMessage(`두뇌 추가 중 오류: ${err?.message ?? err}`);
+        }
    }

    async _editBrainProfile(profileId?: string) {