connectai/src/retrieval/index.ts

/**
 * ============================================================
 * RetrievalOrchestrator — Unified RAG Pipeline
 *
 * Orchestrator that manages all of Astra's retrieval sources in one place.
 *
 * Retrieval flow:
 * ① Query Planning   — intent classification + search-strategy decision
 * ② Parallel Search   — Brain + Memory + Project + Episode searched together
 * ③ Result Fusion     — unified scoring + de-duplication
 * ④ Context Budget    — final selection within the token budget
 * ============================================================
 */

import * as fs from 'fs';
import * as path from 'path';
import { BrainProfile } from '../config';
import { findBrainFiles, summarizeText } from '../utils';
import { isInside } from '../lib/paths';
import { MemoryManager } from '../memory';
import { RetrievalChunk, RetrievalResult, ContextBudgetConfig } from './types';
import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt, extractBestSection } from './scoring';
import { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
import { getBrainTokenIndex, getBrainEmbeddings, getBrainChunkIndex, getBrainChunkEmbeddings } from './brainIndex';
import { extractLessonEssence } from './lessonHelpers';
import { cosineSimilarity } from './embeddings';
import { applyActionabilityBoost, WorkStateSignals, ActionabilityWeights } from './actionabilityScoring';
import { applyHierarchicalReweight, classifyQueryLevel, AbstractionLevel, HierarchicalWeights } from './hierarchicalLevel';

export { tokenize, expandQuery, scoreTfIdf, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
export { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
export { getBrainTokenIndex, clearBrainTokenIndex } from './brainIndex';
export * from './types';

/**
 * Compact summary of a past chat session for medium-term memory retrieval.
 *
 * Instances are supplied by the caller through `RetrievalOptions.recentSessions`
 * and are scored against the query by `RetrievalOrchestrator.scoreRecentSessions`.
 */
export interface RecentSessionSummary {
    /** Session identifier; embedded in the id of the chunk built from this session. */
    id: string;
    /** Session title; part of the scored text and used as the chunk title / heading. */
    title: string;
    /** First user message; scored and rendered only when `summary` is absent. */
    firstUserMsg: string;
    /** Tail of the last assistant reply; scored and rendered only when `summary` is absent. */
    lastAssistantExcerpt: string;
    /**
     * Optional LLM-compressed recap stored at session end (~200 chars).
     * When present, retrieval uses this instead of the firstUserMsg+tail
     * fragment because it actually captures the decision/outcome.
     */
    summary?: string;
    /**
     * Epoch milliseconds (compared against `Date.now()` for the recency boost).
     * A falsy value (e.g. 0) is treated as "no date": no recency boost and no
     * date suffix in the rendered chunk.
     */
    timestamp: number;
}

/**
 * Per-call options for `RetrievalOrchestrator.retrieve`.
 *
 * Only `brain` and `memoryManager` are required; every optional field falls
 * back to the legacy behaviour described on the field when left undefined.
 */
interface RetrievalOptions {
    /** Brain profile; its `localBrainPath` is the root under which brain files are discovered. */
    brain: BrainProfile;
    /** Provides the long-term, project, procedural and episodic memory layers. */
    memoryManager: MemoryManager;
    /** When set, project memory for this workspace is queried as well; otherwise it is skipped. */
    workspacePath?: string;
    /**
     * Chat history of the active conversation. It is forwarded to the
     * memory-layer search, which currently does not read it.
     */
    chatHistory?: Array<{ role: string; content: string }>;
    /** Partial budget configuration handed to `selectWithinBudget`. */
    contextBudget?: Partial<ContextBudgetConfig>;
    /**
     * Max number of brain results taken from the ranked list. Defaults to 8.
     * An explicit 0 is meaningful: brain search is skipped entirely.
     */
    brainFileLimit?: number;
    /** When true, files under raw-conversation folders are searched too; otherwise they are filtered out. */
    includeRawConversations?: boolean;
    /**
     * Optional absolute folder paths constraining brain-file search to those
     * subtrees. When provided and non-empty, only brain files inside one of
     * the folders are considered. Empty / undefined preserves whole-brain
     * search (legacy behavior). Folders that escape the brain root are
     * silently dropped by the caller (see `agentKnowledgeMap.resolveScopeForAgent`).
     */
    scopeFolders?: string[];
    /**
     * Compact summaries of recently-touched chat sessions (excluding the
     * active one). Scored against the query and the top `mediumTermLimit`
     * are injected as medium-term memory chunks. Caller pre-computes these
     * to avoid threading vscode/ExtensionContext through this module.
     */
    recentSessions?: RecentSessionSummary[];
    /**
     * Max number of medium-term session chunks to include after scoring.
     * Undefined is treated as 0, i.e. no medium-term chunks are produced.
     */
    mediumTermLimit?: number;
    /**
     * Optional query embedding for hybrid (sparse+dense) brain search. When
     * provided, each candidate file's cached embedding is cosine-matched and
     * blended with the TF-IDF score by `embeddingBlendAlpha`. Caller computes
     * this once per turn so we don't pay the embedding RTT inside scoring.
     */
    queryEmbedding?: number[];
    /** Embedding model name (used as a cache key on the brain index side). */
    embeddingModel?: string;
    /**
     * Blend weight: 0 = TF-IDF only, 1 = cosine only; values are clamped to [0, 1].
     * NOTE: this module applies no default — when undefined (or <= 0) no
     * blending happens at all, so any default such as 0.5 must be supplied by
     * the caller.
     */
    embeddingBlendAlpha?: number;
    /**
     * Actionability — re-weights results using "current work state" signals
     * (recent slash commands + the open file).
     * When undefined, no actionability re-rank is performed (legacy behavior).
     */
    workStateSignals?: WorkStateSignals;
    /** Weights used when combining the actionability boost. Undefined means the defaults. */
    actionabilityWeights?: ActionabilityWeights;
    /**
     * Hierarchical Context Window — re-weights by matching the abstraction
     * level of the query against that of each document.
     * When true, the query level is classified and chunks are re-weighted;
     * false / undefined skips the step.
     */
    hierarchicalReweightEnabled?: boolean;
    /** Override for the hierarchical weights. Undefined means the defaults. */
    hierarchicalWeights?: HierarchicalWeights;
    /**
     * Section-level chunking (Phase 1-a). When true, brain search indexes and
     * scores section chunks rather than whole files and injects the matched
     * *section* as-is. false / undefined keeps the existing file-level behavior.
     */
    chunkLevelRetrieval?: boolean;
    /** Target section-chunk length in characters. Defaults to 1200. Only used with `chunkLevelRetrieval`. */
    chunkTargetChars?: number;
}

export class RetrievalOrchestrator {
    /**
     * Runs the unified retrieval pipeline for one query.
     *
     * Steps, in order: tokenize + expand the query → search brain files (or
     * section chunks) → collect memory-layer contexts → score recent sessions →
     * normalize scores per source → optional actionability and hierarchical
     * re-ranks → select chunks within the context budget.
     *
     * @param query   Raw user query.
     * @param options Sources and tuning knobs; see `RetrievalOptions`.
     * @returns The selected chunks (lesson chunks split out into `lessonChunks`),
     *          the dropped chunks, token usage and a human-readable `fusionLog`.
     */
    public retrieve(query: string, options: RetrievalOptions): RetrievalResult {
        const fusionLog: string[] = [];
        const allChunks: RetrievalChunk[] = [];
        const queryTokens = tokenize(query);
        const expandedTokens = expandQuery(queryTokens);

        fusionLog.push(`Query tokens: [${queryTokens.slice(0, 10).join(', ')}]`);
        fusionLog.push(`Expanded tokens: [${expandedTokens.slice(0, 15).join(', ')}]`);

        // ── ① Brain File Search (TF-IDF enhanced, optionally hybrid with embeddings) ──
        // `brainFileLimit === 0` is meaningful (Knowledge Mix "model knowledge only"
        // mode), so use `??` rather than `||`. When the caller explicitly passes 0,
        // we skip retrieval entirely instead of falling back to the default of 8.
        const scopeFolders = options.scopeFolders ?? [];
        const brainFileLimit = options.brainFileLimit ?? 8;
        const brainChunks = brainFileLimit > 0
            ? this.searchBrainFiles(
                query,
                expandedTokens,
                options.brain,
                brainFileLimit,
                options.includeRawConversations || false,
                scopeFolders,
                options.queryEmbedding,
                options.embeddingModel,
                options.embeddingBlendAlpha,
                options.chunkLevelRetrieval || false,
                options.chunkTargetChars ?? 1200,
            )
            : [];
        allChunks.push(...brainChunks);
        fusionLog.push(
            brainFileLimit === 0
                ? 'Brain search: skipped (Knowledge Mix weight = 0)'
                : scopeFolders.length > 0
                    ? `Brain search (scoped to ${scopeFolders.length} folder(s)): ${brainChunks.length} chunks`
                    : `Brain search: ${brainChunks.length} chunks found`
        );

        // ── ② Memory Layers ──
        const memoryChunks = this.searchMemoryLayers(
            query,
            options.memoryManager,
            options.chatHistory || [],
            options.workspacePath
        );
        allChunks.push(...memoryChunks);
        fusionLog.push(`Memory search: ${memoryChunks.length} chunks found`);

        // ── ②-b Medium-Term Memory (recent sessions) ──
        const mediumChunks = this.scoreRecentSessions(
            expandedTokens,
            options.recentSessions || [],
            options.mediumTermLimit ?? 0
        );
        allChunks.push(...mediumChunks);
        fusionLog.push(`Medium-term sessions: ${mediumChunks.length} chunks selected`);

        // ── ③ Result Fusion — normalize scores across sources ──
        this.normalizeScores(allChunks);
        fusionLog.push(`Total chunks before budget: ${allChunks.length}`);

        // ── ③-b Actionability Re-rank — boost scores using work-state signals ──
        // Runs right after normalization and before the budget step, so that
        // actionability influences which chunks survive selection.
        if (options.workStateSignals) {
            applyActionabilityBoost(allChunks, options.workStateSignals, options.actionabilityWeights);
            const boosted = allChunks.filter((c) => (c.metadata as any).actionabilityScore > 0).length;
            const cmds = options.workStateSignals.recentSlashCommands.slice(0, 3).join(',');
            const openFile = options.workStateSignals.openFilePath ? path.basename(options.workStateSignals.openFilePath) : '-';
            fusionLog.push(`Actionability re-rank: ${boosted} chunks boosted (cmds=[${cmds}], openFile=${openFile})`);
        }

        // ── ③-c Hierarchical Context Window — abstraction-level matching ──
        // Adjusts scores by how well the query's and the document's abstraction
        // levels match: bonus for the same level, penalty for far mismatches.
        // Runs right after actionability so both re-weights feed one budget selection.
        if (options.hierarchicalReweightEnabled) {
            const queryLevel = classifyQueryLevel(query);
            const { sameLevel, farMismatch } = applyHierarchicalReweight(allChunks, queryLevel, options.hierarchicalWeights);
            fusionLog.push(`Hierarchical re-rank (query=${queryLevel}): ${sameLevel} same-level (+), ${farMismatch} far-mismatch (-)`);
        }

        // ── ④ Context Budget Selection ──
        const { selected, dropped, tokensUsed } = selectWithinBudget(
            allChunks,
            options.contextBudget
        );
        // Pull lesson/playbook/qa-finding chunks out so callers can inject them as a prominent
        // "verify before finalizing" block rather than burying them in the brain-knowledge section.
        const lessonChunks = selected.filter((c) => c.metadata.isLesson);
        const selectedChunks = selected.filter((c) => !c.metadata.isLesson);
        fusionLog.push(`Selected: ${selectedChunks.length} (+${lessonChunks.length} lesson), Dropped: ${dropped.length}, Tokens: ${tokensUsed}`);

        return {
            query,
            totalChunks: allChunks.length,
            selectedChunks,
            droppedChunks: dropped,
            lessonChunks,
            totalTokensUsed: tokensUsed,
            // NOTE(review): 8000 is only the *reported* budget when the caller gave none;
            // presumably it mirrors the default inside `selectWithinBudget` — confirm.
            contextBudget: options.contextBudget?.totalBudget || 8000,
            fusionLog
        };
    }

    /**
     * Converts a retrieval result into the final context string. Only
     * `selectedChunks` are assembled; lesson chunks are excluded because
     * callers inject them as a separate block.
     */
    public buildContextString(result: RetrievalResult): string {
        return assembleContext(result.selectedChunks);
    }

    /**
     * Evaluation only — returns the brain-file ranking for one query (score
     * descending) *before* any context budget is applied, for recall@k / MRR
     * computation. It reuses the same scoring path as production `retrieve()`
     * (`searchBrainFiles`), so the measurements reflect real retrieval behavior.
     *
     * @param query Raw query text.
     * @param brain Brain profile whose `localBrainPath` is searched.
     * @param opts  Optional knobs mirroring `RetrievalOptions`; `limit` defaults to 20.
     * @returns Up to `limit` unique files, best first, with their brain-relative path.
     */
    public rankBrainForEval(
        query: string,
        brain: BrainProfile,
        opts: {
            limit?: number;
            scopeFolders?: string[];
            includeRawConversations?: boolean;
            queryEmbedding?: number[];
            embeddingModel?: string;
            embeddingBlendAlpha?: number;
            chunkLevelRetrieval?: boolean;
            chunkTargetChars?: number;
        } = {},
    ): Array<{ relativePath: string; filePath: string; score: number }> {
        const limit = opts.limit ?? 20;
        const expandedTokens = expandQuery(tokenize(query));
        // Chunk mode returns several chunks per file, so to measure recall at
        // *file* granularity we over-fetch and de-duplicate (to reach `limit`
        // unique files).
        const internalLimit = opts.chunkLevelRetrieval ? limit * 3 : limit;
        const chunks = this.searchBrainFiles(
            query,
            expandedTokens,
            brain,
            internalLimit,
            opts.includeRawConversations ?? false,
            opts.scopeFolders ?? [],
            opts.queryEmbedding,
            opts.embeddingModel,
            opts.embeddingBlendAlpha,
            opts.chunkLevelRetrieval || false,
            opts.chunkTargetChars ?? 1200,
        );
        // De-duplicate by file while keeping the score-descending order → file-level ranking.
        const out: Array<{ relativePath: string; filePath: string; score: number }> = [];
        const seen = new Set<string>();
        const brainRoot = brain.localBrainPath;
        for (const c of chunks) {
            const filePath = (c.metadata.filePath as string) || '';
            if (!filePath || seen.has(filePath)) continue;
            seen.add(filePath);
            // `filePath` is guaranteed non-empty here; fall back to the chunk title
            // only when the relative path comes out empty.
            const relativePath = path.relative(brainRoot, filePath) || c.title;
            out.push({ relativePath, filePath, score: c.score });
            if (out.length >= limit) break;
        }
        return out;
    }

    // ─── Brain File Search ───

    /**
     * Searches the brain folder and returns the best-matching files as chunks.
     *
     * Candidate files are filtered by scope, raw-conversation and operational
     * paths, scored with TF-IDF over the cached token index, optionally blended
     * with embedding cosine similarity, and the top `limit` (plus up to three
     * extra lesson cards) are read from disk to extract an excerpt. When
     * `chunkLevel` is true the work is delegated to `searchBrainChunks`.
     *
     * Best-effort: any exception raised along the way is swallowed and an
     * empty list is returned, so a broken brain index never fails retrieval.
     *
     * @param query               Raw query (currently unused here; kept for signature stability).
     * @param expandedTokens      Expanded query tokens used for scoring and excerpting.
     * @param brain               Brain profile; `localBrainPath` is the search root.
     * @param limit               Max number of regular results.
     * @param includeRaw          Whether raw-conversation folders are searched.
     * @param scopeFolders        Optional folders restricting the search; empty = whole brain.
     * @param queryEmbedding      Optional query vector for the hybrid blend.
     * @param embeddingModel      Embedding model name, required for the hybrid blend.
     * @param embeddingBlendAlpha Blend weight, clamped to [0, 1]; undefined / <= 0 disables blending.
     * @param chunkLevel          Use section-chunk retrieval instead of file-level.
     * @param chunkTargetChars    Target chunk length for section-chunk retrieval.
     */
    private searchBrainFiles(
        query: string,
        expandedTokens: string[],
        brain: BrainProfile,
        limit: number,
        includeRaw: boolean,
        scopeFolders: string[] = [],
        queryEmbedding?: number[],
        embeddingModel?: string,
        embeddingBlendAlpha?: number,
        chunkLevel: boolean = false,
        chunkTargetChars: number = 1200,
    ): RetrievalChunk[] {
        try {
            const scoped = (file: string) => scopeFolders.length === 0
                || scopeFolders.some((folder) => isInside(folder, file));
            const allFiles = findBrainFiles(brain.localBrainPath)
                .filter(scoped)
                .filter((file) => {
                    const rel = path.relative(brain.localBrainPath, file);
                    return (includeRaw || !this.isRawConversation(rel)) && !this.isOperationalPath(rel);
                });

            if (allFiles.length === 0) return [];

            // Phase 1-a: section-chunk retrieval path, kept separate from the
            // file-level path to isolate regression risk.
            if (chunkLevel) {
                return this.searchBrainChunks(
                    expandedTokens, brain, allFiles, limit, chunkTargetChars,
                    queryEmbedding, embeddingModel, embeddingBlendAlpha,
                );
            }

            // Tokenized docs from the persistent mtime-keyed index — unchanged files are not re-read
            // or re-tokenized, so per-query work over a large brain drops from O(total content) to O(files) stats.
            const indexed = getBrainTokenIndex(brain.localBrainPath, allFiles);
            if (indexed.length === 0) return [];

            const scored = scoreTfIdfPreTokenized(
                expandedTokens,
                indexed.map((d) => ({
                    tokens: d.tokens,
                    titleTokens: d.titleTokens,
                    lastModified: d.mtimeMs,
                    conflictCount: d.conflictCount,
                }))
            );

            // Hybrid blend: when the caller provided a query embedding and an
            // embedding model, fetch the cached file vectors and add a cosine
            // similarity term to each score.
            //
            // Scale fix: *every* candidate is normalised by the top TF-IDF score,
            // not only the ones that have a vector. Previously vector-less files
            // kept their raw TF-IDF score while blended files were squeezed into
            // 0..1, so a file merely lacking an embedding could outrank every
            // blended file. Files without a cached embedding now keep their
            // (normalised) pure TF-IDF score, so missing embeddings neither help
            // nor hurt. This matches the chunk-level path.
            if (queryEmbedding && embeddingModel && (embeddingBlendAlpha ?? 0) > 0) {
                const alpha = Math.max(0, Math.min(1, embeddingBlendAlpha ?? 0));
                const filePaths = indexed.map((d) => d.filePath);
                const embeddings = getBrainEmbeddings(brain.localBrainPath, filePaths, embeddingModel);
                if (embeddings.size > 0) {
                    const maxTfidf = scored.reduce((m, s) => (s.score > m ? s.score : m), 0) || 1;
                    for (const s of scored) {
                        const tfidfNorm = s.score / maxTfidf;
                        const vec = embeddings.get(indexed[s.index].filePath);
                        // Cosine is in [-1, 1] in theory; negatives are clamped to 0.
                        s.score = vec
                            ? (1 - alpha) * tfidfNorm + alpha * Math.max(0, cosineSimilarity(queryEmbedding, vec))
                            : tfidfNorm;
                    }
                }
            }

            // Always consider lesson cards for the top slots even if they didn't crack the raw-score top-`limit`:
            // they're short, high-signal, and we want them surfaced when relevant. We keep the regular top-`limit`
            // and additively pull in up to a few lesson cards (deduped by index).
            const ranked = scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score);
            const pickedIdx = new Set<number>();
            for (const s of ranked.slice(0, limit)) pickedIdx.add(s.index);
            const LESSON_EXTRA = 3;
            let lessonExtra = 0;
            for (const s of ranked) {
                if (lessonExtra >= LESSON_EXTRA) break;
                if (pickedIdx.has(s.index)) continue;
                // A non-empty `kind` marks a lesson card; ordinary notes are skipped here.
                if ((indexed[s.index].kind || '') === '') continue;
                pickedIdx.add(s.index);
                lessonExtra++;
            }
            // Preserve rank order for the chosen set.
            const chosen = ranked.filter((s) => pickedIdx.has(s.index));

            const topResults: RetrievalChunk[] = [];
            for (const s of chosen) {
                const doc = indexed[s.index];
                const isLesson = (doc.kind || '') !== '';
                // Only the chosen files are actually read off disk (for excerpt extraction).
                let content = '';
                try { content = fs.readFileSync(doc.filePath, 'utf8'); } catch { /* deleted just now — skip */ continue; }
                // Lesson cards: extract just the high-signal sections (Mistake / Root Cause / Fix /
                // Prevention Checklist) instead of dumping the whole 2500-char card. Old lessons
                // without those headings fall back to a query-targeted excerpt. Cuts retrieval tokens
                // by ~70% per lesson without losing the guardrail content.
                //
                // Regular notes: pick the best heading-bounded section for the query (markdown
                // section retrieval) so that long notes don't dump their intro/setup blocks just
                // because they happen to be in the top 400 chars. Falls back to keyword-window
                // extraction inside the section, or whole-doc extraction when there are no
                // headings at all.
                const excerpt = isLesson
                    ? extractLessonEssence(content, 1200) || extractBestExcerpt(content, expandedTokens, 1200)
                    : extractBestSection(content, expandedTokens, 600);
                const cap = isLesson ? 1200 : 600;
                // Estimate tokens from the text that is actually injected, not from the
                // uncapped excerpt, so the budget step is not over-charged.
                const injected = summarizeText(excerpt, cap);
                topResults.push({
                    id: `brain-${s.index}`,
                    source: 'brain-memory' as const,
                    title: doc.relativePath,
                    content: injected,
                    score: s.score,
                    tokenEstimate: estimateTokens(injected),
                    metadata: {
                        filePath: doc.filePath,
                        category: this.inferCategory(doc.relativePath),
                        isProjectEvidence: this.isProjectEvidence(doc.relativePath, content),
                        lastUpdated: doc.mtimeMs,
                        // Phase 5: Scoring Intelligence Integration
                        conflictDetected: s.conflictDetected,
                        conflictSeverity: s.conflictSeverity,
                        queryCoverage: s.queryCoverage,
                        ...(isLesson ? { isLesson: true, lessonKind: doc.kind } : {}),
                    },
                });
            }
            return topResults;
        } catch {
            // NOTE(review): deliberate best-effort — failures are silent. Consider
            // surfacing them through a logger if one becomes available here.
            return [];
        }
    }

    // ─── Brain Chunk Search (Phase 1-a) ───

    /**
     * Section-chunk retrieval. Applies the same TF-IDF scoring as the
     * file-level `searchBrainFiles` to *chunks* and injects the matched section
     * body as-is (no read-time `extractBestSection` needed). The dense blend
     * prefers a per-chunk embedding and falls back to the file-level embedding.
     * The number of chunks per file is capped so that one file cannot
     * monopolise the results.
     *
     * @param expandedTokens      Expanded query tokens.
     * @param brain               Brain profile; `localBrainPath` is the index root.
     * @param allFiles            Already-filtered candidate files.
     * @param limit               Max number of chunks returned.
     * @param chunkTargetChars    Target chunk length passed to the chunk index.
     * @param queryEmbedding      Optional query vector for the hybrid blend.
     * @param embeddingModel      Embedding model name, required for the hybrid blend.
     * @param embeddingBlendAlpha Blend weight, clamped to [0, 1]; undefined / <= 0 disables blending.
     */
    private searchBrainChunks(
        expandedTokens: string[],
        brain: BrainProfile,
        allFiles: string[],
        limit: number,
        chunkTargetChars: number,
        queryEmbedding?: number[],
        embeddingModel?: string,
        embeddingBlendAlpha?: number,
    ): RetrievalChunk[] {
        const chunks = getBrainChunkIndex(brain.localBrainPath, allFiles, chunkTargetChars);
        if (chunks.length === 0) return [];

        const scored = scoreTfIdfPreTokenized(
            expandedTokens,
            chunks.map((c) => ({
                tokens: c.tokens,
                titleTokens: c.headingTokens,
                lastModified: c.mtimeMs,
                conflictCount: 0,
            })),
        );

        // Hybrid: dense blend using the per-chunk embedding (`${filePath}#${chunkIndex}`).
        // Chunks without a chunk vector fall back to the file-level embedding; with
        // neither, the pure TF-IDF score is kept.
        //
        // Scale caveats (two bugs found by measurement):
        //  1. *All* candidates must be normalised by maxTfidf — if only the ones with
        //     a vector are squeezed into 0..1, the raw scores (≫1) of vector-less
        //     candidates monopolise the top and the blend becomes a no-op.
        //  2. Cosine is min-max normalised within the candidate set — embedding models
        //     yield cos 0.5~0.7 even between unrelated documents, so adding the
        //     absolute value is uniform noise that blurs sparse precision.
        if (queryEmbedding && embeddingModel && (embeddingBlendAlpha ?? 0) > 0) {
            const alpha = Math.max(0, Math.min(1, embeddingBlendAlpha ?? 0));
            const chunkEmb = getBrainChunkEmbeddings(brain.localBrainPath, embeddingModel);
            const filePaths = Array.from(new Set(chunks.map((c) => c.filePath)));
            const fileEmb = getBrainEmbeddings(brain.localBrainPath, filePaths, embeddingModel);
            if (chunkEmb.size > 0 || fileEmb.size > 0) {
                const maxTfidf = scored.reduce((m, s) => (s.score > m ? s.score : m), 0) || 1;
                const cosines = new Array<number | null>(scored.length).fill(null);
                let minCos = Infinity, maxCos = -Infinity;
                for (let i = 0; i < scored.length; i++) {
                    const c = chunks[scored[i].index];
                    const vec = chunkEmb.get(`${c.filePath}#${c.chunkIndex}`) || fileEmb.get(c.filePath);
                    if (!vec) continue;
                    const cos = cosineSimilarity(queryEmbedding, vec);
                    cosines[i] = cos;
                    if (cos < minCos) minCos = cos;
                    if (cos > maxCos) maxCos = cos;
                }
                // A degenerate range (zero or one vector, or all equal) uses span 1
                // to avoid dividing by zero.
                const span = maxCos > minCos ? maxCos - minCos : 1;
                for (let i = 0; i < scored.length; i++) {
                    const s = scored[i];
                    const sparse = s.score / maxTfidf;
                    const cos = cosines[i];
                    // Vector-less candidates keep their sparse score, so un-embedded
                    // content does not hurt retrieval.
                    s.score = cos === null ? sparse : (1 - alpha) * sparse + alpha * ((cos - minCos) / span);
                }
            }
        }

        const ranked = scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score);

        // Per-file chunk cap — keeps one document from monopolising the top slots.
        const PER_FILE_CAP = 3;
        const perFile = new Map<string, number>();
        const chosen: typeof ranked = [];
        for (const s of ranked) {
            const fp = chunks[s.index].filePath;
            const n = perFile.get(fp) || 0;
            if (n >= PER_FILE_CAP) continue;
            perFile.set(fp, n + 1);
            chosen.push(s);
            if (chosen.length >= limit) break;
        }

        // Each file is read at most once even when several of its chunks were chosen.
        const fileContentCache = new Map<string, string>();
        const readFile = (fp: string): string => {
            let c = fileContentCache.get(fp);
            if (c === undefined) {
                try { c = fs.readFileSync(fp, 'utf8'); } catch { c = ''; }
                fileContentCache.set(fp, c);
            }
            return c;
        };

        const topResults: RetrievalChunk[] = [];
        for (const s of chosen) {
            const c = chunks[s.index];
            const content = readFile(c.filePath);
            // Unreadable or empty file — nothing to inject.
            if (!content) continue;
            const isLesson = (c.kind || '') !== '';
            // Regular notes: the matched section body as-is. Lesson cards: the essence
            // extraction is kept, falling back to the chunk slice.
            let body = isLesson
                ? (extractLessonEssence(content, 1200) || content.slice(c.charStart, c.charEnd))
                : content.slice(c.charStart, c.charEnd);
            const cap = isLesson ? 1200 : 700;
            // Section breadcrumb goes at the very top of the body so the model knows
            // which context the section belongs to.
            const crumb = !isLesson && c.headingPath.length ? `〔${c.headingPath.join(' › ')}〕\n` : '';
            body = crumb + body.trim();
            // Estimate tokens from the capped text that is actually injected; the
            // uncapped section can be much longer than `cap` and would over-charge
            // the context budget.
            const injected = summarizeText(body, cap + crumb.length);
            topResults.push({
                id: `brain-chunk-${s.index}`,
                source: 'brain-memory' as const,
                title: c.relativePath,
                content: injected,
                score: s.score,
                tokenEstimate: estimateTokens(injected),
                metadata: {
                    filePath: c.filePath,
                    category: this.inferCategory(c.relativePath),
                    isProjectEvidence: this.isProjectEvidence(c.relativePath, content),
                    lastUpdated: c.mtimeMs,
                    conflictDetected: s.conflictDetected,
                    conflictSeverity: s.conflictSeverity,
                    queryCoverage: s.queryCoverage,
                    ...(isLesson ? { isLesson: true, lessonKind: c.kind } : {}),
                },
            });
        }
        return topResults;
    }

    // ─── Memory Layer Search ───

    /**
     * Collects at most one context chunk from each memory layer (long-term,
     * project, procedural, episodic) by asking the layer to `buildContext`
     * for the query. Layers that return a falsy context contribute nothing.
     *
     * @param query         Raw query passed to every layer.
     * @param memoryManager Provider of the memory layers.
     * @param chatHistory   Currently unused by this method.
     * @param workspacePath When set, project memory for that workspace is included.
     */
    private searchMemoryLayers(
        query: string,
        memoryManager: MemoryManager,
        chatHistory: Array<{ role: string; content: string }>,
        workspacePath?: string
    ): RetrievalChunk[] {
        const chunks: RetrievalChunk[] = [];

        // Long-Term Memory
        const ltm = memoryManager.getLongTermMemory();
        const ltmContext = ltm.buildContext(query);
        if (ltmContext) {
            chunks.push({
                id: 'ltm-context',
                source: 'long-term-memory',
                title: ltmContext.label,
                content: ltmContext.content,
                score: ltmContext.relevance,
                tokenEstimate: estimateTokens(ltmContext.content),
                metadata: { category: 'long-term' }
            });
        }

        // Project Memory (only when a workspace is known)
        if (workspacePath) {
            const pm = memoryManager.getProjectMemory(workspacePath);
            const pmContext = pm.buildContext(query);
            if (pmContext) {
                chunks.push({
                    id: 'pm-context',
                    source: 'project-memory',
                    title: pmContext.label,
                    content: pmContext.content,
                    score: pmContext.relevance,
                    tokenEstimate: estimateTokens(pmContext.content),
                    metadata: { category: 'project', isProjectEvidence: true }
                });
            }
        }

        // Procedural Memory
        const proc = memoryManager.getProceduralMemory();
        const procContext = proc.buildContext(query);
        if (procContext) {
            chunks.push({
                id: 'proc-context',
                source: 'procedural-memory',
                title: procContext.label,
                content: procContext.content,
                score: procContext.relevance,
                tokenEstimate: estimateTokens(procContext.content),
                metadata: { category: 'procedural' }
            });
        }

        // Episodic Memory
        const ep = memoryManager.getEpisodicMemory();
        const epContext = ep.buildContext(query);
        if (epContext) {
            chunks.push({
                id: 'ep-context',
                source: 'episodic-memory',
                title: epContext.label,
                content: epContext.content,
                score: epContext.relevance,
                tokenEstimate: estimateTokens(epContext.content),
                metadata: { category: 'episodic' }
            });
        }

        return chunks;
    }

    // ─── Medium-Term: Recent Sessions ───

    /**
     * Score the user-provided session summaries against the current query
     * (lightweight token overlap — sessions are small so we skip the TF-IDF
     * machinery) and return up to `limit` as chunks. Each chunk packs the
     * title + first user message + last assistant excerpt — enough for the
     * model to recall the thread without re-injecting the whole transcript.
     *
     * Why include recent sessions at all: short-term covers "this conversation",
     * long-term covers "stable brain notes", but there's a gap for "what we
     * worked on yesterday/last week" that the user expects me to remember.
     *
     * NOTE(review): the recency boost alone makes the score positive, so any
     * session younger than 7 days passes the `score > 0` filter even with zero
     * query overlap — confirm that this is intended.
     *
     * @param expandedTokens Expanded query tokens; tokens shorter than 2 chars are ignored.
     * @param sessions       Candidate session summaries.
     * @param limit          Max number of chunks returned; <= 0 returns nothing.
     */
    private scoreRecentSessions(
        expandedTokens: string[],
        sessions: RecentSessionSummary[],
        limit: number,
    ): RetrievalChunk[] {
        if (!sessions || sessions.length === 0 || limit <= 0) return [];
        const qSet = new Set(expandedTokens.filter((t) => t.length >= 2));
        const scored = sessions.map((s) => {
            // Prefer the LLM-compressed summary when present — it's a real
            // 2-3 sentence recap of the session, so query matches against it
            // are far more meaningful than against an arbitrary head/tail.
            const text = s.summary
                ? `${s.title}\n${s.summary}`
                : `${s.title}\n${s.firstUserMsg}\n${s.lastAssistantExcerpt}`;
            const docTokens = tokenize(text);
            let overlap = 0;
            for (const t of docTokens) if (qSet.has(t)) overlap++;
            // Tiny recency boost so equal-overlap sessions prefer the more
            // recent one (most users mean "what we just discussed"). +0.1 max
            // for sessions <7 days old, decays to 0 beyond that.
            const ageDays = s.timestamp ? Math.max(0, (Date.now() - s.timestamp) / 86400000) : 999;
            const recency = ageDays < 7 ? (7 - ageDays) / 70 : 0;
            return { s, score: overlap + recency };
        }).filter((x) => x.score > 0);
        scored.sort((a, b) => b.score - a.score);
        const picked = scored.slice(0, limit);
        if (picked.length === 0) return [];
        return picked.map(({ s, score }, idx) => {
            const dateStr = s.timestamp ? new Date(s.timestamp).toISOString().slice(0, 10) : '';
            // Prefer the LLM-compressed summary; fall back to the raw fragments
            // when the session ended before the summarizer could run (or was
            // too short to summarize, < 3 visible messages).
            const body = s.summary
                ? [`**${s.title}**${dateStr ? ` (${dateStr})` : ''}`, s.summary].join('\n')
                : [
                    `**${s.title}**${dateStr ? ` (${dateStr})` : ''}`,
                    s.firstUserMsg ? `사용자 요청: ${s.firstUserMsg}` : '',
                    s.lastAssistantExcerpt ? `이전 답변 마지막 부분: …${s.lastAssistantExcerpt}` : '',
                ].filter(Boolean).join('\n');
            return {
                id: `mtm-${idx}-${s.id}`,
                source: 'medium-term-memory',
                title: s.title || '(untitled session)',
                content: body,
                score,
                tokenEstimate: estimateTokens(body),
                metadata: { category: 'medium-term', lastUpdated: s.timestamp },
            };
        });
    }

    // ─── Score Normalization ───

    /**
     * Brings the scores of sources that use different scales onto a common
     * one, in place: each source group is divided by its own maximum (floored
     * at 0.001), then every chunk is multiplied by a per-source priority
     * factor (0.5 for unknown sources) and lesson chunks by a further 1.4.
     *
     * @param chunks Chunks whose `score` is rewritten in place.
     */
    private normalizeScores(chunks: RetrievalChunk[]): void {
        // Group by source
        const groups = new Map<string, RetrievalChunk[]>();
        for (const chunk of chunks) {
            const bucket = groups.get(chunk.source);
            if (bucket) bucket.push(chunk);
            else groups.set(chunk.source, [chunk]);
        }

        // Normalize each group independently. A reduce is used instead of
        // `Math.max(...spread)` so very large groups cannot hit the engine's
        // argument-count limit; the 0.001 floor avoids division by zero.
        for (const group of groups.values()) {
            const maxScore = group.reduce((m, c) => Math.max(m, c.score), 0.001);
            for (const chunk of group) {
                chunk.score = chunk.score / maxScore;
            }
        }

        // Source priority boost (some sources are inherently more valuable for RAG)
        const sourceBoost: Record<string, number> = {
            'brain-trace': 1.0,
            'brain-memory': 0.9,
            'project-memory': 0.85,
            'long-term-memory': 0.8,
            'procedural-memory': 0.95,  // Procedural is highly specific
            'medium-term-memory': 0.78, // recent sessions: useful when the user references "last time / yesterday"
            'episodic-memory': 0.7,
            'project-scan': 0.6,
            'recent-knowledge': 0.75
        };

        for (const chunk of chunks) {
            const boost = sourceBoost[chunk.source] ?? 0.5;
            chunk.score *= boost;
            // Lesson cards are short, high-signal guardrails — nudge relevant ones above ordinary brain notes
            // so they survive the budget. Modest (1.4×) so they don't crowd everything out when many match.
            if (chunk.metadata.isLesson) chunk.score *= 1.4;
        }
    }

    // ─── Helpers ───

    /**
     * True when a brain-relative path has a raw-conversation path segment
     * (`00_Raw`, `raw-data`, `conversation(s)`, `transcript(s)`; case-insensitive).
     */
    private isRawConversation(relativePath: string): boolean {
        return /(^|[\\/])(00_Raw|raw-data|conversations?|transcripts?)([\\/]|$)/i.test(relativePath);
    }

    /**
     * Operational logs — session / memory / project logs rather than knowledge.
     * These are the folder fragments defined in the user's wiki taxonomy and are
     * excluded from knowledge search (treated like raw conversations). This does
     * not raise recall metrics, but it prevents the semantic error of pulling
     * logs in as "knowledge" and avoids wasting index space and tokens.
     */
    private isOperationalPath(relativePath: string): boolean {
        return /(^|[\\/])(sessions|_agents|_company|memory|Project_Logs|_Archive_Orphans|Post_Drafts|UX_Scenarios)([\\/])/i.test(relativePath)
            || /docs[\\/]records([\\/]|$)/i.test(relativePath)
            || /Harness_Research_/i.test(relativePath);
    }

    /**
     * Infers a coarse category from keywords appearing anywhere in the path.
     * The first matching rule wins: decision → project-record → architecture →
     * knowledge → general. Matching is by substring, not by path segment.
     */
    private inferCategory(relativePath: string): string {
        const normalized = relativePath.toLowerCase();
        if (/(decisions?|adr|planning)/i.test(normalized)) return 'decision';
        if (/(records|development|bugs)/i.test(normalized)) return 'project-record';
        if (/(architecture|design|pattern)/i.test(normalized)) return 'architecture';
        if (/(knowledge|wiki|topics)/i.test(normalized)) return 'knowledge';
        return 'general';
    }

    /**
     * True when the path looks like project evidence (records, planning,
     * development, bugs, retrospectives, project chronicle, ADRs or a
     * `decision(s)` folder). Only the path is inspected; `content` is
     * currently unused.
     */
    private isProjectEvidence(relativePath: string, content: string): boolean {
        const normalized = relativePath.toLowerCase();
        if (/(records|planning|development|bugs|retrospectives|projectchronicle)/i.test(normalized)) return true;
        if (/adr-\d+|(^|[\\/])decisions?([\\/]|$)/i.test(normalized)) return true;
        return false;
    }
}