Files
connectai/src/retrieval/index.ts
T
koriweb 67927b1d4e feat(retrieval): 임베딩 하이브리드 검색 활성화 — 자동 감지 + 측정 기반 수정 (v2.2.222)
골든셋(24질의) 측정으로 기존 하이브리드 구현의 결함 3건을 잡고 기본 활성화.
측정 결과: recall@3 83.3%→87.5%, MRR 0.802→0.806, recall@1 회귀 없음 (α=0.5).

수정 (측정으로 검증):
- 임베딩 입력을 토큰 재조합(tokens.join)→원문 슬라이스로 교체 + nomic/e5
  task prefix (search_query:/search_document:). 토큰 죽 입력은 하이브리드를
  전 지표 하락시켰음 (recall@1 75%→54%). @r2 리비전 키로 구벡터 자동 무효화.
- 블렌드 스케일 버그: 벡터 있는 후보만 정규화돼 벡터 없는 후보의 raw 점수가
  상위 독식 → 전 후보 정규화 + cosine 후보군 내 min-max 정규화.
- 헤딩-only 청크도 헤딩 텍스트로 임베딩 (벡터 공백 제거).

추가:
- embeddingBootstrap: 활성화 시 엔진 모델 목록에서 임베딩 모델 자동 감지 →
  embeddingModel 자동 설정 + "전체 색인" 버튼 알림. 다국어 모델(e5/bge-m3) 우선.
  사용자가 의도적으로 비우면 재설정 안 함 (globalState 가드).
- 벡터 저장 시 소수 4자리 양자화 — 캐시 360MB→~150MB (코사인 순위 영향 없음).
- tests/retrievalEvalEmbedding.test.ts: env-gated 하이브리드 측정 하니스 (alpha sweep).
- scripts/compact_brain_index.mjs: 기존 full-precision 캐시 1회 압축 도구.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-11 19:02:56 +09:00

766 lines
36 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* ============================================================
* RetrievalOrchestrator — Unified RAG Pipeline
*
* Orchestrator that manages all of Astra's retrieval sources behind one entry point.
*
* Retrieval flow:
* ① Query Planning — intent classification + search-strategy decision
* ② Multi-source Search — Brain + Memory + Project + Episode (queried one after another, synchronously, in `retrieve()`)
* ③ Result Fusion — unified scoring + de-duplication
* ④ Context Budget — final selection within the token budget
* ============================================================
*/
import * as fs from 'fs';
import * as path from 'path';
import { BrainProfile } from '../config';
import { findBrainFiles, summarizeText } from '../utils';
import { isInside } from '../lib/paths';
import { MemoryManager } from '../memory';
import { RetrievalChunk, RetrievalResult, ContextBudgetConfig } from './types';
import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt, extractBestSection } from './scoring';
import { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
import { getBrainTokenIndex, getBrainEmbeddings, getBrainChunkIndex, getBrainChunkEmbeddings } from './brainIndex';
import { extractLessonEssence } from './lessonHelpers';
import { cosineSimilarity } from './embeddings';
import { applyActionabilityBoost, WorkStateSignals, ActionabilityWeights } from './actionabilityScoring';
import { applyHierarchicalReweight, classifyQueryLevel, AbstractionLevel, HierarchicalWeights } from './hierarchicalLevel';
export { tokenize, expandQuery, scoreTfIdf, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
export { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
export { getBrainTokenIndex, clearBrainTokenIndex } from './brainIndex';
export * from './types';
/**
* Compact summary of a past chat session for medium-term memory retrieval.
* Instances are scored against the query by `scoreRecentSessions` and the
* best ones are turned into `medium-term-memory` chunks.
*/
export interface RecentSessionSummary {
/** Session identifier; embedded in the generated chunk id (`mtm-<rank>-<id>`). */
id: string;
/** Session title; part of the scored text and used as the chunk title. */
title: string;
/** First user message of the session; scored and shown only when `summary` is absent. */
firstUserMsg: string;
/** Tail of the last assistant reply; scored and shown only when `summary` is absent. */
lastAssistantExcerpt: string;
/**
* Optional LLM-compressed recap stored at session end (~200 chars).
* When present, retrieval uses this instead of the firstUserMsg+tail
* fragment because it actually captures the decision/outcome.
*/
summary?: string;
/**
* Session time in epoch milliseconds (it is subtracted from `Date.now()`).
* A falsy value is treated as "unknown": no recency boost and no date label.
*/
timestamp: number;
}
/**
* Inputs for one `RetrievalOrchestrator.retrieve()` run: the sources to search
* plus the limits and feature switches for each pipeline stage.
*/
interface RetrievalOptions {
/** Brain profile; its `localBrainPath` is the root that brain-file search walks. */
brain: BrainProfile;
/** Provides the long-term, project, procedural and episodic memory layers. */
memoryManager: MemoryManager;
/** Workspace path; project memory is only queried when this is set. */
workspacePath?: string;
/** Current conversation; forwarded to the memory-layer search (not read by the visible implementation). */
chatHistory?: Array<{ role: string; content: string }>;
/**
* Budget overrides forwarded to `selectWithinBudget`. `totalBudget` is also
* echoed in the result, where an unset (or 0) value is reported as 8000.
*/
contextBudget?: Partial<ContextBudgetConfig>;
/**
* Max number of top-ranked brain results. Defaults to 8; a value of 0 (or
* less) skips brain search entirely. In file mode up to 3 additional lesson
* cards may be appended beyond this limit.
*/
brainFileLimit?: number;
/** When falsy, files under raw-conversation folders are excluded from brain search. */
includeRawConversations?: boolean;
/**
* Optional absolute folder paths constraining brain-file search to those
* subtrees. When provided and non-empty, only brain files inside one of
* the folders are considered. Empty / undefined preserves whole-brain
* search (legacy behavior). Folders that escape the brain root are
* silently dropped by the caller (see `agentKnowledgeMap.resolveScopeForAgent`).
*/
scopeFolders?: string[];
/**
* Compact summaries of recently-touched chat sessions (excluding the
* active one). Scored against the query and the top `mediumTermLimit`
* are injected as medium-term memory chunks. Caller pre-computes these
* to avoid threading vscode/ExtensionContext through this module.
*/
recentSessions?: RecentSessionSummary[];
/** Max number of medium-term session chunks to include after scoring. Unset means 0 (none). */
mediumTermLimit?: number;
/**
* Optional query embedding for hybrid (sparse+dense) brain search. When
* provided, each candidate file's cached embedding is cosine-matched and
* blended with the TF-IDF score by `embeddingBlendAlpha`. Caller computes
* this once per turn so we don't pay the embedding RTT inside scoring.
*/
queryEmbedding?: number[];
/** Embedding model name (used as a cache key on the brain index side). */
embeddingModel?: string;
/**
* Blend weight: 0 = TF-IDF only, 1 = cosine only (clamped to [0, 1]).
* NOTE(review): this module performs no blending when the value is undefined
* or 0, so any default such as 0.5 has to be supplied by the caller — confirm.
*/
embeddingBlendAlpha?: number;
/**
* Actionability — re-weights results using "current work state" signals
* (recent slash commands + the open file). When undefined, no actionability
* re-rank is performed (legacy behavior).
*/
workStateSignals?: WorkStateSignals;
/** Actionability combination weights. Defaults are used when undefined. */
actionabilityWeights?: ActionabilityWeights;
/**
* Hierarchical Context Window — re-weights by matching query and document
* abstraction levels. When true, the query's level is classified and the
* chunks are re-weighted; false / undefined skips this step.
*/
hierarchicalReweightEnabled?: boolean;
/** Hierarchical weight overrides. Defaults are used when undefined. */
hierarchicalWeights?: HierarchicalWeights;
/**
* Section-level chunking (Phase 1-a). When true, brain search indexes and
* scores section chunks instead of whole files and injects the matched
* *section* as-is. false / undefined keeps the existing file-level behavior.
*/
chunkLevelRetrieval?: boolean;
/** Target section-chunk length in characters. Default 1200. Only used with chunkLevelRetrieval. */
chunkTargetChars?: number;
}
export class RetrievalOrchestrator {
/**
 * Runs the unified retrieval pipeline for a single query.
 *
 * Stages, in order: brain search → memory layers → recent-session scoring →
 * per-source score normalization → optional actionability and hierarchical
 * re-ranking → context-budget selection. Lesson chunks that survive the
 * budget are reported separately from the regular selection.
 *
 * @param query Raw user query.
 * @param options Sources, limits and feature switches for this run.
 * @returns Selected, dropped and lesson chunks, the token usage, and a fusion
 *          log with one line per stage.
 */
public retrieve(query: string, options: RetrievalOptions): RetrievalResult {
  const log: string[] = [];
  const pool: RetrievalChunk[] = [];

  const baseTokens = tokenize(query);
  const searchTokens = expandQuery(baseTokens);
  log.push(`Query tokens: [${baseTokens.slice(0, 10).join(', ')}]`);
  log.push(`Expanded tokens: [${searchTokens.slice(0, 15).join(', ')}]`);

  // ── ① Brain file search (TF-IDF, optionally blended with embeddings) ──
  // A limit of 0 is meaningful (Knowledge Mix "model knowledge only" mode), so
  // the default of 8 is applied with `??`; an explicit 0 skips the search.
  const folders = options.scopeFolders ?? [];
  const fileLimit = options.brainFileLimit ?? 8;
  let brainHits: RetrievalChunk[] = [];
  if (fileLimit > 0) {
    brainHits = this.searchBrainFiles(
      query,
      searchTokens,
      options.brain,
      fileLimit,
      options.includeRawConversations || false,
      folders,
      options.queryEmbedding,
      options.embeddingModel,
      options.embeddingBlendAlpha,
      options.chunkLevelRetrieval || false,
      options.chunkTargetChars ?? 1200,
    );
  }
  pool.push(...brainHits);

  let brainLine: string;
  if (fileLimit === 0) {
    brainLine = 'Brain search: skipped (Knowledge Mix weight = 0)';
  } else if (folders.length > 0) {
    brainLine = `Brain search (scoped to ${folders.length} folder(s)): ${brainHits.length} chunks`;
  } else {
    brainLine = `Brain search: ${brainHits.length} chunks found`;
  }
  log.push(brainLine);

  // ── ② Memory layers ──
  const memoryHits = this.searchMemoryLayers(
    query,
    options.memoryManager,
    options.chatHistory || [],
    options.workspacePath,
  );
  pool.push(...memoryHits);
  log.push(`Memory search: ${memoryHits.length} chunks found`);

  // ── ②-b Medium-term memory (recent sessions) ──
  const sessionHits = this.scoreRecentSessions(
    searchTokens,
    options.recentSessions || [],
    options.mediumTermLimit ?? 0,
  );
  pool.push(...sessionHits);
  log.push(`Medium-term sessions: ${sessionHits.length} chunks selected`);

  // ── ③ Result fusion — bring every source onto a comparable scale ──
  this.normalizeScores(pool);
  log.push(`Total chunks before budget: ${pool.length}`);

  // ── ③-b Actionability re-rank ──
  // Runs right after normalization and before the budget so that the boost
  // influences which chunks survive selection.
  const signals = options.workStateSignals;
  if (signals) {
    applyActionabilityBoost(pool, signals, options.actionabilityWeights);
    let boostedCount = 0;
    for (const chunk of pool) {
      if ((chunk.metadata as any).actionabilityScore > 0) boostedCount++;
    }
    const recentCmds = signals.recentSlashCommands.slice(0, 3).join(',');
    const openFileName = signals.openFilePath ? path.basename(signals.openFilePath) : '-';
    log.push(`Actionability re-rank: ${boostedCount} chunks boosted (cmds=[${recentCmds}], openFile=${openFileName})`);
  }

  // ── ③-c Hierarchical context window ──
  // Adjusts scores by how well each chunk's abstraction level matches the
  // query's. Applied after actionability so a single budget pass sees both.
  if (options.hierarchicalReweightEnabled) {
    const level = classifyQueryLevel(query);
    const reweighted = applyHierarchicalReweight(pool, level, options.hierarchicalWeights);
    log.push(`Hierarchical re-rank (query=${level}): ${reweighted.sameLevel} same-level (+), ${reweighted.farMismatch} far-mismatch (-)`);
  }

  // ── ④ Context budget selection ──
  const budgeted = selectWithinBudget(pool, options.contextBudget);

  // Lesson / playbook / qa-finding chunks are split out so callers can inject
  // them as a prominent "verify before finalizing" block instead of burying
  // them in the brain-knowledge section.
  const lessons: RetrievalChunk[] = [];
  const regular: RetrievalChunk[] = [];
  for (const chunk of budgeted.selected) {
    if (chunk.metadata.isLesson) {
      lessons.push(chunk);
    } else {
      regular.push(chunk);
    }
  }
  log.push(`Selected: ${regular.length} (+${lessons.length} lesson), Dropped: ${budgeted.dropped.length}, Tokens: ${budgeted.tokensUsed}`);

  return {
    query,
    totalChunks: pool.length,
    selectedChunks: regular,
    droppedChunks: budgeted.dropped,
    lessonChunks: lessons,
    totalTokensUsed: budgeted.tokensUsed,
    // NOTE(review): `||` reports 8000 even for an explicit totalBudget of 0 —
    // confirm this matches the default applied inside selectWithinBudget.
    contextBudget: options.contextBudget?.totalBudget || 8000,
    fusionLog: log,
  };
}
/**
 * Renders a retrieval result as the final context string.
 * Only `selectedChunks` are assembled; lesson chunks are left out because
 * they are injected as a separate block.
 *
 * @param result Result previously returned by `retrieve()`.
 * @returns The assembled context text.
 */
public buildContextString(result: RetrievalResult): string {
  const { selectedChunks } = result;
  return assembleContext(selectedChunks);
}
/**
 * Evaluation only — returns the brain-file ranking for one query (descending
 * score) *before* any context budget is applied, for recall@k / MRR
 * measurement. It goes through the same `searchBrainFiles` scoring path as
 * `retrieve()`, so the measurement reflects production ranking.
 *
 * @param query Raw query text.
 * @param brain Brain profile whose `localBrainPath` is searched.
 * @param opts Optional knobs mirroring the brain-related retrieval options;
 *             `limit` (default 20) is the number of unique files wanted.
 * @returns One entry per unique file, in rank order.
 */
public rankBrainForEval(
  query: string,
  brain: BrainProfile,
  opts: {
    limit?: number;
    scopeFolders?: string[];
    includeRawConversations?: boolean;
    queryEmbedding?: number[];
    embeddingModel?: string;
    embeddingBlendAlpha?: number;
    chunkLevelRetrieval?: boolean;
    chunkTargetChars?: number;
  } = {},
): Array<{ relativePath: string; filePath: string; score: number }> {
  const wanted = opts.limit ?? 20;
  const searchTokens = expandQuery(tokenize(query));

  // Chunk mode can return several chunks per file; over-fetch so that after
  // de-duplication there are still enough unique files to measure recall.
  const fetchCount = opts.chunkLevelRetrieval ? wanted * 3 : wanted;
  const hits = this.searchBrainFiles(
    query,
    searchTokens,
    brain,
    fetchCount,
    opts.includeRawConversations ?? false,
    opts.scopeFolders ?? [],
    opts.queryEmbedding,
    opts.embeddingModel,
    opts.embeddingBlendAlpha,
    opts.chunkLevelRetrieval || false,
    opts.chunkTargetChars ?? 1200,
  );

  // Keep the first (best-scored) chunk of every file; hit order is preserved.
  const ranking: Array<{ relativePath: string; filePath: string; score: number }> = [];
  const visited = new Set<string>();
  const root = brain.localBrainPath;
  for (const hit of hits) {
    const filePath = (hit.metadata.filePath as string) || '';
    if (filePath === '' || visited.has(filePath)) {
      continue;
    }
    visited.add(filePath);
    // An empty relative path (file path equal to the root) falls back to the title.
    const relativePath = path.relative(root, filePath) || hit.title;
    ranking.push({ relativePath, filePath, score: hit.score });
    if (ranking.length >= wanted) {
      break;
    }
  }
  return ranking;
}
// ─── Brain File Search ───
/**
 * Ranks brain files against the expanded query and returns the best matches
 * as retrieval chunks.
 *
 * Candidate files come from `findBrainFiles`, restricted to `scopeFolders`
 * (when non-empty) and excluding operational paths and — unless `includeRaw`
 * is set — raw-conversation folders. With `chunkLevel` the work is delegated
 * to `searchBrainChunks`. Otherwise files are scored with TF-IDF over the
 * token index and, when a query embedding, a model name and a positive blend
 * weight are all supplied, blended with cosine similarity against the cached
 * file embeddings.
 *
 * @param query Raw query text (not read by this method; kept for signature stability).
 * @param expandedTokens Expanded query tokens used for scoring and excerpts.
 * @param brain Brain profile; `localBrainPath` is the search root.
 * @param limit Number of top-ranked files to return; up to 3 additional
 *              lesson cards may be appended beyond it.
 * @param includeRaw Whether raw-conversation folders are searchable.
 * @param scopeFolders Folders that bound the search; empty means whole brain.
 * @param queryEmbedding Query vector for the dense term.
 * @param embeddingModel Model name used to look up cached embeddings.
 * @param embeddingBlendAlpha Dense weight, clamped to [0, 1]; undefined or 0 disables blending.
 * @param chunkLevel Use section-chunk retrieval instead of file retrieval.
 * @param chunkTargetChars Target chunk length forwarded to the chunk index.
 * @returns Chunks in rank order; an empty array when nothing matches or when
 *          any step throws (best-effort search).
 */
private searchBrainFiles(
  query: string,
  expandedTokens: string[],
  brain: BrainProfile,
  limit: number,
  includeRaw: boolean,
  scopeFolders: string[] = [],
  queryEmbedding?: number[],
  embeddingModel?: string,
  embeddingBlendAlpha?: number,
  chunkLevel: boolean = false,
  chunkTargetChars: number = 1200,
): RetrievalChunk[] {
  try {
    const scoped = (file: string) => scopeFolders.length === 0
      || scopeFolders.some((folder) => isInside(folder, file));
    const allFiles = findBrainFiles(brain.localBrainPath)
      .filter(scoped)
      .filter((file) => {
        const rel = path.relative(brain.localBrainPath, file);
        return (includeRaw || !this.isRawConversation(rel)) && !this.isOperationalPath(rel);
      });
    if (allFiles.length === 0) return [];

    // Section-chunk retrieval lives in its own path so it cannot regress the
    // file-level behavior.
    if (chunkLevel) {
      return this.searchBrainChunks(
        expandedTokens, brain, allFiles, limit, chunkTargetChars,
        queryEmbedding, embeddingModel, embeddingBlendAlpha,
      );
    }

    // Tokenized docs from the persistent mtime-keyed index — unchanged files are not re-read
    // or re-tokenized, so per-query work over a large brain drops from O(total content) to O(files) stats.
    const indexed = getBrainTokenIndex(brain.localBrainPath, allFiles);
    if (indexed.length === 0) return [];
    const scored = scoreTfIdfPreTokenized(
      expandedTokens,
      indexed.map((d) => ({
        tokens: d.tokens,
        titleTokens: d.titleTokens,
        lastModified: d.mtimeMs,
        conflictCount: d.conflictCount,
      }))
    );

    // Hybrid blend: TF-IDF is divided by the top observed score so that the
    // sparse and dense terms share a 0..1 scale before mixing.
    //
    // Scale fix: *every* candidate is normalized, not only those that have a
    // cached vector. Previously a file without a vector kept its raw TF-IDF
    // score (which can be far above 1) and therefore outranked every blended
    // candidate, making the blend ineffective. Files without a vector now keep
    // their normalized sparse score, so a missing embedding neither helps nor
    // hurts. The dense term stays the clamped absolute cosine in this path.
    if (queryEmbedding && embeddingModel && (embeddingBlendAlpha ?? 0) > 0) {
      const alpha = Math.max(0, Math.min(1, embeddingBlendAlpha ?? 0));
      const filePaths = indexed.map((d) => d.filePath);
      const embeddings = getBrainEmbeddings(brain.localBrainPath, filePaths, embeddingModel);
      if (embeddings.size > 0) {
        const maxTfidf = scored.reduce((m, s) => (s.score > m ? s.score : m), 0) || 1;
        for (const s of scored) {
          const sparse = s.score / maxTfidf;
          const vec = embeddings.get(indexed[s.index].filePath);
          s.score = vec
            ? (1 - alpha) * sparse + alpha * Math.max(0, cosineSimilarity(queryEmbedding, vec))
            : sparse;
        }
      }
    }

    // Always consider lesson cards for the top slots even if they didn't crack the raw-score top-`limit`:
    // they're short, high-signal, and we want them surfaced when relevant. We keep the regular top-`limit`
    // and additively pull in up to a few lesson cards (deduped by index).
    const ranked = scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score);
    const pickedIdx = new Set<number>();
    for (const s of ranked.slice(0, limit)) pickedIdx.add(s.index);
    const LESSON_EXTRA = 3;
    let lessonExtra = 0;
    for (const s of ranked) {
      if (lessonExtra >= LESSON_EXTRA) break;
      if (pickedIdx.has(s.index)) continue;
      // A non-empty `kind` marks a lesson card; ordinary notes are skipped here.
      if ((indexed[s.index].kind || '') === '') continue;
      pickedIdx.add(s.index);
      lessonExtra++;
    }

    // Preserve rank order for the chosen set.
    const chosen = ranked.filter((s) => pickedIdx.has(s.index));
    const topResults: RetrievalChunk[] = [];
    for (const s of chosen) {
      const doc = indexed[s.index];
      const isLesson = (doc.kind || '') !== '';
      // Only the chosen files are actually read off disk (for excerpt extraction).
      let content = '';
      try { content = fs.readFileSync(doc.filePath, 'utf8'); } catch { /* deleted just now — skip */ continue; }
      // Lesson cards: extract just the high-signal sections (Mistake / Root Cause / Fix /
      // Prevention Checklist) instead of dumping the whole 2500-char card. Old lessons
      // without those headings fall back to a query-targeted excerpt. Cuts retrieval tokens
      // by ~70% per lesson without losing the guardrail content.
      //
      // Regular notes: pick the best heading-bounded section for the query (markdown
      // section retrieval) so that long notes don't dump their intro/setup blocks just
      // because they happen to be in the top 400 chars. Falls back to keyword-window
      // extraction inside the section, or whole-doc extraction when there are no
      // headings at all.
      const excerpt = isLesson
        ? extractLessonEssence(content, 1200) || extractBestExcerpt(content, expandedTokens, 1200)
        : extractBestSection(content, expandedTokens, 600);
      const cap = isLesson ? 1200 : 600;
      // Estimate tokens on the text that is actually emitted, so the budget
      // stage accounts for what will be injected rather than the pre-cap excerpt.
      const rendered = summarizeText(excerpt, cap);
      topResults.push({
        id: `brain-${s.index}`,
        source: 'brain-memory' as const,
        title: doc.relativePath,
        content: rendered,
        score: s.score,
        tokenEstimate: estimateTokens(rendered),
        metadata: {
          filePath: doc.filePath,
          category: this.inferCategory(doc.relativePath),
          isProjectEvidence: this.isProjectEvidence(doc.relativePath, content),
          lastUpdated: doc.mtimeMs,
          // Phase 5: Scoring Intelligence Integration
          conflictDetected: s.conflictDetected,
          conflictSeverity: s.conflictSeverity,
          queryCoverage: s.queryCoverage,
          ...(isLesson ? { isLesson: true, lessonKind: doc.kind } : {}),
        },
      });
    }
    return topResults;
  } catch {
    // Deliberate best-effort: a failing brain search must not break retrieval.
    return [];
  }
}
// ─── Brain Chunk Search (Phase 1-a) ───
/**
 * Section-chunk retrieval. Applies the same TF-IDF scoring as the file-level
 * search to *chunks* and emits the matched section body directly (no
 * read-time best-section extraction). When a query embedding, a model name
 * and a positive blend weight are supplied, the sparse score is blended with
 * a dense term that uses the chunk embedding (`<filePath>#<chunkIndex>`) and
 * falls back to the file embedding. At most `PER_FILE_CAP` chunks per file
 * are returned so that one document cannot monopolize the result.
 *
 * @param expandedTokens Expanded query tokens.
 * @param brain Brain profile; `localBrainPath` keys the indexes.
 * @param allFiles Already-filtered candidate file paths.
 * @param limit Maximum number of chunks to return.
 * @param chunkTargetChars Target chunk length forwarded to the chunk index.
 * @param queryEmbedding Query vector for the dense term.
 * @param embeddingModel Model name used to look up cached embeddings.
 * @param embeddingBlendAlpha Dense weight, clamped to [0, 1]; undefined or 0 disables blending.
 * @returns Chunks in rank order (files that can no longer be read are skipped).
 */
private searchBrainChunks(
  expandedTokens: string[],
  brain: BrainProfile,
  allFiles: string[],
  limit: number,
  chunkTargetChars: number,
  queryEmbedding?: number[],
  embeddingModel?: string,
  embeddingBlendAlpha?: number,
): RetrievalChunk[] {
  const chunks = getBrainChunkIndex(brain.localBrainPath, allFiles, chunkTargetChars);
  if (chunks.length === 0) return [];
  const scored = scoreTfIdfPreTokenized(
    expandedTokens,
    chunks.map((c) => ({
      tokens: c.tokens,
      titleTokens: c.headingTokens,
      lastModified: c.mtimeMs,
      conflictCount: 0,
    })),
  );

  // Hybrid blend. Scale notes:
  //  1. *All* candidates are divided by maxTfidf — shrinking only the ones that
  //     have a vector would let the raw scores (≫1) of the others dominate.
  //  2. Cosine is min-max normalized within the candidate set, because
  //     embedding models give even unrelated documents a cosine of ~0.5–0.7,
  //     so adding the absolute value is mostly uniform noise.
  //  3. When the cosines have no spread (a single embedded candidate, or all
  //     ties) min-max carries no ranking information. The dense term is then
  //     skipped; otherwise it would be 0 and every embedded candidate would be
  //     scaled down by (1 - alpha) relative to the un-embedded ones.
  if (queryEmbedding && embeddingModel && (embeddingBlendAlpha ?? 0) > 0) {
    const alpha = Math.max(0, Math.min(1, embeddingBlendAlpha ?? 0));
    const chunkEmb = getBrainChunkEmbeddings(brain.localBrainPath, embeddingModel);
    const filePaths = Array.from(new Set(chunks.map((c) => c.filePath)));
    const fileEmb = getBrainEmbeddings(brain.localBrainPath, filePaths, embeddingModel);
    if (chunkEmb.size > 0 || fileEmb.size > 0) {
      const maxTfidf = scored.reduce((m, s) => (s.score > m ? s.score : m), 0) || 1;
      const cosines = new Array<number | null>(scored.length).fill(null);
      let minCos = Infinity;
      let maxCos = -Infinity;
      for (let i = 0; i < scored.length; i++) {
        const c = chunks[scored[i].index];
        const vec = chunkEmb.get(`${c.filePath}#${c.chunkIndex}`) || fileEmb.get(c.filePath);
        if (!vec) continue;
        const cos = cosineSimilarity(queryEmbedding, vec);
        // A non-finite cosine (e.g. from a degenerate vector) would turn the
        // score into NaN and silently drop the chunk; treat it as "no vector".
        if (!Number.isFinite(cos)) continue;
        cosines[i] = cos;
        if (cos < minCos) minCos = cos;
        if (cos > maxCos) maxCos = cos;
      }
      const span = maxCos - minCos;
      const hasSpread = span > 0;
      for (let i = 0; i < scored.length; i++) {
        const s = scored[i];
        const sparse = s.score / maxTfidf;
        const cos = cosines[i];
        // Candidates without a usable vector keep the sparse score, so a
        // missing embedding does not hurt retrieval.
        s.score = cos === null || !hasSpread
          ? sparse
          : (1 - alpha) * sparse + alpha * ((cos - minCos) / span);
      }
    }
  }

  const ranked = scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score);

  // Per-file chunk cap — keeps one document from taking every top slot.
  const PER_FILE_CAP = 3;
  const perFile = new Map<string, number>();
  const chosen: typeof ranked = [];
  for (const s of ranked) {
    const fp = chunks[s.index].filePath;
    const n = perFile.get(fp) || 0;
    if (n >= PER_FILE_CAP) continue;
    perFile.set(fp, n + 1);
    chosen.push(s);
    if (chosen.length >= limit) break;
  }

  // Each file is read at most once even when several of its chunks are chosen.
  const fileContentCache = new Map<string, string>();
  const readFile = (fp: string): string => {
    let c = fileContentCache.get(fp);
    if (c === undefined) {
      try { c = fs.readFileSync(fp, 'utf8'); } catch { c = ''; }
      fileContentCache.set(fp, c);
    }
    return c;
  };

  const topResults: RetrievalChunk[] = [];
  for (const s of chosen) {
    const c = chunks[s.index];
    const content = readFile(c.filePath);
    if (!content) continue;
    const isLesson = (c.kind || '') !== '';
    // Regular notes: the matched section body as-is. Lesson cards: keep the
    // essence extraction, falling back to the chunk's own character range.
    let body = isLesson
      ? (extractLessonEssence(content, 1200) || content.slice(c.charStart, c.charEnd))
      : content.slice(c.charStart, c.charEnd);
    const cap = isLesson ? 1200 : 700;
    // Section breadcrumb goes first so the model knows which context the
    // section belongs to.
    const crumb = !isLesson && c.headingPath.length ? `${c.headingPath.join(' ')}\n` : '';
    body = crumb + body.trim();
    // The body may be longer than the cap, so estimate tokens on the emitted
    // text; estimating on the untruncated body would overstate the chunk's
    // cost to the budget stage.
    const rendered = summarizeText(body, cap + crumb.length);
    topResults.push({
      id: `brain-chunk-${s.index}`,
      source: 'brain-memory' as const,
      title: c.relativePath,
      content: rendered,
      score: s.score,
      tokenEstimate: estimateTokens(rendered),
      metadata: {
        filePath: c.filePath,
        category: this.inferCategory(c.relativePath),
        isProjectEvidence: this.isProjectEvidence(c.relativePath, content),
        lastUpdated: c.mtimeMs,
        conflictDetected: s.conflictDetected,
        conflictSeverity: s.conflictSeverity,
        queryCoverage: s.queryCoverage,
        ...(isLesson ? { isLesson: true, lessonKind: c.kind } : {}),
      },
    });
  }
  return topResults;
}
// ─── Memory Layer Search ───
/**
 * Asks each memory layer for a context block matching the query and wraps
 * every non-empty answer in a retrieval chunk. Layers are queried in the
 * order long-term → project (only when `workspacePath` is set) → procedural
 * → episodic, and each contributes at most one chunk.
 *
 * @param query Raw query text passed to every layer's `buildContext`.
 * @param memoryManager Provider of the memory layers.
 * @param chatHistory Current conversation (not read by this method).
 * @param workspacePath Workspace whose project memory should be queried.
 * @returns Zero to four chunks, in the query order above.
 */
private searchMemoryLayers(
  query: string,
  memoryManager: MemoryManager,
  chatHistory: Array<{ role: string; content: string }>,
  workspacePath?: string
): RetrievalChunk[] {
  const found: RetrievalChunk[] = [];

  // Long-term memory
  const longTermHit = memoryManager.getLongTermMemory().buildContext(query);
  if (longTermHit) {
    found.push({
      id: 'ltm-context',
      source: 'long-term-memory',
      title: longTermHit.label,
      content: longTermHit.content,
      score: longTermHit.relevance,
      tokenEstimate: estimateTokens(longTermHit.content),
      metadata: { category: 'long-term' },
    });
  }

  // Project memory — needs a workspace to resolve against.
  if (workspacePath) {
    const projectHit = memoryManager.getProjectMemory(workspacePath).buildContext(query);
    if (projectHit) {
      found.push({
        id: 'pm-context',
        source: 'project-memory',
        title: projectHit.label,
        content: projectHit.content,
        score: projectHit.relevance,
        tokenEstimate: estimateTokens(projectHit.content),
        metadata: { category: 'project', isProjectEvidence: true },
      });
    }
  }

  // Procedural memory
  const proceduralHit = memoryManager.getProceduralMemory().buildContext(query);
  if (proceduralHit) {
    found.push({
      id: 'proc-context',
      source: 'procedural-memory',
      title: proceduralHit.label,
      content: proceduralHit.content,
      score: proceduralHit.relevance,
      tokenEstimate: estimateTokens(proceduralHit.content),
      metadata: { category: 'procedural' },
    });
  }

  // Episodic memory
  const episodicHit = memoryManager.getEpisodicMemory().buildContext(query);
  if (episodicHit) {
    found.push({
      id: 'ep-context',
      source: 'episodic-memory',
      title: episodicHit.label,
      content: episodicHit.content,
      score: episodicHit.relevance,
      tokenEstimate: estimateTokens(episodicHit.content),
      metadata: { category: 'episodic' },
    });
  }

  return found;
}
// ─── Medium-Term: Recent Sessions ───
/**
 * Scores caller-supplied session summaries against the query and returns up
 * to `limit` of them as medium-term chunks.
 *
 * Scoring is a plain token-overlap count (sessions are small, so the TF-IDF
 * machinery is skipped) plus a recency bonus of at most 0.1 that decays
 * linearly to 0 at seven days. Sessions whose total score is not positive are
 * discarded. NOTE(review): because the bonus alone is positive, a session
 * younger than seven days is kept even with zero token overlap — confirm this
 * is intended.
 *
 * Recent sessions fill the gap between short-term ("this conversation") and
 * long-term ("stable brain notes") memory: what was worked on yesterday or
 * last week.
 *
 * @param expandedTokens Expanded query tokens; tokens shorter than 2 chars are ignored.
 * @param sessions Candidate session summaries.
 * @param limit Maximum number of chunks to return; values ≤ 0 return nothing.
 * @returns Chunks ordered by descending score.
 */
private scoreRecentSessions(
  expandedTokens: string[],
  sessions: RecentSessionSummary[],
  limit: number,
): RetrievalChunk[] {
  if (!sessions || sessions.length === 0 || limit <= 0) return [];

  const queryVocab = new Set<string>();
  for (const token of expandedTokens) {
    if (token.length >= 2) queryVocab.add(token);
  }

  const candidates: Array<{ s: RecentSessionSummary; score: number }> = [];
  for (const session of sessions) {
    // The LLM-compressed summary, when present, is a real recap of the
    // session and therefore a far better match target than head/tail text.
    const haystack = session.summary
      ? `${session.title}\n${session.summary}`
      : `${session.title}\n${session.firstUserMsg}\n${session.lastAssistantExcerpt}`;

    let overlap = 0;
    for (const token of tokenize(haystack)) {
      if (queryVocab.has(token)) overlap++;
    }

    // Small recency bonus so that equally-matching sessions prefer the newer
    // one. A missing timestamp counts as very old (999 days).
    const ageDays = session.timestamp
      ? Math.max(0, (Date.now() - session.timestamp) / 86400000)
      : 999;
    const recency = ageDays < 7 ? (7 - ageDays) / 70 : 0;

    const score = overlap + recency;
    if (score > 0) candidates.push({ s: session, score });
  }

  candidates.sort((a, b) => b.score - a.score);
  const top = candidates.slice(0, limit);
  if (top.length === 0) return [];

  return top.map((entry, rank): RetrievalChunk => {
    const session = entry.s;
    const dateLabel = session.timestamp ? new Date(session.timestamp).toISOString().slice(0, 10) : '';
    const heading = `**${session.title}**${dateLabel ? ` (${dateLabel})` : ''}`;

    // Summary first; otherwise fall back to the raw fragments (the session
    // ended before the summarizer ran, or was too short to summarize).
    let body: string;
    if (session.summary) {
      body = `${heading}\n${session.summary}`;
    } else {
      const lines = [heading];
      if (session.firstUserMsg) lines.push(`사용자 요청: ${session.firstUserMsg}`);
      if (session.lastAssistantExcerpt) lines.push(`이전 답변 마지막 부분: …${session.lastAssistantExcerpt}`);
      body = lines.join('\n');
    }

    return {
      id: `mtm-${rank}-${session.id}`,
      source: 'medium-term-memory',
      title: session.title || '(untitled session)',
      content: body,
      score: entry.score,
      tokenEstimate: estimateTokens(body),
      metadata: { category: 'medium-term', lastUpdated: session.timestamp },
    };
  });
}
// ─── Score Normalization ───
/**
 * Rescales chunk scores in place so that sources with different scoring
 * scales become comparable.
 *
 * Step 1: within each source, every score is divided by that source's
 * highest score (floored at 0.001). Step 2: each score is multiplied by a
 * per-source priority weight (0.5 for unlisted sources), and lesson chunks
 * get an additional 1.4× so that relevant guardrails survive the budget.
 *
 * @param chunks Chunks from all sources; their `score` fields are mutated.
 */
private normalizeScores(chunks: RetrievalChunk[]): void {
  // Bucket the chunks by their source.
  const bySource = new Map<string, RetrievalChunk[]>();
  for (const chunk of chunks) {
    const bucket = bySource.get(chunk.source);
    if (bucket) {
      bucket.push(chunk);
    } else {
      bySource.set(chunk.source, [chunk]);
    }
  }

  // Scale every bucket independently by its own peak score.
  bySource.forEach((bucket) => {
    const peak = bucket.reduce((best, chunk) => Math.max(best, chunk.score), 0.001);
    bucket.forEach((chunk) => {
      chunk.score = chunk.score / peak;
    });
  });

  // Source priority weights (some sources are inherently more valuable for RAG).
  const priority: Record<string, number> = {
    'brain-trace': 1.0,
    'brain-memory': 0.9,
    'project-memory': 0.85,
    'long-term-memory': 0.8,
    'procedural-memory': 0.95, // procedural memory is highly specific
    'medium-term-memory': 0.78, // recent sessions: useful for "last time / yesterday" references
    'episodic-memory': 0.7,
    'project-scan': 0.6,
    'recent-knowledge': 0.75
  };
  chunks.forEach((chunk) => {
    const weight = priority[chunk.source] || 0.5;
    chunk.score *= weight;
    // Lesson cards are short, high-signal guardrails; the modest 1.4× lifts
    // them above ordinary brain notes without crowding everything else out.
    if (chunk.metadata.isLesson) {
      chunk.score *= 1.4;
    }
  });
}
// ─── Helpers ───
/**
 * Tells whether a brain-relative path has a raw-conversation folder
 * (`00_Raw`, `raw-data`, `conversation(s)`, `transcript(s)`) as one of its
 * path segments, case-insensitively.
 */
private isRawConversation(relativePath: string): boolean {
  const rawFolderSegment = /(^|[\\/])(00_Raw|raw-data|conversations?|transcripts?)([\\/]|$)/i;
  return rawFolderSegment.test(relativePath);
}
/**
 * Tells whether a brain-relative path points at operational logs — session,
 * memory or project logs rather than knowledge. The folder fragments come
 * from the user's wiki taxonomy. Such paths are excluded from knowledge
 * search (treated like raw conversations): this does not raise recall, but it
 * avoids surfacing logs as "knowledge" and wasting index space and tokens.
 */
private isOperationalPath(relativePath: string): boolean {
  const operationalPatterns = [
    /(^|[\\/])(sessions|_agents|_company|memory|Project_Logs|_Archive_Orphans|Post_Drafts|UX_Scenarios)([\\/])/i,
    /docs[\\/]records([\\/]|$)/i,
    /Harness_Research_/i,
  ];
  return operationalPatterns.some((pattern) => pattern.test(relativePath));
}
/**
 * Derives a coarse category from keywords found anywhere in the lower-cased
 * relative path. Rules are checked in order and the first match wins;
 * paths matching none of them are 'general'.
 */
private inferCategory(relativePath: string): string {
  const lowered = relativePath.toLowerCase();
  const rules: Array<[RegExp, string]> = [
    [/(decisions?|adr|planning)/i, 'decision'],
    [/(records|development|bugs)/i, 'project-record'],
    [/(architecture|design|pattern)/i, 'architecture'],
    [/(knowledge|wiki|topics)/i, 'knowledge'],
  ];
  for (const [pattern, category] of rules) {
    if (pattern.test(lowered)) {
      return category;
    }
  }
  return 'general';
}
/**
 * Tells whether a brain-relative path looks like project evidence: it
 * contains a project-record keyword, an `adr-<number>` name, or a
 * `decision(s)` folder segment. Only the path is inspected; `content` is
 * accepted but not read.
 */
private isProjectEvidence(relativePath: string, content: string): boolean {
  const lowered = relativePath.toLowerCase();
  const hasRecordKeyword = /(records|planning|development|bugs|retrospectives|projectchronicle)/i.test(lowered);
  return hasRecordKeyword || /adr-\d+|(^|[\\/])decisions?([\\/]|$)/i.test(lowered);
}
}