ded3eea7ce
주요 변경: [chunked writer 아키텍처 (v2.2.74~v2.2.75)] - 5-stage 다중 에이전트(planner/researcher/reflector/writer/synthesizer) 파이프라인 제거 → 단일 ChunkedWriter 의 outline → section[N] → polish 3-step 으로 교체. 본문 분석에서 추상화 손실 / 토큰 폭증 문제 해소 - 답변 길이 자동 분기: 짧은 prompt 는 fast-path direct 1회 호출, 본문 분석은 chunked. outline 빈 배열도 direct 폴백 [코드 리뷰 9개 항목 일괄 패치 (v2.2.76)] - /research polling hang 방어 (heartbeat + status 정규화 + 연속 실패 abort) - 회사 모드 dispatcher abort 신호를 AIService.chat 까지 전달 - bridgeFetch 에 onHeartbeat 콜백 도입 (slow endpoint 사용자 친화적) - dead code 정리: reflectionPersister.ts 제거 + enableReflection 등 좀비 config 키 - parseOutline 의 empty vs fallback reason 명시적 분리 - chatHandlers 의 회사 모드 케이스 ~325줄을 src/sidebar/companyHandlers.ts 로 분리 - Intent Alignment 라운드 한도 도달 시 smart 모드 자동 진행 - LM Studio doSwitch unload 실패 시 currentModel 정리 + load 강행 - retrieval informationDensity → queryCoverage 정합화 [/youtube 채널 지원 (v2.2.77~v2.2.82)] - 채널/플레이리스트 URL 자동 감지 + n:N 으로 영상 개수 지정 (최대 50) - 채널 루트 URL 에 /videos 탭 자동 append (yt-dlp enumeration 정상화) - 영상별 순차 처리 (queue 패턴) + i/N 진행 표시 + 마지막 통계 요약 - mode:info / mode:benchmark / mode:both 분석 모드 분기 - info: 영상 내용을 지식 카드로 추출 (튜토리얼·강의·뉴스용) - benchmark: 4-렌즈 대본 역기획서 (콘텐츠 제작 벤치마크용) - both: 둘 다 (기본) - bare keyword 도 허용: /youtube <url> n:1 info - bridge 에러 메시지 [object Object] 깨짐 수정 (구조화 에러 추출) - "패키지 없음" 등 환경 의존성 에러에 자동 가이드 첨부 [Astra: Setup Datacollect Dependencies 명령 추가 (v2.2.80)] - Python 자동 감지 + yt-dlp / youtube-transcript-api 자동 설치 - macOS PEP 668 환경 자동 폴백 (--user --break-system-packages) - /youtube 등에서 패키지 미설치 감지 시 "Install Now" 버튼 notification [테스트] - tests/agentEngine.test.ts 를 chunked flow 에 맞춰 전체 재작성 - tests/resilience_stress.test.ts Scenario B/D 를 role-aware mock 으로 갱신 - 399/399 통과 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
515 lines
23 KiB
TypeScript
515 lines
23 KiB
TypeScript
/**
 * ============================================================
 * RetrievalOrchestrator — Unified RAG Pipeline
 *
 * Orchestrator that manages all of Astra's retrieval sources in one place.
 *
 * Retrieval flow:
 *   ① Query Planning   — intent classification + search strategy selection
 *   ② Parallel Search  — Brain + Memory + Project + Episode searched together
 *   ③ Result Fusion    — unified scoring + de-duplication
 *   ④ Context Budget   — final selection within the token budget
 * ============================================================
 */
|
||
|
||
import * as fs from 'fs';
|
||
import * as path from 'path';
|
||
import { BrainProfile } from '../config';
|
||
import { findBrainFiles, summarizeText } from '../utils';
|
||
import { isInside } from '../lib/paths';
|
||
import { MemoryManager } from '../memory';
|
||
import { RetrievalChunk, RetrievalResult, ContextBudgetConfig } from './types';
|
||
import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt, extractBestSection } from './scoring';
|
||
import { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
|
||
import { getBrainTokenIndex, getBrainEmbeddings } from './brainIndex';
|
||
import { extractLessonEssence } from './lessonHelpers';
|
||
import { cosineSimilarity } from './embeddings';
|
||
|
||
export { tokenize, expandQuery, scoreTfIdf, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
|
||
export { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
|
||
export { getBrainTokenIndex, clearBrainTokenIndex } from './brainIndex';
|
||
export * from './types';
|
||
|
||
/**
 * Compact summary of a past chat session for medium-term memory retrieval.
 *
 * Instances are pre-computed by the caller and handed to the orchestrator via
 * `RetrievalOptions.recentSessions`; this module only scores and formats them.
 */
export interface RecentSessionSummary {
    /** Session identifier; embedded in the id of the chunk built for this session. */
    id: string;
    /** Session title; used for query matching and as the chunk heading. */
    title: string;
    /** First user message of the session (used when `summary` is absent). */
    firstUserMsg: string;
    /** Tail excerpt of the last assistant reply (used when `summary` is absent). */
    lastAssistantExcerpt: string;
    /**
     * Optional LLM-compressed recap stored at session end (~200 chars).
     * When present, retrieval uses this instead of the firstUserMsg+tail
     * fragment because it actually captures the decision/outcome.
     */
    summary?: string;
    /**
     * Session timestamp in epoch milliseconds (it is compared against
     * `Date.now()` and passed to `new Date(...)`). A falsy value is treated
     * as "age unknown": no recency boost and no date in the chunk heading.
     */
    timestamp: number;
}
|
||
|
||
/**
 * Per-call inputs for `RetrievalOrchestrator.retrieve`.
 *
 * Only `brain` and `memoryManager` are required; every other field tunes or
 * narrows one stage of the pipeline and has a fallback inside `retrieve`.
 */
interface RetrievalOptions {
    /** Brain profile; its `localBrainPath` is the root that brain files are enumerated from. */
    brain: BrainProfile;
    /** Provider of the long-term / project / procedural / episodic memory layers. */
    memoryManager: MemoryManager;
    /** Workspace root. Project memory is only consulted when this is set. */
    workspacePath?: string;
    /** Current conversation turns, forwarded to the memory-layer search. */
    chatHistory?: Array<{ role: string; content: string }>;
    /** Partial override of the context budget used for the final selection. */
    contextBudget?: Partial<ContextBudgetConfig>;
    /**
     * Max number of top-ranked brain files to turn into chunks (default 8).
     * `0` is meaningful: it skips brain search entirely.
     */
    brainFileLimit?: number;
    /** When true, files under raw-conversation/transcript folders are searched too. */
    includeRawConversations?: boolean;
    /**
     * Optional absolute folder paths constraining brain-file search to those
     * subtrees. When provided and non-empty, only brain files inside one of
     * the folders are considered. Empty / undefined preserves whole-brain
     * search (legacy behavior). Folders that escape the brain root are
     * silently dropped by the caller (see `agentKnowledgeMap.resolveScopeForAgent`).
     */
    scopeFolders?: string[];
    /**
     * Compact summaries of recently-touched chat sessions (excluding the
     * active one). Scored against the query and the top `mediumTermLimit`
     * are injected as medium-term memory chunks. Caller pre-computes these
     * to avoid threading vscode/ExtensionContext through this module.
     */
    recentSessions?: RecentSessionSummary[];
    /** Max number of medium-term session chunks to include after scoring. */
    mediumTermLimit?: number;
    /**
     * Optional query embedding for hybrid (sparse+dense) brain search. When
     * provided, each candidate file's cached embedding is cosine-matched and
     * blended with the TF-IDF score by `embeddingBlendAlpha`. Caller computes
     * this once per turn so we don't pay the embedding RTT inside scoring.
     */
    queryEmbedding?: number[];
    /** Embedding model name (used as a cache key on the brain index side). */
    embeddingModel?: string;
    /** Blend weight: 0 = TF-IDF only, 1 = cosine only. Default 0.5. */
    embeddingBlendAlpha?: number;
}
|
||
|
||
/**
 * Coordinates retrieval across brain files, the memory layers and recent
 * chat sessions, fuses the per-source scores onto one scale, and selects the
 * final chunk set within a token budget.
 */
export class RetrievalOrchestrator {
    /**
     * Per-source multiplier applied after per-source normalisation: some
     * sources are inherently more valuable for RAG than others. Sources not
     * listed here fall back to 0.5.
     */
    private static readonly SOURCE_BOOST: Record<string, number> = {
        'brain-trace': 1.0,
        'brain-memory': 0.9,
        'project-memory': 0.85,
        'long-term-memory': 0.8,
        'procedural-memory': 0.95, // Procedural is highly specific
        'medium-term-memory': 0.78, // recent sessions: useful when the user references "last time / yesterday"
        'episodic-memory': 0.7,
        'project-scan': 0.6,
        'recent-knowledge': 0.75,
    };

    /** Extra multiplier for lesson cards so relevant guardrails survive the budget cut. */
    private static readonly LESSON_BOOST = 1.4;

    /** Max lesson cards pulled in on top of the regular top-`limit` brain files. */
    private static readonly LESSON_EXTRA = 3;

    /**
     * Runs the unified retrieval pipeline: search every source → score →
     * normalise across sources → select within the context budget.
     *
     * @param query   Raw user query.
     * @param options Sources and tuning knobs for this call.
     * @returns The selected chunks, the lesson chunks (split out so callers can
     *          inject them as a separate "verify before finalizing" block), the
     *          dropped chunks, token usage, and a human-readable fusion log.
     */
    public retrieve(query: string, options: RetrievalOptions): RetrievalResult {
        const fusionLog: string[] = [];
        const allChunks: RetrievalChunk[] = [];
        const queryTokens = tokenize(query);
        const expandedTokens = expandQuery(queryTokens);

        fusionLog.push(`Query tokens: [${queryTokens.slice(0, 10).join(', ')}]`);
        fusionLog.push(`Expanded tokens: [${expandedTokens.slice(0, 15).join(', ')}]`);

        // ── ① Brain file search (TF-IDF, optionally hybrid with embeddings) ──
        // `brainFileLimit === 0` is meaningful (Knowledge Mix "model knowledge only"
        // mode), so `??` is used rather than `||`: an explicit 0 skips retrieval
        // instead of falling back to the default of 8.
        const scopeFolders = options.scopeFolders ?? [];
        const brainFileLimit = options.brainFileLimit ?? 8;
        const brainChunks = brainFileLimit > 0
            ? this.searchBrainFiles(
                query,
                expandedTokens,
                options.brain,
                brainFileLimit,
                options.includeRawConversations ?? false,
                scopeFolders,
                options.queryEmbedding,
                options.embeddingModel,
                options.embeddingBlendAlpha,
            )
            : [];
        allChunks.push(...brainChunks);

        let brainLogLine: string;
        if (brainFileLimit === 0) {
            brainLogLine = 'Brain search: skipped (Knowledge Mix weight = 0)';
        } else if (scopeFolders.length > 0) {
            brainLogLine = `Brain search (scoped to ${scopeFolders.length} folder(s)): ${brainChunks.length} chunks`;
        } else {
            brainLogLine = `Brain search: ${brainChunks.length} chunks found`;
        }
        fusionLog.push(brainLogLine);

        // ── ② Memory layers ──
        const memoryChunks = this.searchMemoryLayers(
            query,
            options.memoryManager,
            options.chatHistory ?? [],
            options.workspacePath,
        );
        allChunks.push(...memoryChunks);
        fusionLog.push(`Memory search: ${memoryChunks.length} chunks found`);

        // ── ②-b Medium-term memory (recent sessions) ──
        const mediumChunks = this.scoreRecentSessions(
            expandedTokens,
            options.recentSessions ?? [],
            options.mediumTermLimit ?? 0,
        );
        allChunks.push(...mediumChunks);
        fusionLog.push(`Medium-term sessions: ${mediumChunks.length} chunks selected`);

        // ── ③ Result fusion — normalise scores across sources ──
        this.normalizeScores(allChunks);
        fusionLog.push(`Total chunks before budget: ${allChunks.length}`);

        // ── ④ Context budget selection ──
        const { selected, dropped, tokensUsed } = selectWithinBudget(allChunks, options.contextBudget);

        // Pull lesson/playbook/qa-finding chunks out so callers can inject them as a prominent
        // "verify before finalizing" block rather than burying them in the brain-knowledge section.
        const lessonChunks = selected.filter((c) => c.metadata.isLesson);
        const selectedChunks = selected.filter((c) => !c.metadata.isLesson);
        fusionLog.push(
            `Selected: ${selectedChunks.length} (+${lessonChunks.length} lesson), Dropped: ${dropped.length}, Tokens: ${tokensUsed}`,
        );

        return {
            query,
            totalChunks: allChunks.length,
            selectedChunks,
            droppedChunks: dropped,
            lessonChunks,
            totalTokensUsed: tokensUsed,
            // `??` so that an explicit budget of 0 is reported as 0, not as the 8000 fallback.
            contextBudget: options.contextBudget?.totalBudget ?? 8000,
            fusionLog,
        };
    }

    /**
     * Renders a retrieval result as the final context string. Lesson chunks
     * are excluded because they are injected as a separate block.
     */
    public buildContextString(result: RetrievalResult): string {
        return assembleContext(result.selectedChunks);
    }

    // ─── Brain File Search ───

    /**
     * Ranks brain files against the expanded query and returns the top ones
     * as chunks.
     *
     * Files are enumerated under `brain.localBrainPath`, restricted to
     * `scopeFolders` (when non-empty) and stripped of raw-conversation folders
     * unless `includeRaw` is set. Ranking is TF-IDF over the pre-tokenised
     * index; when a query embedding and model are supplied, a cosine term is
     * blended in with weight `embeddingBlendAlpha` (default 0.5).
     *
     * Best-effort: any error during search yields an empty list rather than
     * failing the whole retrieval.
     *
     * @param query               Raw query (currently unused; ranking uses `expandedTokens`).
     * @param expandedTokens      Expanded query tokens.
     * @param brain               Brain profile providing `localBrainPath`.
     * @param limit               Number of top-ranked files to keep (lesson cards may add up to 3 more).
     * @param includeRaw          Include raw conversation / transcript folders.
     * @param scopeFolders        Optional folder allow-list; empty means whole brain.
     * @param queryEmbedding      Optional dense query vector enabling hybrid ranking.
     * @param embeddingModel      Embedding model name used to look up cached file vectors.
     * @param embeddingBlendAlpha Blend weight in [0, 1]; 0 disables the cosine term.
     */
    private searchBrainFiles(
        query: string,
        expandedTokens: string[],
        brain: BrainProfile,
        limit: number,
        includeRaw: boolean,
        scopeFolders: string[] = [],
        queryEmbedding?: number[],
        embeddingModel?: string,
        embeddingBlendAlpha?: number,
    ): RetrievalChunk[] {
        try {
            const inScope = (file: string): boolean =>
                scopeFolders.length === 0 || scopeFolders.some((folder) => isInside(folder, file));
            const allFiles = findBrainFiles(brain.localBrainPath)
                .filter(inScope)
                .filter((file) => includeRaw || !this.isRawConversation(path.relative(brain.localBrainPath, file)));
            if (allFiles.length === 0) return [];

            // Tokenised docs come from the brain token index, so only the files that are
            // finally chosen get read from disk below (for excerpt extraction).
            const indexed = getBrainTokenIndex(brain.localBrainPath, allFiles);
            if (indexed.length === 0) return [];

            const scored = scoreTfIdfPreTokenized(
                expandedTokens,
                indexed.map((d) => ({
                    tokens: d.tokens,
                    titleTokens: d.titleTokens,
                    lastModified: d.mtimeMs,
                    conflictCount: d.conflictCount,
                })),
            );

            // Hybrid blend. When it is active, EVERY score is moved onto the same [0, 1]
            // scale: TF-IDF is normalised by the top observed value, then blended with the
            // (non-negative) cosine similarity. Files with no cached embedding — or whose
            // cosine is not a finite number — fall back to the normalised TF-IDF score, so a
            // missing embedding neither penalises a file nor lets its raw, unnormalised
            // TF-IDF score outrank every blended one.
            const alpha = Math.max(0, Math.min(1, embeddingBlendAlpha ?? 0.5));
            if (queryEmbedding && queryEmbedding.length > 0 && embeddingModel && alpha > 0) {
                const embeddings = getBrainEmbeddings(
                    brain.localBrainPath,
                    indexed.map((d) => d.filePath),
                    embeddingModel,
                );
                if (embeddings.size > 0) {
                    const maxTfidf = scored.reduce((m, s) => (s.score > m ? s.score : m), 0) || 1;
                    for (const s of scored) {
                        const tfidfNorm = s.score / maxTfidf;
                        const vec = embeddings.get(indexed[s.index].filePath);
                        const cos = vec ? cosineSimilarity(queryEmbedding, vec) : Number.NaN;
                        s.score = Number.isFinite(cos)
                            ? (1 - alpha) * tfidfNorm + alpha * Math.max(0, cos)
                            : tfidfNorm;
                    }
                }
            }

            // Keep the regular top-`limit`, then additively pull in up to LESSON_EXTRA lesson
            // cards that did not make the cut: they are short, high-signal, and should be
            // surfaced whenever they are relevant (score > 0).
            const ranked = scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score);
            const pickedIdx = new Set<number>(ranked.slice(0, limit).map((s) => s.index));
            let lessonExtra = 0;
            for (const s of ranked) {
                if (lessonExtra >= RetrievalOrchestrator.LESSON_EXTRA) break;
                if (pickedIdx.has(s.index)) continue;
                if ((indexed[s.index].kind || '') === '') continue; // not a lesson card
                pickedIdx.add(s.index);
                lessonExtra++;
            }
            // Preserve rank order for the chosen set.
            const chosen = ranked.filter((s) => pickedIdx.has(s.index));

            const topResults: RetrievalChunk[] = [];
            for (const s of chosen) {
                const doc = indexed[s.index];
                const isLesson = (doc.kind || '') !== '';

                let content: string;
                try {
                    content = fs.readFileSync(doc.filePath, 'utf8');
                } catch {
                    continue; // unreadable (e.g. deleted just now) — skip this file
                }

                // Lesson cards: prefer the lesson-essence extraction over dumping the whole
                // card; when that yields nothing, fall back to a query-targeted excerpt.
                // Regular notes: pick the best section for the query so long notes don't
                // contribute their intro/setup blocks just because those come first.
                const cap = isLesson ? 1200 : 600;
                const excerpt = isLesson
                    ? extractLessonEssence(content, cap) || extractBestExcerpt(content, expandedTokens, cap)
                    : extractBestSection(content, expandedTokens, cap);
                const body = summarizeText(excerpt, cap);

                topResults.push({
                    id: `brain-${s.index}`,
                    source: 'brain-memory' as const,
                    title: doc.relativePath,
                    content: body,
                    score: s.score,
                    // Estimate from the text actually emitted, not the pre-truncation excerpt,
                    // so budget accounting matches what ends up in the prompt.
                    tokenEstimate: estimateTokens(body),
                    metadata: {
                        filePath: doc.filePath,
                        category: this.inferCategory(doc.relativePath),
                        isProjectEvidence: this.isProjectEvidence(doc.relativePath, content),
                        lastUpdated: doc.mtimeMs,
                        // Phase 5: Scoring Intelligence Integration
                        conflictDetected: s.conflictDetected,
                        conflictSeverity: s.conflictSeverity,
                        queryCoverage: s.queryCoverage,
                        ...(isLesson ? { isLesson: true, lessonKind: doc.kind } : {}),
                    },
                });
            }
            return topResults;
        } catch {
            // Deliberate best-effort: a broken brain folder must not break the whole turn.
            return [];
        }
    }

    // ─── Memory Layer Search ───

    /**
     * Asks each memory layer for a context block matching the query and wraps
     * every non-empty answer in a chunk. Layers are queried in a fixed order:
     * long-term, project (only when `workspacePath` is set), procedural, episodic.
     *
     * @param query         Raw user query, passed to each layer's `buildContext`.
     * @param memoryManager Provider of the memory layers.
     * @param chatHistory   Current conversation turns (currently unused here).
     * @param workspacePath Workspace root; enables the project-memory layer.
     */
    private searchMemoryLayers(
        query: string,
        memoryManager: MemoryManager,
        chatHistory: Array<{ role: string; content: string }>,
        workspacePath?: string,
    ): RetrievalChunk[] {
        const chunks: RetrievalChunk[] = [];

        this.pushMemoryContext(
            chunks,
            memoryManager.getLongTermMemory().buildContext(query),
            'ltm-context',
            'long-term-memory',
            { category: 'long-term' },
        );

        if (workspacePath) {
            this.pushMemoryContext(
                chunks,
                memoryManager.getProjectMemory(workspacePath).buildContext(query),
                'pm-context',
                'project-memory',
                { category: 'project', isProjectEvidence: true },
            );
        }

        this.pushMemoryContext(
            chunks,
            memoryManager.getProceduralMemory().buildContext(query),
            'proc-context',
            'procedural-memory',
            { category: 'procedural' },
        );

        this.pushMemoryContext(
            chunks,
            memoryManager.getEpisodicMemory().buildContext(query),
            'ep-context',
            'episodic-memory',
            { category: 'episodic' },
        );

        return chunks;
    }

    /** Appends one memory-layer context block to `out` as a chunk; no-op when the layer returned nothing. */
    private pushMemoryContext(
        out: RetrievalChunk[],
        ctx: { label: string; content: string; relevance: number } | null | undefined,
        id: string,
        source: RetrievalChunk['source'],
        metadata: RetrievalChunk['metadata'],
    ): void {
        if (!ctx) return;
        out.push({
            id,
            source,
            title: ctx.label,
            content: ctx.content,
            score: ctx.relevance,
            tokenEstimate: estimateTokens(ctx.content),
            metadata,
        });
    }

    // ─── Medium-Term: Recent Sessions ───

    /**
     * Scores caller-provided session summaries against the query with a
     * lightweight token-overlap count (no TF-IDF) and returns up to `limit`
     * of them as chunks. Each chunk carries the title plus either the stored
     * summary or the first-user-message / last-assistant-excerpt pair.
     *
     * Why recent sessions at all: short-term memory covers "this conversation"
     * and long-term covers stable brain notes, which leaves a gap for "what we
     * worked on yesterday / last week".
     *
     * NOTE(review): the recency bonus alone makes the score positive, so a
     * session younger than 7 days passes the `score > 0` filter even with zero
     * query overlap. Left as-is because it may be intentional — confirm.
     *
     * @param expandedTokens Expanded query tokens; tokens shorter than 2 chars are ignored.
     * @param sessions       Candidate session summaries.
     * @param limit          Max chunks to return; `<= 0` returns nothing.
     */
    private scoreRecentSessions(
        expandedTokens: string[],
        sessions: RecentSessionSummary[],
        limit: number,
    ): RetrievalChunk[] {
        if (!sessions || sessions.length === 0 || limit <= 0) return [];

        const queryTerms = new Set(expandedTokens.filter((t) => t.length >= 2));
        const now = Date.now();
        const MS_PER_DAY = 86400000;

        const picked = sessions
            .map((s) => {
                // Prefer the stored summary when present: matching against a real recap
                // is more meaningful than against an arbitrary head/tail fragment.
                const text = s.summary
                    ? `${s.title}\n${s.summary}`
                    : `${s.title}\n${s.firstUserMsg}\n${s.lastAssistantExcerpt}`;
                let overlap = 0;
                for (const t of tokenize(text)) if (queryTerms.has(t)) overlap++;
                // Tiny recency boost so equal-overlap sessions prefer the more recent one:
                // at most +0.1, decaying linearly to 0 at 7 days.
                const ageDays = s.timestamp ? Math.max(0, (now - s.timestamp) / MS_PER_DAY) : 999;
                const recency = ageDays < 7 ? (7 - ageDays) / 70 : 0;
                return { s, score: overlap + recency };
            })
            .filter((x) => x.score > 0)
            .sort((a, b) => b.score - a.score)
            .slice(0, limit);

        return picked.map(({ s, score }, idx): RetrievalChunk => {
            const dateStr = s.timestamp ? new Date(s.timestamp).toISOString().slice(0, 10) : '';
            const heading = `**${s.title}**${dateStr ? ` (${dateStr})` : ''}`;
            // Fall back to the raw fragments when no summary was stored for the session.
            const body = s.summary
                ? [heading, s.summary].join('\n')
                : [
                    heading,
                    s.firstUserMsg ? `사용자 요청: ${s.firstUserMsg}` : '',
                    s.lastAssistantExcerpt ? `이전 답변 마지막 부분: …${s.lastAssistantExcerpt}` : '',
                ].filter(Boolean).join('\n');
            return {
                id: `mtm-${idx}-${s.id}`,
                source: 'medium-term-memory',
                title: s.title || '(untitled session)',
                content: body,
                score,
                tokenEstimate: estimateTokens(body),
                metadata: { category: 'medium-term', lastUpdated: s.timestamp },
            };
        });
    }

    // ─── Score Normalization ───

    /**
     * Brings scores from sources with different scales onto a common one, in place.
     *
     * Each source group is divided by its own maximum (floored at 0.001 to
     * avoid division by zero), then multiplied by the per-source boost, and
     * lesson chunks get an additional 1.4× nudge.
     */
    private normalizeScores(chunks: RetrievalChunk[]): void {
        const bySource = new Map<string, RetrievalChunk[]>();
        for (const chunk of chunks) {
            const group = bySource.get(chunk.source);
            if (group) group.push(chunk);
            else bySource.set(chunk.source, [chunk]);
        }

        // Normalise each source independently. `reduce` instead of `Math.max(...spread)`
        // so a very large group cannot exceed the engine's argument-count limit.
        for (const group of bySource.values()) {
            const maxScore = group.reduce((m, c) => Math.max(m, c.score), 0.001);
            for (const chunk of group) chunk.score = chunk.score / maxScore;
        }

        for (const chunk of chunks) {
            chunk.score *= RetrievalOrchestrator.SOURCE_BOOST[chunk.source] || 0.5;
            // Lesson cards are short, high-signal guardrails — nudge relevant ones above
            // ordinary brain notes, modestly, so they don't crowd everything else out.
            if (chunk.metadata.isLesson) chunk.score *= RetrievalOrchestrator.LESSON_BOOST;
        }
    }

    // ─── Helpers ───

    /** True when any path segment is a raw-conversation / transcript folder name (case-insensitive). */
    private isRawConversation(relativePath: string): boolean {
        return /(^|[\\/])(00_Raw|raw-data|conversations?|transcripts?)([\\/]|$)/i.test(relativePath);
    }

    /**
     * Maps a brain-relative path to a coarse category by keyword. The first
     * matching rule wins, in the order decision → project-record →
     * architecture → knowledge; anything else is `'general'`.
     */
    private inferCategory(relativePath: string): string {
        const normalized = relativePath.toLowerCase();
        if (/(decisions?|adr|planning)/i.test(normalized)) return 'decision';
        if (/(records|development|bugs)/i.test(normalized)) return 'project-record';
        if (/(architecture|design|pattern)/i.test(normalized)) return 'architecture';
        if (/(knowledge|wiki|topics)/i.test(normalized)) return 'knowledge';
        return 'general';
    }

    /**
     * True when the path looks like project evidence: a record / planning /
     * bug / retrospective style path, an `adr-<n>` file, or a `decision(s)`
     * folder segment. `content` is accepted for call-site compatibility but
     * is not inspected.
     */
    private isProjectEvidence(relativePath: string, content: string): boolean {
        const normalized = relativePath.toLowerCase();
        return /(records|planning|development|bugs|retrospectives|projectchronicle)/i.test(normalized)
            || /adr-\d+|(^|[\\/])decisions?([\\/]|$)/i.test(normalized);
    }
}
|