Files
connectai/src/retrieval/index.ts
T
g1nation ded3eea7ce feat: v2.2.74 → v2.2.82 — chunked writer + 코드 리뷰 패치 + /youtube 확장
주요 변경:

[chunked writer 아키텍처 (v2.2.74~v2.2.75)]
- 5-stage 다중 에이전트(planner/researcher/reflector/writer/synthesizer)
  파이프라인 제거 → 단일 ChunkedWriter 의 outline → section[N] → polish
  3-step 으로 교체. 본문 분석에서 추상화 손실 / 토큰 폭증 문제 해소
- 답변 길이 자동 분기: 짧은 prompt 는 fast-path direct 1회 호출,
  본문 분석은 chunked. outline 빈 배열도 direct 폴백

[코드 리뷰 9개 항목 일괄 패치 (v2.2.76)]
- /research polling hang 방어 (heartbeat + status 정규화 + 연속 실패 abort)
- 회사 모드 dispatcher abort 신호를 AIService.chat 까지 전달
- bridgeFetch 에 onHeartbeat 콜백 도입 (slow endpoint 사용자 친화적)
- dead code 정리: reflectionPersister.ts 제거 + enableReflection 등 좀비 config 키
- parseOutline 의 empty vs fallback reason 명시적 분리
- chatHandlers 의 회사 모드 케이스 ~325줄을 src/sidebar/companyHandlers.ts 로 분리
- Intent Alignment 라운드 한도 도달 시 smart 모드 자동 진행
- LM Studio doSwitch unload 실패 시 currentModel 정리 + load 강행
- retrieval informationDensity → queryCoverage 정합화

[/youtube 채널 지원 (v2.2.77~v2.2.82)]
- 채널/플레이리스트 URL 자동 감지 + n:N 으로 영상 개수 지정 (최대 50)
- 채널 루트 URL 에 /videos 탭 자동 append (yt-dlp enumeration 정상화)
- 영상별 순차 처리 (queue 패턴) + i/N 진행 표시 + 마지막 통계 요약
- mode:info / mode:benchmark / mode:both 분석 모드 분기
  - info: 영상 내용을 지식 카드로 추출 (튜토리얼·강의·뉴스용)
  - benchmark: 4-렌즈 대본 역기획서 (콘텐츠 제작 벤치마크용)
  - both: 둘 다 (기본)
  - bare keyword 도 허용: /youtube <url> n:1 info
- bridge 에러 메시지 [object Object] 깨짐 수정 (구조화 에러 추출)
- "패키지 없음" 등 환경 의존성 에러에 자동 가이드 첨부

[Astra: Setup Datacollect Dependencies 명령 추가 (v2.2.80)]
- Python 자동 감지 + yt-dlp / youtube-transcript-api 자동 설치
- macOS PEP 668 환경 자동 폴백 (--user --break-system-packages)
- /youtube 등에서 패키지 미설치 감지 시 "Install Now" 버튼 notification

[테스트]
- tests/agentEngine.test.ts 를 chunked flow 에 맞춰 전체 재작성
- tests/resilience_stress.test.ts Scenario B/D 를 role-aware mock 으로 갱신
- 399/399 통과

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-23 23:13:21 +09:00

515 lines
23 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* ============================================================
* RetrievalOrchestrator — Unified RAG Pipeline
*
* Astra의 모든 검색 소스를 통합 관리하는 오케스트레이터입니다.
*
* 검색 흐름:
* ① Query Planning — 의도 분류 + 검색 전략 결정
* ② Parallel Search — Brain + Memory + Project + Episode 동시 검색
* ③ Result Fusion — 통합 스코어링 + 중복 제거
* ④ Context Budget — 토큰 예산 내에서 최종 선택
* ============================================================
*/
import * as fs from 'fs';
import * as path from 'path';
import { BrainProfile } from '../config';
import { findBrainFiles, summarizeText } from '../utils';
import { isInside } from '../lib/paths';
import { MemoryManager } from '../memory';
import { RetrievalChunk, RetrievalResult, ContextBudgetConfig } from './types';
import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt, extractBestSection } from './scoring';
import { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
import { getBrainTokenIndex, getBrainEmbeddings } from './brainIndex';
import { extractLessonEssence } from './lessonHelpers';
import { cosineSimilarity } from './embeddings';
export { tokenize, expandQuery, scoreTfIdf, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
export { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
export { getBrainTokenIndex, clearBrainTokenIndex } from './brainIndex';
export * from './types';
/**
* Compact summary of a past chat session for medium-term memory retrieval.
*
* Instances are scored against the expanded query tokens by
* `RetrievalOrchestrator.scoreRecentSessions` and the best matches are
* rendered as `medium-term-memory` chunks.
*/
export interface RecentSessionSummary {
/** Session identifier; embedded in the generated chunk id (`mtm-<rank>-<id>`). */
id: string;
/** Session title; part of the scored text and used as the chunk title ("(untitled session)" when empty). */
title: string;
/** First user message of the session; scored and rendered only when `summary` is absent. */
firstUserMsg: string;
/** Tail of the last assistant reply; scored and rendered only when `summary` is absent. */
lastAssistantExcerpt: string;
/**
* Optional LLM-compressed recap stored at session end (~200 chars).
* When present, retrieval uses this instead of the firstUserMsg+tail
* fragment because it actually captures the decision/outcome.
*/
summary?: string;
/**
* Session time, compared against `Date.now()` for the recency boost (so it is
* treated as epoch milliseconds). A falsy value disables the boost and omits
* the date from the rendered chunk.
*/
timestamp: number;
}
/** Inputs accepted by `RetrievalOrchestrator.retrieve` for a single query. */
interface RetrievalOptions {
/** Brain profile; its `localBrainPath` is the root from which brain files are enumerated. */
brain: BrainProfile;
/** Source of the long-term, project, procedural and episodic memory layers that are queried. */
memoryManager: MemoryManager;
/** When set, project memory for this workspace path is searched as well. */
workspacePath?: string;
/**
* Conversation so far. It is forwarded to the memory-layer search, but the
* implementation visible in this module does not read it.
*/
chatHistory?: Array<{ role: string; content: string }>;
/**
* Budget overrides handed to `selectWithinBudget`. `totalBudget` is echoed in
* the result and reported as 8000 when unset (or 0).
*/
contextBudget?: Partial<ContextBudgetConfig>;
/**
* Max number of top-scoring brain files to return (default 8). `0` skips brain
* search entirely. Up to 3 additional lesson cards may be returned on top of
* this limit.
*/
brainFileLimit?: number;
/**
* When falsy (default), files under raw-conversation style folders
* (`00_Raw`, `raw-data`, `conversation(s)`, `transcript(s)`) are excluded.
*/
includeRawConversations?: boolean;
/**
* Optional absolute folder paths constraining brain-file search to those
* subtrees. When provided and non-empty, only brain files inside one of
* the folders are considered. Empty / undefined preserves whole-brain
* search (legacy behavior). Folders that escape the brain root are
* silently dropped by the caller (see `agentKnowledgeMap.resolveScopeForAgent`).
*/
scopeFolders?: string[];
/**
* Compact summaries of recently-touched chat sessions (excluding the
* active one). Scored against the query and the top `mediumTermLimit`
* are injected as medium-term memory chunks. Caller pre-computes these
* to avoid threading vscode/ExtensionContext through this module.
*/
recentSessions?: RecentSessionSummary[];
/**
* Max number of medium-term session chunks to include after scoring.
* Defaults to 0, which disables medium-term retrieval.
*/
mediumTermLimit?: number;
/**
* Optional query embedding for hybrid (sparse+dense) brain search. When
* provided, each candidate file's cached embedding is cosine-matched and
* blended with the TF-IDF score by `embeddingBlendAlpha`. Caller computes
* this once per turn so we don't pay the embedding RTT inside scoring.
* Blending only happens when `embeddingModel` is also set and
* `embeddingBlendAlpha` is greater than 0.
*/
queryEmbedding?: number[];
/** Embedding model name (used as a cache key on the brain index side). */
embeddingModel?: string;
/**
* Blend weight: 0 = TF-IDF only, 1 = cosine only; values above 1 are clamped to 1.
* NOTE(review): this module applies no implicit default — when the value is
* omitted (or <= 0) the hybrid path is skipped, so any default such as 0.5
* must be supplied by the caller.
*/
embeddingBlendAlpha?: number;
}
/**
 * Unified retrieval entry point: gathers candidate chunks from brain files,
 * the memory layers and recent sessions, puts their scores on a comparable
 * scale, and keeps what fits into the context budget.
 */
export class RetrievalOrchestrator {
  /** Number of top-ranked brain files returned when the caller sets no limit. */
  private static readonly DEFAULT_BRAIN_FILE_LIMIT = 8;
  /** Max lesson cards pulled in on top of the regular top-`limit` brain files. */
  private static readonly LESSON_EXTRA = 3;
  /** Character cap for lesson-card excerpts. */
  private static readonly LESSON_EXCERPT_CAP = 1200;
  /** Character cap for regular note excerpts. */
  private static readonly NOTE_EXCERPT_CAP = 600;
  /** Multiplier applied to lesson chunks after source weighting. */
  private static readonly LESSON_BOOST = 1.4;
  /** Weight applied to sources that have no entry in `SOURCE_BOOST`. */
  private static readonly DEFAULT_SOURCE_BOOST = 0.5;
  /** Per-source priority weights (some sources are inherently more valuable for RAG). */
  private static readonly SOURCE_BOOST: Readonly<Record<string, number>> = {
    'brain-trace': 1.0,
    'brain-memory': 0.9,
    'project-memory': 0.85,
    'long-term-memory': 0.8,
    'procedural-memory': 0.95, // Procedural is highly specific
    'medium-term-memory': 0.78, // recent sessions: useful when the user references "last time / yesterday"
    'episodic-memory': 0.7,
    'project-scan': 0.6,
    'recent-knowledge': 0.75,
  };

  /**
   * Runs the unified retrieval for one query: search every source, normalise
   * the scores, then select chunks within the context budget.
   *
   * @param query Raw user query.
   * @param options Sources and tuning knobs for this retrieval.
   * @returns The selected chunks (lesson chunks split out into `lessonChunks`),
   *          the dropped chunks, token usage and a human-readable fusion log.
   */
  public retrieve(query: string, options: RetrievalOptions): RetrievalResult {
    const fusionLog: string[] = [];
    const allChunks: RetrievalChunk[] = [];

    const queryTokens = tokenize(query);
    const expandedTokens = expandQuery(queryTokens);
    fusionLog.push(`Query tokens: [${queryTokens.slice(0, 10).join(', ')}]`);
    fusionLog.push(`Expanded tokens: [${expandedTokens.slice(0, 15).join(', ')}]`);

    // ── ① Brain File Search (TF-IDF enhanced, optionally hybrid with embeddings) ──
    // `brainFileLimit === 0` is meaningful (Knowledge Mix "model knowledge only"
    // mode), so use `??` rather than `||`. When the caller explicitly passes 0,
    // we skip retrieval entirely instead of falling back to the default.
    const scopeFolders = options.scopeFolders ?? [];
    const brainFileLimit = options.brainFileLimit ?? RetrievalOrchestrator.DEFAULT_BRAIN_FILE_LIMIT;
    const runBrainSearch = brainFileLimit > 0;
    const brainChunks = runBrainSearch
      ? this.searchBrainFiles(
          query,
          expandedTokens,
          options.brain,
          brainFileLimit,
          options.includeRawConversations || false,
          scopeFolders,
          options.queryEmbedding,
          options.embeddingModel,
          options.embeddingBlendAlpha
        )
      : [];
    allChunks.push(...brainChunks);

    // The log mirrors the decision above: a negative/NaN limit also skips the
    // search, so it must not be reported as "0 chunks found".
    let brainLog: string;
    if (brainFileLimit === 0) {
      brainLog = 'Brain search: skipped (Knowledge Mix weight = 0)';
    } else if (!runBrainSearch) {
      brainLog = `Brain search: skipped (invalid brainFileLimit: ${brainFileLimit})`;
    } else if (scopeFolders.length > 0) {
      brainLog = `Brain search (scoped to ${scopeFolders.length} folder(s)): ${brainChunks.length} chunks`;
    } else {
      brainLog = `Brain search: ${brainChunks.length} chunks found`;
    }
    fusionLog.push(brainLog);

    // ── ② Memory Layers ──
    const memoryChunks = this.searchMemoryLayers(
      query,
      options.memoryManager,
      options.chatHistory || [],
      options.workspacePath
    );
    allChunks.push(...memoryChunks);
    fusionLog.push(`Memory search: ${memoryChunks.length} chunks found`);

    // ── ②-b Medium-Term Memory (recent sessions) ──
    const mediumChunks = this.scoreRecentSessions(
      expandedTokens,
      options.recentSessions || [],
      options.mediumTermLimit ?? 0
    );
    allChunks.push(...mediumChunks);
    fusionLog.push(`Medium-term sessions: ${mediumChunks.length} chunks selected`);

    // ── ③ Result Fusion — normalize scores across sources ──
    this.normalizeScores(allChunks);
    fusionLog.push(`Total chunks before budget: ${allChunks.length}`);

    // ── ④ Context Budget Selection ──
    const { selected, dropped, tokensUsed } = selectWithinBudget(allChunks, options.contextBudget);

    // Pull lesson/playbook/qa-finding chunks out so callers can inject them as a prominent
    // "verify before finalizing" block rather than burying them in the brain-knowledge section.
    const lessonChunks = selected.filter((c) => c.metadata.isLesson);
    const selectedChunks = selected.filter((c) => !c.metadata.isLesson);
    fusionLog.push(`Selected: ${selectedChunks.length} (+${lessonChunks.length} lesson), Dropped: ${dropped.length}, Tokens: ${tokensUsed}`);

    return {
      query,
      totalChunks: allChunks.length,
      selectedChunks,
      droppedChunks: dropped,
      lessonChunks,
      totalTokensUsed: tokensUsed,
      // NOTE(review): `||` also maps an explicit totalBudget of 0 to 8000 — confirm that is intended.
      contextBudget: options.contextBudget?.totalBudget || 8000,
      fusionLog
    };
  }

  /**
   * Converts a retrieval result into the final context string. Lesson chunks
   * are excluded here — callers inject them as a separate block.
   */
  public buildContextString(result: RetrievalResult): string {
    return assembleContext(result.selectedChunks);
  }

  // ─── Brain File Search ───

  /**
   * Ranks brain files against the expanded query and returns excerpt chunks
   * for the top `limit` files plus up to `LESSON_EXTRA` additional lesson cards.
   *
   * @param query Raw query (currently unused; scoring works on `expandedTokens`).
   * @param expandedTokens Expanded query tokens used for scoring and excerpt selection.
   * @param brain Brain profile whose `localBrainPath` is searched.
   * @param limit Max number of regular top-ranked files.
   * @param includeRaw When false, raw-conversation style folders are excluded.
   * @param scopeFolders When non-empty, only files inside one of these folders are considered.
   * @param queryEmbedding Optional query vector enabling the hybrid blend.
   * @param embeddingModel Embedding model name used to look up cached file vectors.
   * @param embeddingBlendAlpha Blend weight (0 = TF-IDF only, 1 = cosine only); blending runs only when > 0.
   * @returns Chunks in descending score order; an empty array on any failure (best-effort).
   */
  private searchBrainFiles(
    query: string,
    expandedTokens: string[],
    brain: BrainProfile,
    limit: number,
    includeRaw: boolean,
    scopeFolders: string[] = [],
    queryEmbedding?: number[],
    embeddingModel?: string,
    embeddingBlendAlpha?: number,
  ): RetrievalChunk[] {
    try {
      const inScope = (file: string) =>
        scopeFolders.length === 0 || scopeFolders.some((folder) => isInside(folder, file));
      const candidateFiles = findBrainFiles(brain.localBrainPath)
        .filter(inScope)
        .filter((file) => includeRaw || !this.isRawConversation(path.relative(brain.localBrainPath, file)));
      if (candidateFiles.length === 0) return [];

      // Tokenized docs from the persistent mtime-keyed index — unchanged files are not re-read
      // or re-tokenized, so per-query work over a large brain drops from O(total content) to O(files) stats.
      const indexed = getBrainTokenIndex(brain.localBrainPath, candidateFiles);
      if (indexed.length === 0) return [];

      const scored = scoreTfIdfPreTokenized(
        expandedTokens,
        indexed.map((d) => ({
          tokens: d.tokens,
          titleTokens: d.titleTokens,
          lastModified: d.mtimeMs,
          conflictCount: d.conflictCount,
        }))
      );

      // Hybrid blend — only when the caller supplied a query embedding, a model
      // name and a positive weight. A NaN weight fails the `> 0` check and is skipped.
      const alpha = Math.min(1, embeddingBlendAlpha ?? 0);
      if (queryEmbedding && embeddingModel && alpha > 0) {
        this.blendEmbeddingScores(scored, indexed, brain.localBrainPath, queryEmbedding, embeddingModel, alpha);
      }

      // Always consider lesson cards for the top slots even if they didn't crack the raw-score top-`limit`:
      // they're short, high-signal, and we want them surfaced when relevant. We keep the regular top-`limit`
      // and additively pull in up to a few lesson cards (deduped by index).
      const ranked = scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score);
      const pickedIdx = new Set<number>();
      for (const s of ranked.slice(0, limit)) pickedIdx.add(s.index);
      let lessonExtra = 0;
      for (const s of ranked) {
        if (lessonExtra >= RetrievalOrchestrator.LESSON_EXTRA) break;
        if (pickedIdx.has(s.index)) continue;
        if (!indexed[s.index].kind) continue; // only lesson-kind docs qualify for the extra slots
        pickedIdx.add(s.index);
        lessonExtra++;
      }

      const topResults: RetrievalChunk[] = [];
      // Filtering `ranked` (rather than iterating the set) preserves rank order.
      for (const s of ranked.filter((r) => pickedIdx.has(r.index))) {
        const doc = indexed[s.index];
        const isLesson = Boolean(doc.kind);

        // Only the chosen files are actually read off disk (for excerpt extraction).
        let content = '';
        try {
          content = fs.readFileSync(doc.filePath, 'utf8');
        } catch {
          continue; // deleted just now — skip
        }

        // Lesson cards: extract just the high-signal sections (Mistake / Root Cause / Fix /
        // Prevention Checklist) instead of dumping the whole card; old lessons without those
        // headings fall back to a query-targeted excerpt.
        //
        // Regular notes: pick the best heading-bounded section for the query so that long
        // notes don't dump their intro/setup blocks just because they come first.
        const cap = isLesson
          ? RetrievalOrchestrator.LESSON_EXCERPT_CAP
          : RetrievalOrchestrator.NOTE_EXCERPT_CAP;
        const excerpt = isLesson
          ? extractLessonEssence(content, cap) || extractBestExcerpt(content, expandedTokens, cap)
          : extractBestSection(content, expandedTokens, cap);
        const body = summarizeText(excerpt, cap);

        topResults.push({
          id: `brain-${s.index}`,
          source: 'brain-memory' as const,
          title: doc.relativePath,
          content: body,
          score: s.score,
          // Estimate from the text that is actually emitted so the budget
          // accounting matches what gets injected into the prompt.
          tokenEstimate: estimateTokens(body),
          metadata: {
            filePath: doc.filePath,
            category: this.inferCategory(doc.relativePath),
            isProjectEvidence: this.isProjectEvidence(doc.relativePath, content),
            lastUpdated: doc.mtimeMs,
            // Phase 5: Scoring Intelligence Integration
            conflictDetected: s.conflictDetected,
            conflictSeverity: s.conflictSeverity,
            queryCoverage: s.queryCoverage,
            ...(isLesson ? { isLesson: true, lessonKind: doc.kind } : {}),
          },
        });
      }
      return topResults;
    } catch {
      // Deliberate best-effort: a failing brain search must not break the whole
      // retrieval, so every error degrades to "no brain chunks".
      return [];
    }
  }

  /**
   * Blends cached file-embedding similarity into the TF-IDF scores, in place.
   *
   * TF-IDF scores are divided by the top observed value so both terms live on
   * a [0, 1] scale. Files without a cached embedding receive that same
   * normalised TF-IDF score: leaving them on the raw scale would let them
   * outrank (raw scores > 1) or sink below (raw scores << 1) every blended
   * file purely because of the scale mismatch. Nothing is changed when no
   * embeddings are cached at all.
   *
   * @param scored TF-IDF results; each `score` is overwritten.
   * @param indexed Indexed docs that `scored[i].index` refers to.
   * @param brainPath Brain root passed to the embedding cache lookup.
   * @param queryEmbedding Query vector.
   * @param embeddingModel Model name used as the cache key.
   * @param alpha Weight of the cosine term, expected in (0, 1].
   */
  private blendEmbeddingScores(
    scored: ReturnType<typeof scoreTfIdfPreTokenized>,
    indexed: ReturnType<typeof getBrainTokenIndex>,
    brainPath: BrainProfile['localBrainPath'],
    queryEmbedding: number[],
    embeddingModel: string,
    alpha: number,
  ): void {
    const embeddings = getBrainEmbeddings(brainPath, indexed.map((d) => d.filePath), embeddingModel);
    if (embeddings.size === 0) return;

    // `|| 1` guards the division when every TF-IDF score is 0.
    const maxTfidf = scored.reduce((m, s) => (s.score > m ? s.score : m), 0) || 1;
    for (const s of scored) {
      const tfidfNorm = s.score / maxTfidf;
      const vec = embeddings.get(indexed[s.index].filePath);
      if (!vec) {
        s.score = tfidfNorm;
        continue;
      }
      // Cosine is [-1, 1] in theory; negative similarity is clamped to 0.
      const cos = cosineSimilarity(queryEmbedding, vec);
      s.score = (1 - alpha) * tfidfNorm + alpha * Math.max(0, cos);
    }
  }

  // ─── Memory Layer Search ───

  /**
   * Asks each memory layer for its context for the query and wraps every
   * non-empty answer in a chunk. Layers are queried in the order long-term,
   * project (only when `workspacePath` is set), procedural, episodic.
   *
   * @param query Raw user query passed to each layer's `buildContext`.
   * @param memoryManager Provider of the memory layers.
   * @param chatHistory Accepted for interface compatibility; not read here.
   * @param workspacePath Workspace whose project memory should be searched.
   * @returns At most one chunk per memory layer.
   */
  private searchMemoryLayers(
    query: string,
    memoryManager: MemoryManager,
    chatHistory: Array<{ role: string; content: string }>,
    workspacePath?: string
  ): RetrievalChunk[] {
    const chunks: RetrievalChunk[] = [];

    // Wraps one layer's context (if any) in a chunk.
    const addLayer = (
      id: string,
      source: RetrievalChunk['source'],
      context:
        | { label: RetrievalChunk['title']; content: RetrievalChunk['content']; relevance: RetrievalChunk['score'] }
        | null
        | undefined,
      metadata: RetrievalChunk['metadata'],
    ): void => {
      if (!context) return;
      chunks.push({
        id,
        source,
        title: context.label,
        content: context.content,
        score: context.relevance,
        tokenEstimate: estimateTokens(context.content),
        metadata,
      });
    };

    // Long-Term Memory
    addLayer(
      'ltm-context',
      'long-term-memory',
      memoryManager.getLongTermMemory().buildContext(query),
      { category: 'long-term' }
    );

    // Project Memory
    if (workspacePath) {
      addLayer(
        'pm-context',
        'project-memory',
        memoryManager.getProjectMemory(workspacePath).buildContext(query),
        { category: 'project', isProjectEvidence: true }
      );
    }

    // Procedural Memory
    addLayer(
      'proc-context',
      'procedural-memory',
      memoryManager.getProceduralMemory().buildContext(query),
      { category: 'procedural' }
    );

    // Episodic Memory
    addLayer(
      'ep-context',
      'episodic-memory',
      memoryManager.getEpisodicMemory().buildContext(query),
      { category: 'episodic' }
    );

    return chunks;
  }

  // ─── Medium-Term: Recent Sessions ───

  /**
   * Scores the caller-provided session summaries against the current query
   * (lightweight token overlap — sessions are small so the TF-IDF machinery is
   * skipped) and returns up to `limit` of them as chunks. Each chunk carries
   * the title plus either the stored summary or the first user message and
   * the tail of the last assistant reply.
   *
   * Why include recent sessions at all: short-term covers "this conversation",
   * long-term covers "stable brain notes", but there is a gap for "what we
   * worked on yesterday/last week".
   *
   * NOTE(review): the recency bonus alone makes the score positive, so a
   * session younger than 7 days is kept even with zero token overlap —
   * confirm this is intended (it does serve "what did we do last time" queries).
   *
   * @param expandedTokens Expanded query tokens; tokens shorter than 2 chars are ignored.
   * @param sessions Candidate session summaries.
   * @param limit Max number of chunks to return; `<= 0` returns nothing.
   * @returns Chunks in descending score order.
   */
  private scoreRecentSessions(
    expandedTokens: string[],
    sessions: RecentSessionSummary[],
    limit: number,
  ): RetrievalChunk[] {
    if (!sessions || sessions.length === 0 || limit <= 0) return [];

    const queryTerms = new Set(expandedTokens.filter((t) => t.length >= 2));
    const now = Date.now();
    const msPerDay = 86400000;

    const ranked = sessions
      .map((s) => {
        // Prefer the LLM-compressed summary when present — query matches against a real
        // recap are far more meaningful than against an arbitrary head/tail.
        const text = s.summary
          ? `${s.title}\n${s.summary}`
          : `${s.title}\n${s.firstUserMsg}\n${s.lastAssistantExcerpt}`;
        let overlap = 0;
        for (const t of tokenize(text)) if (queryTerms.has(t)) overlap++;
        // Tiny recency boost so equal-overlap sessions prefer the more recent one:
        // at most +0.1 for a brand-new session, decaying linearly to 0 at 7 days.
        const ageDays = s.timestamp ? Math.max(0, (now - s.timestamp) / msPerDay) : 999;
        const recency = ageDays < 7 ? (7 - ageDays) / 70 : 0;
        return { s, score: overlap + recency };
      })
      .filter((x) => x.score > 0)
      .sort((a, b) => b.score - a.score)
      .slice(0, limit);

    return ranked.map(({ s, score }, idx): RetrievalChunk => {
      // Same fallback for heading and chunk title, so an untitled session
      // no longer renders as an empty bold marker ("****").
      const title = s.title || '(untitled session)';
      const dateStr = s.timestamp ? new Date(s.timestamp).toISOString().slice(0, 10) : '';
      const heading = `**${title}**${dateStr ? ` (${dateStr})` : ''}`;
      // Prefer the LLM-compressed summary; fall back to the raw fragments when
      // the session has no stored summary.
      const body = s.summary
        ? [heading, s.summary].join('\n')
        : [
            heading,
            s.firstUserMsg ? `사용자 요청: ${s.firstUserMsg}` : '',
            s.lastAssistantExcerpt ? `이전 답변 마지막 부분: …${s.lastAssistantExcerpt}` : '',
          ].filter(Boolean).join('\n');
      return {
        id: `mtm-${idx}-${s.id}`,
        source: 'medium-term-memory',
        title,
        content: body,
        score,
        tokenEstimate: estimateTokens(body),
        metadata: { category: 'medium-term', lastUpdated: s.timestamp },
      };
    });
  }

  // ─── Score Normalization ───

  /**
   * Puts scores from sources with different scales on a comparable scale, in
   * place: each score is divided by the top score of its own source (floored
   * at 0.001), multiplied by the source weight, and lesson chunks get an
   * extra 1.4× nudge.
   *
   * NOTE(review): a source that contributes a single chunk always normalises
   * to 1.0 before weighting, so that chunk's own relevance value does not
   * influence its final score.
   */
  private normalizeScores(chunks: RetrievalChunk[]): void {
    // Top score per source; the 0.001 floor avoids dividing by zero.
    const maxBySource = new Map<string, number>();
    for (const chunk of chunks) {
      maxBySource.set(chunk.source, Math.max(maxBySource.get(chunk.source) ?? 0.001, chunk.score));
    }

    for (const chunk of chunks) {
      chunk.score = chunk.score / (maxBySource.get(chunk.source) ?? 0.001);
      chunk.score *= RetrievalOrchestrator.SOURCE_BOOST[chunk.source] ?? RetrievalOrchestrator.DEFAULT_SOURCE_BOOST;
      // Lesson cards are short, high-signal guardrails — nudge relevant ones above ordinary brain notes
      // so they survive the budget. Modest so they don't crowd everything out when many match.
      if (chunk.metadata.isLesson) chunk.score *= RetrievalOrchestrator.LESSON_BOOST;
    }
  }

  // ─── Helpers ───

  /** True when the brain-relative path has a raw-conversation style folder segment. */
  private isRawConversation(relativePath: string): boolean {
    return /(^|[\\/])(00_Raw|raw-data|conversations?|transcripts?)([\\/]|$)/i.test(relativePath);
  }

  /** Derives a coarse category from keywords in the path; the first matching rule wins. */
  private inferCategory(relativePath: string): string {
    const normalized = relativePath.toLowerCase();
    if (/(decisions?|adr|planning)/i.test(normalized)) return 'decision';
    if (/(records|development|bugs)/i.test(normalized)) return 'project-record';
    if (/(architecture|design|pattern)/i.test(normalized)) return 'architecture';
    if (/(knowledge|wiki|topics)/i.test(normalized)) return 'knowledge';
    return 'general';
  }

  /**
   * True when the path looks like project evidence (records, planning, bug
   * logs, ADRs, decision folders…). `content` is accepted but not inspected.
   */
  private isProjectEvidence(relativePath: string, content: string): boolean {
    const normalized = relativePath.toLowerCase();
    if (/(records|planning|development|bugs|retrospectives|projectchronicle)/i.test(normalized)) return true;
    if (/adr-\d+|(^|[\\/])decisions?([\\/]|$)/i.test(normalized)) return true;
    return false;
  }
}