refactor: optimize core engine and retrieval logic for v2.80.43
This commit is contained in:
+163
-10
@@ -19,15 +19,32 @@ import { findBrainFiles, summarizeText } from '../utils';
|
||||
import { isInside } from '../lib/paths';
|
||||
import { MemoryManager } from '../memory';
|
||||
import { RetrievalChunk, RetrievalResult, ContextBudgetConfig } from './types';
|
||||
import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
|
||||
import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt, extractBestSection } from './scoring';
|
||||
import { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
|
||||
import { getBrainTokenIndex } from './brainIndex';
|
||||
import { getBrainTokenIndex, getBrainEmbeddings } from './brainIndex';
|
||||
import { extractLessonEssence } from './lessonHelpers';
|
||||
import { cosineSimilarity } from './embeddings';
|
||||
|
||||
export { tokenize, expandQuery, scoreTfIdf, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
|
||||
export { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
|
||||
export { getBrainTokenIndex, clearBrainTokenIndex } from './brainIndex';
|
||||
export * from './types';
|
||||
|
||||
/**
 * Compact summary of a past chat session for medium-term memory retrieval.
 *
 * Instances are supplied by the caller through `RetrievalOptions.recentSessions`
 * and scored against the query by `RetrievalOrchestrator.scoreRecentSessions`.
 */
export interface RecentSessionSummary {
    /** Session identifier; embedded in the id of the chunk built from this session. */
    id: string;
    /** Session title; always part of the scored text and of the chunk heading. */
    title: string;
    /** First user message of the session; scored and shown only when `summary` is absent. */
    firstUserMsg: string;
    /** Tail of the last assistant reply; scored and shown only when `summary` is absent. */
    lastAssistantExcerpt: string;
    /**
     * Optional LLM-compressed recap stored at session end (~200 chars).
     * When present, retrieval uses this instead of the firstUserMsg+tail
     * fragment because it actually captures the decision/outcome.
     */
    summary?: string;
    /**
     * Time associated with the session, in epoch milliseconds (it is compared
     * against `Date.now()`). A falsy value is treated as "unknown age".
     */
    timestamp: number;
}
|
||||
|
||||
interface RetrievalOptions {
|
||||
brain: BrainProfile;
|
||||
memoryManager: MemoryManager;
|
||||
@@ -44,6 +61,26 @@ interface RetrievalOptions {
|
||||
* silently dropped by the caller (see `agentKnowledgeMap.resolveScopeForAgent`).
|
||||
*/
|
||||
scopeFolders?: string[];
|
||||
/**
|
||||
* Compact summaries of recently-touched chat sessions (excluding the
|
||||
* active one). Scored against the query and the top `mediumTermLimit`
|
||||
* are injected as medium-term memory chunks. Caller pre-computes these
|
||||
* to avoid threading vscode/ExtensionContext through this module.
|
||||
*/
|
||||
recentSessions?: RecentSessionSummary[];
|
||||
/** Max number of medium-term session chunks to include after scoring. */
|
||||
mediumTermLimit?: number;
|
||||
/**
|
||||
* Optional query embedding for hybrid (sparse+dense) brain search. When
|
||||
* provided, each candidate file's cached embedding is cosine-matched and
|
||||
* blended with the TF-IDF score by `embeddingBlendAlpha`. Caller computes
|
||||
* this once per turn so we don't pay the embedding RTT inside scoring.
|
||||
*/
|
||||
queryEmbedding?: number[];
|
||||
/** Embedding model name (used as a cache key on the brain index side). */
|
||||
embeddingModel?: string;
|
||||
/** Blend weight: 0 = TF-IDF only, 1 = cosine only. Default 0.5. */
|
||||
embeddingBlendAlpha?: number;
|
||||
}
|
||||
|
||||
export class RetrievalOrchestrator {
|
||||
@@ -60,7 +97,7 @@ export class RetrievalOrchestrator {
|
||||
fusionLog.push(`Query tokens: [${queryTokens.slice(0, 10).join(', ')}]`);
|
||||
fusionLog.push(`Expanded tokens: [${expandedTokens.slice(0, 15).join(', ')}]`);
|
||||
|
||||
// ── ① Brain File Search (TF-IDF enhanced) ──
|
||||
// ── ① Brain File Search (TF-IDF enhanced, optionally hybrid with embeddings) ──
|
||||
const scopeFolders = options.scopeFolders ?? [];
|
||||
const brainChunks = this.searchBrainFiles(
|
||||
query,
|
||||
@@ -68,7 +105,10 @@ export class RetrievalOrchestrator {
|
||||
options.brain,
|
||||
options.brainFileLimit || 8,
|
||||
options.includeRawConversations || false,
|
||||
scopeFolders
|
||||
scopeFolders,
|
||||
options.queryEmbedding,
|
||||
options.embeddingModel,
|
||||
options.embeddingBlendAlpha
|
||||
);
|
||||
allChunks.push(...brainChunks);
|
||||
fusionLog.push(
|
||||
@@ -87,6 +127,15 @@ export class RetrievalOrchestrator {
|
||||
allChunks.push(...memoryChunks);
|
||||
fusionLog.push(`Memory search: ${memoryChunks.length} chunks found`);
|
||||
|
||||
// ── ②-b Medium-Term Memory (recent sessions) ──
|
||||
const mediumChunks = this.scoreRecentSessions(
|
||||
expandedTokens,
|
||||
options.recentSessions || [],
|
||||
options.mediumTermLimit ?? 0
|
||||
);
|
||||
allChunks.push(...mediumChunks);
|
||||
fusionLog.push(`Medium-term sessions: ${mediumChunks.length} chunks selected`);
|
||||
|
||||
// ── ③ Result Fusion — normalize scores across sources ──
|
||||
this.normalizeScores(allChunks);
|
||||
fusionLog.push(`Total chunks before budget: ${allChunks.length}`);
|
||||
@@ -129,7 +178,10 @@ export class RetrievalOrchestrator {
|
||||
brain: BrainProfile,
|
||||
limit: number,
|
||||
includeRaw: boolean,
|
||||
scopeFolders: string[] = []
|
||||
scopeFolders: string[] = [],
|
||||
queryEmbedding?: number[],
|
||||
embeddingModel?: string,
|
||||
embeddingBlendAlpha?: number,
|
||||
): RetrievalChunk[] {
|
||||
try {
|
||||
const scoped = (file: string) => scopeFolders.length === 0
|
||||
@@ -155,6 +207,34 @@ export class RetrievalOrchestrator {
|
||||
}))
|
||||
);
|
||||
|
||||
// Hybrid blend: when the caller provided a query embedding and an
|
||||
// embedding model, fetch the cached file vectors and add a cosine
|
||||
// similarity term to each score. We normalise TF-IDF scores by the
|
||||
// top observed value so the two terms live on the same scale before
|
||||
// blending. Files without a cached embedding keep their pure TF-IDF
|
||||
// score so adding/missing embeddings doesn't hurt retrieval.
|
||||
if (queryEmbedding && embeddingModel && (embeddingBlendAlpha ?? 0) > 0) {
|
||||
const alpha = Math.max(0, Math.min(1, embeddingBlendAlpha!));
|
||||
const filePaths = indexed.map((d) => d.filePath);
|
||||
const embeddings = getBrainEmbeddings(brain.localBrainPath, filePaths, embeddingModel);
|
||||
if (embeddings.size > 0) {
|
||||
const maxTfidf = scored.reduce((m, s) => s.score > m ? s.score : m, 0) || 1;
|
||||
let hits = 0;
|
||||
for (const s of scored) {
|
||||
const fp = indexed[s.index].filePath;
|
||||
const vec = embeddings.get(fp);
|
||||
if (!vec) continue;
|
||||
const cos = cosineSimilarity(queryEmbedding, vec); // [-1, 1] in theory; positive for typical embedding spaces
|
||||
const tfidfNorm = s.score / maxTfidf;
|
||||
s.score = (1 - alpha) * tfidfNorm + alpha * Math.max(0, cos);
|
||||
hits++;
|
||||
}
|
||||
if (hits > 0) {
|
||||
// Re-sort downstream is handled by the .filter().sort() that follows.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Always consider lesson cards for the top slots even if they didn't crack the raw-score top-`limit`:
|
||||
// they're short, high-signal, and we want them surfaced when relevant. We keep the regular top-`limit`
|
||||
// and additively pull in up to a few lesson cards (deduped by index).
|
||||
@@ -180,12 +260,20 @@ export class RetrievalOrchestrator {
|
||||
// Only the chosen files are actually read off disk (for excerpt extraction).
|
||||
let content = '';
|
||||
try { content = fs.readFileSync(doc.filePath, 'utf8'); } catch { /* deleted just now — skip */ continue; }
|
||||
// Lesson cards: hand back the whole card (they're meant to be short) so the Prevention Checklist
|
||||
// survives; fall back to a generous excerpt for long ones. Regular notes: the usual 400-char excerpt.
|
||||
// Lesson cards: extract just the high-signal sections (Mistake / Root Cause / Fix /
|
||||
// Prevention Checklist) instead of dumping the whole 2500-char card. Old lessons
|
||||
// without those headings fall back to a query-targeted excerpt. Cuts retrieval tokens
|
||||
// by ~70% per lesson without losing the guardrail content.
|
||||
//
|
||||
// Regular notes: pick the best heading-bounded section for the query (markdown
|
||||
// section retrieval) so that long notes don't dump their intro/setup blocks just
|
||||
// because they happen to be in the top 400 chars. Falls back to keyword-window
|
||||
// extraction inside the section, or whole-doc extraction when there are no
|
||||
// headings at all.
|
||||
const excerpt = isLesson
|
||||
? (content.length <= 2500 ? content.trim() : extractBestExcerpt(content, expandedTokens, 1500))
|
||||
: extractBestExcerpt(content, expandedTokens, 400);
|
||||
const cap = isLesson ? 2500 : 400;
|
||||
? extractLessonEssence(content, 1200) || extractBestExcerpt(content, expandedTokens, 1200)
|
||||
: extractBestSection(content, expandedTokens, 600);
|
||||
const cap = isLesson ? 1200 : 600;
|
||||
topResults.push({
|
||||
id: `brain-${s.index}`,
|
||||
source: 'brain-memory' as const,
|
||||
@@ -287,6 +375,70 @@ export class RetrievalOrchestrator {
|
||||
return chunks;
|
||||
}
|
||||
|
||||
// ─── Medium-Term: Recent Sessions ───
|
||||
|
||||
/**
 * Turn summaries of recently touched chat sessions into medium-term memory chunks.
 *
 * Scoring is a plain token-overlap count between the expanded query and the
 * session text (sessions are small, so the TF-IDF machinery is skipped), plus
 * a small recency bonus. The scored text — and the chunk body — is the title
 * with the LLM-compressed `summary` when one exists, otherwise the title with
 * the first user message and the last assistant excerpt. That is enough for the
 * model to recall the thread without re-injecting the whole transcript.
 *
 * Why include recent sessions at all: short-term memory covers "this
 * conversation" and long-term memory covers "stable brain notes", which leaves
 * a gap for "what we worked on yesterday / last week".
 *
 * @param expandedTokens Expanded query tokens; tokens shorter than two characters are ignored.
 * @param sessions Candidate session summaries supplied by the caller.
 * @param limit Maximum number of chunks to return; values <= 0 yield no chunks.
 * @returns Up to `limit` chunks in descending score order; empty when no
 *          session ends up with a positive score.
 */
private scoreRecentSessions(
    expandedTokens: string[],
    sessions: RecentSessionSummary[],
    limit: number,
): RetrievalChunk[] {
    if (!sessions || sessions.length === 0 || limit <= 0) return [];

    const MS_PER_DAY = 86400000;
    // Read the clock once so every session is aged against the same instant.
    const now = Date.now();
    const queryTerms = new Set(expandedTokens.filter((t) => t.length >= 2));

    // `toISOString()` throws a RangeError on an Invalid Date, which is what a
    // truthy but out-of-range timestamp produces. Render such sessions without
    // a date instead of aborting the whole retrieval pass.
    const formatDay = (timestamp: number): string => {
        if (!timestamp) return '';
        const date = new Date(timestamp);
        return Number.isNaN(date.getTime()) ? '' : date.toISOString().slice(0, 10);
    };

    const ranked = sessions
        .map((s) => {
            // Prefer the LLM-compressed summary when present: it is a real recap
            // of the session, so matches against it are far more meaningful
            // than matches against an arbitrary head/tail fragment.
            const text = s.summary
                ? `${s.title}\n${s.summary}`
                : `${s.title}\n${s.firstUserMsg}\n${s.lastAssistantExcerpt}`;

            // Every occurrence of a query term counts; repeats are not deduplicated.
            let overlap = 0;
            for (const token of tokenize(text)) {
                if (queryTerms.has(token)) overlap++;
            }

            // Tiny recency boost so equal-overlap sessions prefer the more recent
            // one: at most +0.1, decaying linearly to 0 at seven days. A missing
            // timestamp is treated as very old (no boost).
            const ageDays = s.timestamp ? Math.max(0, (now - s.timestamp) / MS_PER_DAY) : 999;
            const recency = ageDays < 7 ? (7 - ageDays) / 70 : 0;
            return { s, score: overlap + recency };
        })
        // NOTE(review): the recency term alone makes the score positive, so a
        // session younger than seven days passes this filter even with zero
        // query overlap. Behaviour kept as-is; confirm that this is intended.
        .filter((entry) => entry.score > 0)
        .sort((a, b) => b.score - a.score)
        .slice(0, limit);

    return ranked.map(({ s, score }, idx) => {
        const day = formatDay(s.timestamp);
        const heading = `**${s.title}**${day ? ` (${day})` : ''}`;
        // Fall back to the raw fragments when no summary was stored for the
        // session (e.g. it ended before the summarizer could run).
        const body = s.summary
            ? [heading, s.summary].join('\n')
            : [
                heading,
                s.firstUserMsg ? `사용자 요청: ${s.firstUserMsg}` : '',
                s.lastAssistantExcerpt ? `이전 답변 마지막 부분: …${s.lastAssistantExcerpt}` : '',
            ].filter(Boolean).join('\n');
        return {
            id: `mtm-${idx}-${s.id}`,
            source: 'medium-term-memory' as const,
            title: s.title || '(untitled session)',
            content: body,
            score,
            tokenEstimate: estimateTokens(body),
            metadata: { category: 'medium-term', lastUpdated: s.timestamp },
        };
    });
}
|
||||
|
||||
// ─── Score Normalization ───
|
||||
|
||||
/**
|
||||
@@ -315,6 +467,7 @@ export class RetrievalOrchestrator {
|
||||
'project-memory': 0.85,
|
||||
'long-term-memory': 0.8,
|
||||
'procedural-memory': 0.95, // Procedural is highly specific
|
||||
'medium-term-memory': 0.78, // recent sessions: useful when the user references "last time / yesterday"
|
||||
'episodic-memory': 0.7,
|
||||
'project-scan': 0.6,
|
||||
'recent-knowledge': 0.75
|
||||
|
||||
Reference in New Issue
Block a user