chore: version up to 2.80.35 and package with experience memory

This commit is contained in:
g1nation
2026-05-12 23:23:23 +09:00
parent 065e598cca
commit f6b27a125b
25 changed files with 1088 additions and 103 deletions
+8 -1
View File
@@ -14,9 +14,10 @@
import * as fs from 'fs';
import * as path from 'path';
import { tokenize, countConflictIndicators } from './scoring';
import { detectLessonKind } from './lessonHelpers';
import { logInfo } from '../utils';
const INDEX_VERSION = 2;
const INDEX_VERSION = 3;
const INDEX_DIR = '.astra';
const INDEX_FILE = 'brain-index.json';
/** 인덱스가 이 개수를 넘으면 이번 스캔에서 못 본 항목을 정리합니다 (삭제된 파일 누적 방지). */
@@ -32,6 +33,7 @@ interface IndexEntry {
tokens: string[]; // tokenize(`${title} ${content}`)
titleTokens: string[]; // tokenize(title)
conflictCount: number; // countConflictIndicators(`${title} ${content}`)
kind: string; // '' for an ordinary note, else 'lesson' | 'playbook' | 'qa-finding'
}
interface PersistedIndex {
@@ -47,6 +49,8 @@ export interface IndexedBrainDoc {
titleTokens: string[];
conflictCount: number;
mtimeMs: number;
/** '' for an ordinary note; 'lesson' | 'playbook' | 'qa-finding' for an Experience-Memory card. */
kind: string;
}
interface BrainState {
@@ -148,6 +152,7 @@ export function getBrainTokenIndex(brainPath: string, files: string[]): IndexedB
titleTokens: cached.titleTokens,
conflictCount: cached.conflictCount || 0,
mtimeMs: cached.mtimeMs,
kind: cached.kind || '',
});
continue;
}
@@ -169,6 +174,7 @@ export function getBrainTokenIndex(brainPath: string, files: string[]): IndexedB
tokens: tokenize(combined),
titleTokens: tokenize(title),
conflictCount: countConflictIndicators(combined),
kind: detectLessonKind(relativePath, content),
};
st.index.entries[file] = entry;
st.dirty = true;
@@ -181,6 +187,7 @@ export function getBrainTokenIndex(brainPath: string, files: string[]): IndexedB
titleTokens: entry.titleTokens,
conflictCount: entry.conflictCount,
mtimeMs: entry.mtimeMs,
kind: entry.kind,
});
}
+40 -7
View File
@@ -96,13 +96,18 @@ export class RetrievalOrchestrator {
allChunks,
options.contextBudget
);
fusionLog.push(`Selected: ${selected.length}, Dropped: ${dropped.length}, Tokens: ${tokensUsed}`);
// Pull lesson/playbook/qa-finding chunks out so callers can inject them as a prominent
// "verify before finalizing" block rather than burying them in the brain-knowledge section.
const lessonChunks = selected.filter((c) => c.metadata.isLesson);
const selectedChunks = selected.filter((c) => !c.metadata.isLesson);
fusionLog.push(`Selected: ${selectedChunks.length} (+${lessonChunks.length} lesson), Dropped: ${dropped.length}, Tokens: ${tokensUsed}`);
return {
query,
totalChunks: allChunks.length,
selectedChunks: selected,
selectedChunks,
droppedChunks: dropped,
lessonChunks,
totalTokensUsed: tokensUsed,
contextBudget: options.contextBudget?.totalBudget || 8000,
fusionLog
@@ -110,7 +115,7 @@ export class RetrievalOrchestrator {
}
/**
* 검색 결과를 최종 컨텍스트 문자열로 변환합니다.
* 검색 결과를 최종 컨텍스트 문자열로 변환합니다 (레슨 청크는 제외 — 별도 블록으로 주입).
*/
public buildContextString(result: RetrievalResult): string {
return assembleContext(result.selectedChunks);
@@ -150,18 +155,42 @@ export class RetrievalOrchestrator {
}))
);
// Always consider lesson cards for the top slots even if they didn't crack the raw-score top-`limit`:
// they're short, high-signal, and we want them surfaced when relevant. We keep the regular top-`limit`
// and additively pull in up to a few lesson cards (deduped by index).
const ranked = scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score);
const pickedIdx = new Set<number>();
for (const s of ranked.slice(0, limit)) pickedIdx.add(s.index);
const LESSON_EXTRA = 3;
let lessonExtra = 0;
for (const s of ranked) {
if (lessonExtra >= LESSON_EXTRA) break;
if (pickedIdx.has(s.index)) continue;
if ((indexed[s.index].kind || '') === '') continue;
pickedIdx.add(s.index);
lessonExtra++;
}
// Preserve rank order for the chosen set.
const chosen = ranked.filter((s) => pickedIdx.has(s.index));
const topResults: RetrievalChunk[] = [];
for (const s of scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score).slice(0, limit)) {
for (const s of chosen) {
const doc = indexed[s.index];
// Only the top `limit` files are actually read off disk (for excerpt extraction).
const isLesson = (doc.kind || '') !== '';
// Only the chosen files are actually read off disk (for excerpt extraction).
let content = '';
try { content = fs.readFileSync(doc.filePath, 'utf8'); } catch { /* deleted just now — skip */ continue; }
const excerpt = extractBestExcerpt(content, expandedTokens, 400);
// Lesson cards: hand back the whole card (they're meant to be short) so the Prevention Checklist
// survives; fall back to a generous excerpt for long ones. Regular notes: the usual 400-char excerpt.
const excerpt = isLesson
? (content.length <= 2500 ? content.trim() : extractBestExcerpt(content, expandedTokens, 1500))
: extractBestExcerpt(content, expandedTokens, 400);
const cap = isLesson ? 2500 : 400;
topResults.push({
id: `brain-${s.index}`,
source: 'brain-memory' as const,
title: doc.relativePath,
content: summarizeText(excerpt, 400),
content: summarizeText(excerpt, cap),
score: s.score,
tokenEstimate: estimateTokens(excerpt),
metadata: {
@@ -173,6 +202,7 @@ export class RetrievalOrchestrator {
conflictDetected: s.conflictDetected,
conflictSeverity: s.conflictSeverity,
informationDensity: s.informationDensity,
...(isLesson ? { isLesson: true, lessonKind: doc.kind } : {}),
},
});
}
@@ -293,6 +323,9 @@ export class RetrievalOrchestrator {
for (const chunk of chunks) {
const boost = sourceBoost[chunk.source] || 0.5;
chunk.score *= boost;
// Lesson cards are short, high-signal guardrails — nudge relevant ones above ordinary brain notes
// so they survive the budget. Modest (1.4×) so they don't crowd everything out when many match.
if (chunk.metadata.isLesson) chunk.score *= 1.4;
}
}
+277
View File
@@ -0,0 +1,277 @@
/**
* ============================================================
* Lesson / Experience Memory — pure helpers (no vscode dependency)
*
* "Lesson" = a markdown file in the active brain that captures a past mistake/risk and how to avoid
* repeating it. Identified by a `lessons/` / `playbooks/` / `qa-findings/` path segment, or by
* frontmatter `type: lesson|playbook|qa-finding`. These are retrieved like any other brain file but
* boosted and injected as a prominent "verify before finalizing" checklist (see EXPERIENCE_MEMORY_PLAN.md).
* ============================================================
*/
import { tokenize } from './scoring';
/**
 * Path segments that mark a file as lesson-like.
 *
 * Matches (case-insensitively) a whole path segment named `lesson(s)`, `playbook(s)` or
 * `qa-finding(s)` / `qa_finding(s)` / `qafinding(s)`. The segment must be delimited on both sides
 * by the start/end of the string or by a `/` or `\` separator. Capture group 2 holds the matched
 * segment name.
 */
export const LESSON_DIR_RE = /(^|[\\/])(lessons?|playbooks?|qa[-_]?findings?)([\\/]|$)/i;
/** The kinds of lesson-like card a brain file can be classified as. */
export type LessonKind = 'lesson' | 'playbook' | 'qa-finding';
/**
 * Classify a brain file as a lesson-like card, or as an ordinary note.
 *
 * Two signals are consulted, in priority order:
 *  1. a frontmatter `type:` of `lesson`, `playbook` or `qa-finding` at the top of `content`;
 *  2. a lesson-like directory segment in `relativePath` (see `LESSON_DIR_RE`).
 *
 * Any other frontmatter `type:` value is ignored and the path decides.
 *
 * @param relativePath Path of the file relative to the brain root; may be empty.
 * @param content Raw file text; only its leading frontmatter is examined.
 * @returns The detected kind, or '' for an ordinary note.
 */
export function detectLessonKind(relativePath: string, content: string): LessonKind | '' {
  // An explicit frontmatter declaration takes precedence over where the file lives.
  const declared = parseFrontmatterType(content);
  switch (declared) {
    case 'lesson':
    case 'playbook':
    case 'qa-finding':
      return declared;
  }

  // No usable declaration: fall back to the directory the file sits in.
  const hit = LESSON_DIR_RE.exec(relativePath || '');
  if (hit === null) return '';

  const segment = hit[2].toLowerCase();
  if (segment.startsWith('playbook')) return 'playbook';
  return segment.startsWith('qa') ? 'qa-finding' : 'lesson';
}
/**
 * Pull the `type:` value out of a leading `--- ... ---` frontmatter block.
 *
 * Only the first 800 characters of `content` are inspected, so both the opening and the closing
 * `---` fence must fall inside that window. The value may be wrapped in single or double quotes
 * and may only consist of ASCII letters and hyphens.
 *
 * @param content Raw file text.
 * @returns The lower-cased `type` value, or '' when there is no frontmatter or no `type:` key.
 */
function parseFrontmatterType(content: string): string {
  if (!content) return '';
  const head = content.slice(0, 800);
  // Allow an optional byte-order mark before the opening fence. It is written as an escape so the
  // otherwise invisible character cannot be silently dropped (which would leave an invalid `^?`).
  if (!/^\uFEFF?---\s*\n/.test(head)) return '';
  // Start searching after the opening fence so it is not mistaken for the closing one.
  const end = head.indexOf('\n---', 4);
  if (end < 0) return '';
  const block = head.slice(0, end);
  const m = block.match(/^\s*type\s*:\s*["']?([a-zA-Z-]+)["']?\s*$/m);
  return m ? m[1].trim().toLowerCase() : '';
}
/**
 * Extract the bullet list under a "Prevention Checklist" heading of a lesson card.
 *
 * The heading may be at any level (`#` to `######`) and is matched case-insensitively against
 * `Prevention Checklist`, `Prevention`, `체크리스트` or `예방 체크리스트`. The section runs until the
 * next markdown heading (or the end of the text). Only `-` / `*` bullets are collected.
 *
 * Task-list markers (`- [ ] item`, `- [x] item`) are stripped so callers that render the items
 * as checkboxes themselves do not end up with a doubled `[ ]`.
 *
 * @param content Full or excerpted card text.
 * @returns The bullet texts in document order; empty when the heading or bullets are absent.
 */
export function extractPreventionChecklist(content: string): string[] {
  if (!content) return [];
  const heading = /^#{1,6}\s*(?:prevention\s*checklist|prevention|체크리스트|예방\s*체크리스트)\s*$/im.exec(content);
  if (!heading) return [];

  const after = content.slice(heading.index + heading[0].length);
  // Stop at the next heading.
  const stop = after.search(/\n#{1,6}\s/);
  const section = stop >= 0 ? after.slice(0, stop) : after;

  const items: string[] = [];
  for (const raw of section.split('\n')) {
    const line = raw.trim();
    // A bare "-" (an empty bullet after trimming) does not match and is skipped.
    if (!/^[-*]\s+/.test(line)) continue;
    const text = line
      .replace(/^[-*]\s+/, '')
      // Drop a leading task-list checkbox, checked or not.
      .replace(/^\[[ xX]\](?:\s+|$)/, '')
      .trim();
    if (text) items.push(text);
  }
  return items;
}
/** The two fields of a retrieved lesson chunk that `buildLessonChecklistBlock` reads. */
export interface LessonChunkLite {
/** Relative path of the card, used as its display title. */
title: string;
/** Excerpt or full text of the card. */
content: string;
}
/**
 * Render the "active lessons" prompt block that is injected ahead of the regular RAG context.
 *
 * Each card becomes a `### <title>` section. When the card has a parseable Prevention Checklist,
 * only that checklist is shown (as `- [ ]` items); otherwise the trimmed card text is used.
 *
 * @param chunks Lesson cards to surface.
 * @returns The full block including its header and `[END ACTIVE LESSONS]` footer, or '' when
 *          there are no cards.
 */
export function buildLessonChecklistBlock(chunks: LessonChunkLite[]): string {
  if (!chunks || chunks.length === 0) return '';

  const renderCard = (card: LessonChunkLite): string => {
    const items = extractPreventionChecklist(card.content);
    const body = items.length === 0
      ? card.content.trim()
      : items.map((item) => `- [ ] ${item}`).join('\n');
    return `### ${card.title}\n${body}`;
  };

  const preamble = [
    '[⚠ ACTIVE LESSONS — verify these BEFORE finalizing your answer]',
    'These are recorded lessons from past work on this project. Read them first and make sure you are NOT',
    'about to repeat any of the mistakes / skip any of the precautions below. If a checklist item is relevant',
    'to the current request, explicitly confirm it in your answer. If a lesson conflicts with the user, prefer',
    'the user but flag the conflict.',
  ].join('\n');
  const cards = chunks.map(renderCard).join('\n\n');

  // A blank line separates the preamble, the cards and the footer.
  return `${preamble}\n\n${cards}\n\n[END ACTIVE LESSONS]`;
}
/**
 * Build a starter lesson card for the user to fill in (written by the `g1nation.lesson.create` /
 * `…fromConversation` commands).
 *
 * The title is flattened to a single line, since it is emitted both as the `title:` frontmatter
 * value and as the `# Lesson:` heading. All line-break forms (LF, CRLF, lone CR) are replaced, and
 * a title that is empty after trimming falls back to `Untitled lesson`.
 *
 * @param title Lesson title; may be empty.
 * @param today Date string written to `last-seen:`.
 * @param situation Optional text (e.g. captured from the recent chat turn) that pre-fills the
 *                  Situation section; a placeholder is used when absent or blank.
 * @returns The card as markdown with YAML-style frontmatter, ending in a newline.
 */
export function lessonTemplate(title: string, today: string, situation?: string): string {
  // Replace each line break with one space; a stray "\r" would otherwise split the title line.
  const flatTitle = (title || '').replace(/\r\n|\r|\n/g, ' ').trim();
  const safeTitle = flatTitle || 'Untitled lesson';
  const trimmedSituation = situation ? situation.trim() : '';
  const situationBody = trimmedSituation || '<무슨 작업/맥락이었는지>';

  const frontmatter = [
    '---',
    'type: lesson',
    `title: ${safeTitle}`,
    'applies-to: []',
    'severity: medium',
    'source: curated',
    'occurrences: 1',
    `last-seen: ${today}`,
    '---',
  ];
  const body = [
    '',
    `# Lesson: ${safeTitle}`,
    '',
    '## Situation',
    situationBody,
    '',
    '## Mistake / Risk',
    '<무엇이 잘못됐거나 위험했는지>',
    '',
    '## Root Cause',
    '<왜 그렇게 됐는지 — 표면 증상이 아니라 근본 원인>',
    '',
    '## Fix',
    '<어떻게 고쳤는지>',
    '',
    '## Prevention Checklist',
    '- <다음에 비슷한 작업을 할 때 반드시 확인할 것>',
    '- ',
    '',
    '## Applies To',
    '- <태그: 기능/영역 이름>',
    '', // trailing empty entry -> file ends with a newline
  ];
  return [...frontmatter, ...body].join('\n');
}
/**
 * Turn a lesson title into a filesystem-safe slug for use as a filename.
 *
 * The title is lower-cased, every run of characters other than `a-z`, `0-9` and Hangul syllables
 * becomes a single `-`, the result is cut to 60 characters, and leading/trailing dashes are
 * removed.
 *
 * @param title Lesson title; may be empty.
 * @returns The slug, or `lesson` when nothing usable remains.
 */
export function lessonSlug(title: string): string {
  const source = title || 'lesson';
  const dashed = source.toLowerCase().replace(/[^a-z0-9가-힣]+/g, '-');
  // Truncate first, then trim: cutting at 60 may itself leave a dash at the end.
  const clipped = dashed.slice(0, 60);
  const trimmed = clipped.replace(/^-+|-+$/g, '');
  return trimmed === '' ? 'lesson' : trimmed;
}
// ── QA-feedback (regression complaint) detection ─────────────────────────────
/**
 * Phrases that suggest the user is reporting a repeated or regressed problem ("you broke it
 * again", "same mistake", "why does this keep happening"). Korean and English variants.
 * The English glosses on the Korean patterns are approximate.
 */
const QA_REGRESSION_PATTERNS: ReadonlyArray<RegExp> = [
  // Korean: "again" + not working / happened / blew up / broke.
  /또\s*(안\s*돼|안되|이래|발생|터졌|깨졌|망가졌)/,
  // Korean: "again the same" mistake / problem / bug / error.
  /(다시|또)\s*같은\s*(실수|문제|버그|에러|오류)/,
  // Korean: "similar / identical" mistake / problem / bug / issue / pattern.
  /(비슷한|똑같은)\s*(실수|문제|버그|이슈|패턴)/,
  // Korean: "why" + keeps / continually / repeatedly / again.
  /왜\s*(자꾸|계속|반복|또)/,
  // Korean: "fixed / patched / changed it, but again / still ..." + a failure word within 20 chars.
  /(고쳤는데|수정했는데|패치했는데|바꿨는데)\s*(또|다시|여전히|아직).{0,20}(안|깨|망|문제|에러|오류|실패|broke|broken)/i,
  // Korean: "still" + not working / bug / broken / problem / failure.
  /(여전히|아직도)\s*(안\s*돼|안되|버그|깨|문제|실패)/,
  // Any occurrence of "regress", "regression" or "regressed".
  /regress(ion|ed)?/i,
  // English breakage word followed within 40 chars by a repetition word.
  /\b(broke|broken|failing|still\s+broken|same\s+(bug|mistake|issue|error)|again)\b.{0,40}\b(again|still|repeat|recurr)/i,
  // English "why" followed within 30 chars by a repetition word.
  /\bwhy\b.{0,30}\b(keep|again|repeatedly|recurr)/i,
];

/**
 * Heuristic: does this user message read like a regression complaint? If so, the host offers to
 * record a lesson. Deliberately conservative — a false positive only shows a dismissible prompt,
 * but nagging is worse.
 *
 * @param prompt The user's message.
 * @returns `true` when the trimmed message is 4–4000 characters long and matches any pattern.
 */
export function isQaRegressionFeedback(prompt: string): boolean {
  const text = (prompt || '').trim();
  // Messages shorter than 4 or longer than 4000 characters are never classified as complaints.
  if (text.length < 4 || text.length > 4000) return false;
  for (const pattern of QA_REGRESSION_PATTERNS) {
    if (pattern.test(text)) return true;
  }
  return false;
}
// ── Lesson frontmatter parse / occurrences bump (for dedup-merge) ────────────
/** The subset of a lesson card's frontmatter that the dedup/merge logic reads. */
export interface LessonFrontmatter {
  /** Lower-cased `type:` value. */
  type?: string;
  /** `title:` value with surrounding quotes removed. */
  title?: string;
  /** `occurrences:` as a number; undefined when missing or not numeric. */
  occurrences?: number;
  /** Entries of an inline `applies-to: [a, b]` list. */
  appliesTo?: string[];
}

/**
 * Parse the leading `--- ... ---` frontmatter block of a lesson card.
 *
 * This is a line-oriented `key: value` reader, not a YAML parser. Only the first 2000 characters
 * are inspected, so the closing fence must fall inside that window. A key with nothing after the
 * colon on its own line is reported as absent rather than picking up the following line.
 *
 * @param content Raw card text.
 * @returns The recognised fields, or `{}` when there is no frontmatter.
 */
export function parseLessonFrontmatter(content: string): LessonFrontmatter {
  if (!content) return {};
  const head = content.slice(0, 2000);
  // Optional byte-order mark before the opening fence, written as an explicit escape so the
  // invisible character cannot be lost in transit.
  if (!/^\uFEFF?---\s*\n/.test(head)) return {};
  const end = head.indexOf('\n---', 4);
  if (end < 0) return {};
  const block = head.slice(0, end);

  const get = (key: string): string | undefined => {
    // Only spaces/tabs may surround the colon: `\s` would also match a newline and let an empty
    // `key:` line capture the next line as its value.
    const m = block.match(new RegExp(`^[ \\t]*${key}[ \\t]*:[ \\t]*(\\S.*?)\\s*$`, 'm'));
    return m ? m[1].replace(/^["']|["']$/g, '').trim() : undefined;
  };

  const occ = get('occurrences');
  const tags = get('applies-to');
  let appliesTo: string[] | undefined;
  if (tags) {
    // Inline list form only: strip the brackets, then split on commas and unquote each entry.
    const inner = tags.replace(/^\[|\]$/g, '').trim();
    appliesTo = inner
      ? inner.split(',').map((s) => s.trim().replace(/^["']|["']$/g, '').trim()).filter(Boolean)
      : [];
  }

  return {
    type: get('type')?.toLowerCase(),
    title: get('title'),
    occurrences: occ !== undefined && Number.isFinite(Number(occ)) ? Number(occ) : undefined,
    appliesTo,
  };
}
/**
 * Reduce a title to a comparison key: lower-cased, with everything except `a-z`, `0-9` and Hangul
 * syllables removed. Titles that differ only in case, spacing or punctuation share a key.
 *
 * @param title Title (or any short text) to normalise; may be empty.
 * @returns The key; '' when no letter or digit remains.
 */
export function normalizeLessonTitle(title: string): string {
  const lowered = (title || '').toLowerCase();
  return lowered.replace(/[^a-z0-9가-힣]+/g, '');
}
/**
 * Return `content` with the frontmatter's `occurrences:` incremented by 1 and `last-seen:` set to
 * `today`. Missing keys are appended at the end of the frontmatter block. If there is no
 * frontmatter at all, `content` is returned unchanged (the caller decides what to do).
 *
 * A missing or non-numeric `occurrences:` is treated as 1, so the result is 2.
 *
 * @param content Raw card text.
 * @param today Date string to store in `last-seen:`.
 * @returns The updated card text.
 */
export function bumpLessonOccurrences(content: string, today: string): string {
  // Optional byte-order mark before the opening fence, written as an explicit escape.
  if (!/^\uFEFF?---\s*\n/.test(content)) return content;
  const end = content.indexOf('\n---', 4);
  if (end < 0) return content;

  const rest = content.slice(end);
  const nextCount = (parseLessonFrontmatter(content).occurrences ?? 1) + 1;

  /** Replace the value on the `key:` line, or append a `key: value` line when the key is absent. */
  const upsert = (block: string, key: string, value: string): string => {
    // Only spaces/tabs around the colon: `\s` would match a newline, so an empty `key:` line would
    // pull the following line into the match and overwrite it.
    const line = new RegExp(`^([ \\t]*${key}[ \\t]*:)([ \\t]*).*$`, 'm');
    if (!line.test(block)) return `${block}\n${key}: ${value}`;
    // A replacer function keeps `value` literal: no `$1` followed by a digit, and no `$` sequences
    // in `value` being interpreted as replacement patterns.
    return block.replace(line, (_whole: string, label: string, gap: string) => `${label}${gap || ' '}${value}`);
  };

  let block = content.slice(0, end);
  block = upsert(block, 'occurrences', String(nextCount));
  block = upsert(block, 'last-seen', today);
  return block + rest;
}
// ── Post-answer checklist coverage (non-blocking flag) ──────────────────────
/**
 * The distinct tokens of a checklist item that are at least 2 characters long.
 *
 * @param item One checklist bullet's text.
 * @returns The item's tokens, de-duplicated; `[]` for an unfilled template placeholder (an item
 *          starting with `<`, such as "<다음에 확인할 것>").
 */
function checklistItemTerms(item: string): string[] {
  if (item.trim().startsWith('<')) return [];
  const distinct = Array.from(new Set(tokenize(item)));
  return distinct.filter((term) => term.length >= 2);
}
/**
 * Find Prevention-Checklist items that the assistant's answer does not visibly address.
 *
 * An item counts as unaddressed when none of its significant terms appears among the answer's
 * tokens. The check is conservative: items with fewer than 2 significant terms (including
 * template placeholders) are never flagged, and items that normalise to the same key are reported
 * once.
 *
 * @param answer The assistant's answer text.
 * @param lessonContents Text of each lesson card injected this turn.
 * @param max Stop after this many flagged items, so the footer does not get noisy.
 * @returns The unaddressed items, in card and document order.
 */
export function findUnaddressedChecklistItems(answer: string, lessonContents: string[], max = 3): string[] {
  if (!answer || !lessonContents || lessonContents.length === 0) return [];

  const vocabulary = new Set(tokenize(answer));
  const flagged: string[] = [];
  const flaggedKeys = new Set<string>();

  for (const card of lessonContents) {
    for (const item of extractPreventionChecklist(card)) {
      const key = normalizeLessonTitle(item);
      if (key === '' || flaggedKeys.has(key)) continue;

      const terms = checklistItemTerms(item);
      if (terms.length < 2) continue; // too vague to judge

      const mentioned = terms.some((term) => vocabulary.has(term));
      if (mentioned) continue;

      flagged.push(item);
      flaggedKeys.add(key);
      if (flagged.length >= max) return flagged;
    }
  }
  return flagged;
}
+9 -1
View File
@@ -31,11 +31,17 @@ export interface RetrievalChunk {
category?: string;
isProjectEvidence?: boolean;
lastUpdated?: number;
// --- Scoring Intelligence (v2.75.0+) ---
conflictDetected?: boolean;
conflictSeverity?: ConflictSeverity;
informationDensity?: number;
// --- Experience Memory ---
/** True when this chunk comes from a lesson / playbook / qa-finding card in the brain. */
isLesson?: boolean;
/** 'lesson' | 'playbook' | 'qa-finding' when isLesson is true. */
lessonKind?: string;
};
}
@@ -44,6 +50,8 @@ export interface RetrievalResult {
totalChunks: number;
selectedChunks: RetrievalChunk[];
droppedChunks: RetrievalChunk[];
/** Lesson/playbook/qa-finding chunks that survived the budget — pulled out so callers can inject them prominently. */
lessonChunks: RetrievalChunk[];
totalTokensUsed: number;
contextBudget: number;
fusionLog: string[]; // 디버그용 융합 로그