chore: version up to 2.80.35 and package with experience memory

2026-05-12 23:23:23 +09:00
parent 065e598cca
commit f6b27a125b
25 changed files with 1088 additions and 103 deletions
@@ -14,9 +14,10 @@
 import * as fs from 'fs';
 import * as path from 'path';
 import { tokenize, countConflictIndicators } from './scoring';
+import { detectLessonKind } from './lessonHelpers';
 import { logInfo } from '../utils';

-const INDEX_VERSION = 2;
+const INDEX_VERSION = 3;
 const INDEX_DIR = '.astra';
 const INDEX_FILE = 'brain-index.json';
 /** 인덱스가 이 개수를 넘으면 이번 스캔에서 못 본 항목을 정리합니다 (삭제된 파일 누적 방지). */
@@ -32,6 +33,7 @@ interface IndexEntry {
    tokens: string[];       // tokenize(`${title} ${content}`)
    titleTokens: string[];  // tokenize(title)
    conflictCount: number;  // countConflictIndicators(`${title} ${content}`)
+    kind: string;           // '' for an ordinary note, else 'lesson' | 'playbook' | 'qa-finding'
 }

 interface PersistedIndex {
@@ -47,6 +49,8 @@ export interface IndexedBrainDoc {
    titleTokens: string[];
    conflictCount: number;
    mtimeMs: number;
+    /** '' for an ordinary note; 'lesson' | 'playbook' | 'qa-finding' for an Experience-Memory card. */
+    kind: string;
 }

 interface BrainState {
@@ -148,6 +152,7 @@ export function getBrainTokenIndex(brainPath: string, files: string[]): IndexedB
                titleTokens: cached.titleTokens,
                conflictCount: cached.conflictCount || 0,
                mtimeMs: cached.mtimeMs,
+                kind: cached.kind || '',
            });
            continue;
        }
@@ -169,6 +174,7 @@ export function getBrainTokenIndex(brainPath: string, files: string[]): IndexedB
            tokens: tokenize(combined),
            titleTokens: tokenize(title),
            conflictCount: countConflictIndicators(combined),
+            kind: detectLessonKind(relativePath, content),
        };
        st.index.entries[file] = entry;
        st.dirty = true;
@@ -181,6 +187,7 @@ export function getBrainTokenIndex(brainPath: string, files: string[]): IndexedB
            titleTokens: entry.titleTokens,
            conflictCount: entry.conflictCount,
            mtimeMs: entry.mtimeMs,
+            kind: entry.kind,
        });
    }

@@ -96,13 +96,18 @@ export class RetrievalOrchestrator {
            allChunks,
            options.contextBudget
        );
-        fusionLog.push(`Selected: ${selected.length}, Dropped: ${dropped.length}, Tokens: ${tokensUsed}`);
+        // Pull lesson/playbook/qa-finding chunks out so callers can inject them as a prominent
+        // "verify before finalizing" block rather than burying them in the brain-knowledge section.
+        const lessonChunks = selected.filter((c) => c.metadata.isLesson);
+        const selectedChunks = selected.filter((c) => !c.metadata.isLesson);
+        fusionLog.push(`Selected: ${selectedChunks.length} (+${lessonChunks.length} lesson), Dropped: ${dropped.length}, Tokens: ${tokensUsed}`);

        return {
            query,
            totalChunks: allChunks.length,
-            selectedChunks: selected,
+            selectedChunks,
            droppedChunks: dropped,
+            lessonChunks,
            totalTokensUsed: tokensUsed,
            contextBudget: options.contextBudget?.totalBudget || 8000,
            fusionLog
@@ -110,7 +115,7 @@ export class RetrievalOrchestrator {
    }

    /**
-     * 검색 결과를 최종 컨텍스트 문자열로 변환합니다.
+     * 검색 결과를 최종 컨텍스트 문자열로 변환합니다 (레슨 청크는 제외 — 별도 블록으로 주입).
     */
    public buildContextString(result: RetrievalResult): string {
        return assembleContext(result.selectedChunks);
@@ -150,18 +155,42 @@ export class RetrievalOrchestrator {
                }))
            );

+            // Always consider lesson cards for the top slots even if they didn't crack the raw-score top-`limit`:
+            // they're short, high-signal, and we want them surfaced when relevant. We keep the regular top-`limit`
+            // and additively pull in up to a few lesson cards (deduped by index).
+            const ranked = scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score);
+            const pickedIdx = new Set<number>();
+            for (const s of ranked.slice(0, limit)) pickedIdx.add(s.index);
+            const LESSON_EXTRA = 3;
+            let lessonExtra = 0;
+            for (const s of ranked) {
+                if (lessonExtra >= LESSON_EXTRA) break;
+                if (pickedIdx.has(s.index)) continue;
+                if ((indexed[s.index].kind || '') === '') continue;
+                pickedIdx.add(s.index);
+                lessonExtra++;
+            }
+            // Preserve rank order for the chosen set.
+            const chosen = ranked.filter((s) => pickedIdx.has(s.index));
+
            const topResults: RetrievalChunk[] = [];
-            for (const s of scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score).slice(0, limit)) {
+            for (const s of chosen) {
                const doc = indexed[s.index];
-                // Only the top `limit` files are actually read off disk (for excerpt extraction).
+                const isLesson = (doc.kind || '') !== '';
+                // Only the chosen files are actually read off disk (for excerpt extraction).
                let content = '';
                try { content = fs.readFileSync(doc.filePath, 'utf8'); } catch { /* deleted just now — skip */ continue; }
-                const excerpt = extractBestExcerpt(content, expandedTokens, 400);
+                // Lesson cards: hand back the whole card (they're meant to be short) so the Prevention Checklist
+                // survives; fall back to a generous excerpt for long ones. Regular notes: the usual 400-char excerpt.
+                const excerpt = isLesson
+                    ? (content.length <= 2500 ? content.trim() : extractBestExcerpt(content, expandedTokens, 1500))
+                    : extractBestExcerpt(content, expandedTokens, 400);
+                const cap = isLesson ? 2500 : 400;
                topResults.push({
                    id: `brain-${s.index}`,
                    source: 'brain-memory' as const,
                    title: doc.relativePath,
-                    content: summarizeText(excerpt, 400),
+                    content: summarizeText(excerpt, cap),
                    score: s.score,
                    tokenEstimate: estimateTokens(excerpt),
                    metadata: {
@@ -173,6 +202,7 @@ export class RetrievalOrchestrator {
                        conflictDetected: s.conflictDetected,
                        conflictSeverity: s.conflictSeverity,
                        informationDensity: s.informationDensity,
+                        ...(isLesson ? { isLesson: true, lessonKind: doc.kind } : {}),
                    },
                });
            }
@@ -293,6 +323,9 @@ export class RetrievalOrchestrator {
        for (const chunk of chunks) {
            const boost = sourceBoost[chunk.source] || 0.5;
            chunk.score *= boost;
+            // Lesson cards are short, high-signal guardrails — nudge relevant ones above ordinary brain notes
+            // so they survive the budget. Modest (1.4×) so they don't crowd everything out when many match.
+            if (chunk.metadata.isLesson) chunk.score *= 1.4;
        }
    }

@@ -0,0 +1,277 @@
+/**
+ * ============================================================
+ * Lesson / Experience Memory — pure helpers (no vscode dependency)
+ *
+ * "Lesson" = a markdown file in the active brain that captures a past mistake/risk and how to avoid
+ * repeating it. Identified by a `lessons/` / `playbooks/` / `qa-findings/` path segment, or by
+ * frontmatter `type: lesson|playbook|qa-finding`. These are retrieved like any other brain file but
+ * boosted and injected as a prominent "verify before finalizing" checklist (see EXPERIENCE_MEMORY_PLAN.md).
+ * ============================================================
+ */
+
+import { tokenize } from './scoring';
+
+/**
+ * Matches a relative path containing a whole path segment named `lesson(s)`, `playbook(s)` or
+ * `qa-finding(s)` (the separator may also be `_` or omitted: `qa_findings`, `qafinding`),
+ * case-insensitively. The segment must be bounded by `/`, `\` or the start/end of the string.
+ * Capture group 2 holds the matched segment name.
+ */
+export const LESSON_DIR_RE = /(^|[\\/])(lessons?|playbooks?|qa[-_]?findings?)([\\/]|$)/i;
+
+/** The three kinds of lesson-like card that `detectLessonKind` can report. */
+export type LessonKind = 'lesson' | 'playbook' | 'qa-finding';
+
+/**
+ * Classify a brain file as a lesson-like card or an ordinary note.
+ *
+ * An explicit frontmatter `type:` of `lesson`, `playbook` or `qa-finding` takes precedence. When the
+ * frontmatter does not name one of those, the kind is inferred from the first lesson-like segment
+ * of the relative path.
+ *
+ * @param relativePath Path of the file relative to the brain root (may be empty).
+ * @param content Full text of the file; only its leading frontmatter is consulted.
+ * @returns The detected kind, or '' for an ordinary note.
+ */
+export function detectLessonKind(relativePath: string, content: string): LessonKind | '' {
+    const declared = parseFrontmatterType(content);
+    switch (declared) {
+        case 'lesson':
+        case 'playbook':
+        case 'qa-finding':
+            return declared;
+    }
+
+    // No usable frontmatter type: fall back to the directory name.
+    const hit = LESSON_DIR_RE.exec(relativePath || '');
+    if (hit === null) return '';
+
+    const segment = hit[2].toLowerCase();
+    return segment.startsWith('playbook')
+        ? 'playbook'
+        : segment.startsWith('qa')
+            ? 'qa-finding'
+            : 'lesson';
+}
+
+/**
+ * Pull the `type:` value out of a leading `--- ... ---` frontmatter block.
+ *
+ * Only the first 800 characters are inspected, so the closing `---` must appear within that window.
+ * A leading byte-order mark (U+FEFF) is tolerated. It is spelled as an explicit `\uFEFF` escape:
+ * an invisible literal BOM inside the pattern is easily dropped by editors and tooling, which
+ * leaves the invalid pattern `^?` ("nothing to repeat").
+ *
+ * @param content Full text of the file.
+ * @returns The lower-cased `type` value, or '' when there is no frontmatter or no `type:` key.
+ */
+function parseFrontmatterType(content: string): string {
+    if (!content) return '';
+    const head = content.slice(0, 800);
+    if (!/^\uFEFF?---\s*\n/.test(head)) return '';
+    // Look for the closing fence after the opening `---` line.
+    const end = head.indexOf('\n---', 4);
+    if (end < 0) return '';
+    const block = head.slice(0, end);
+    const m = block.match(/^\s*type\s*:\s*["']?([a-zA-Z-]+)["']?\s*$/m);
+    return m ? m[1].trim().toLowerCase() : '';
+}
+
+/**
+ * Extract the bullet items under a lesson card's "Prevention Checklist" heading.
+ *
+ * The heading may be at any level (`#` to `######`) and may read "Prevention Checklist",
+ * "Prevention", "체크리스트" or "예방 체크리스트" (case-insensitive). The section runs until the next
+ * heading or the end of the text. Only `-` / `*` bullets are collected; empty bullets are dropped.
+ *
+ * Markdown task-list markers (`[ ]`, `[x]`, `[X]`) at the start of an item are stripped, so a card
+ * written as `- [ ] do X` yields `do X` rather than `[ ] do X` (which would otherwise be rendered
+ * with a doubled checkbox when the item is re-emitted as `- [ ] …`).
+ *
+ * @param content Full text (or excerpt) of a lesson card.
+ * @returns The checklist items in document order; [] when there is no such section.
+ */
+export function extractPreventionChecklist(content: string): string[] {
+    if (!content) return [];
+    const heading = content.match(/^#{1,6}\s*(?:prevention\s*checklist|prevention|체크리스트|예방\s*체크리스트)\s*$/im);
+    if (!heading || heading.index === undefined) return [];
+    const after = content.slice(heading.index + heading[0].length);
+    // Stop at the next heading.
+    const stop = after.search(/\n#{1,6}\s/);
+    const section = stop >= 0 ? after.slice(0, stop) : after;
+    return section
+        .split('\n')
+        .map((l) => l.trim())
+        .filter((l) => /^[-*]\s+/.test(l))
+        .map((l) => l
+            .replace(/^[-*]\s+/, '')
+            // Drop a task-list checkbox; require a following space/end so `[x](url)` links survive.
+            .replace(/^\[[ xX]\](?:\s+|$)/, '')
+            .trim())
+        .filter(Boolean);
+}
+
+/**
+ * Minimal shape of a lesson chunk needed to render the lesson checklist prompt block:
+ * `title` becomes the `###` heading of the card's section and `content` is searched for a
+ * Prevention Checklist (falling back to the text itself when none is found).
+ */
+export interface LessonChunkLite {
+    title: string;       // relative path / display title
+    content: string;     // excerpt or full card text
+}
+
+/**
+ * Render the "ACTIVE LESSONS" prompt block that is placed ahead of the regular RAG context.
+ *
+ * Each card becomes a `### <title>` section. When the card has a parseable Prevention Checklist
+ * only those items are shown (as unchecked `- [ ]` boxes); otherwise the trimmed card text is used.
+ *
+ * @param chunks Lesson cards to surface, in the order they should appear.
+ * @returns The complete block, or '' when there are no cards.
+ */
+export function buildLessonChecklistBlock(chunks: LessonChunkLite[]): string {
+    if (!chunks || chunks.length === 0) return '';
+
+    const renderCard = (card: LessonChunkLite): string => {
+        const items = extractPreventionChecklist(card.content);
+        const body = items.length === 0
+            ? card.content.trim()
+            : items.map((item) => `- [ ] ${item}`).join('\n');
+        return `### ${card.title}\n${body}`;
+    };
+    const cards = chunks.map((card) => renderCard(card)).join('\n\n');
+
+    const preamble = [
+        '[⚠ ACTIVE LESSONS — verify these BEFORE finalizing your answer]',
+        'These are recorded lessons from past work on this project. Read them first and make sure you are NOT',
+        'about to repeat any of the mistakes / skip any of the precautions below. If a checklist item is relevant',
+        'to the current request, explicitly confirm it in your answer. If a lesson conflicts with the user, prefer',
+        'the user but flag the conflict.',
+    ];
+    return preamble.concat(['', cards, '', '[END ACTIVE LESSONS]']).join('\n');
+}
+
+/**
+ * Produce the starter text of a new lesson card for the user to fill in.
+ *
+ * The card consists of a frontmatter block (`type: lesson`, title, `occurrences: 1`, `last-seen`)
+ * followed by placeholder sections. Newlines in the title are flattened to spaces.
+ *
+ * @param title Lesson title; falls back to "Untitled lesson" when empty.
+ * @param today Date string written to `last-seen`.
+ * @param situation Optional text (e.g. captured from the recent chat turn) that pre-fills the
+ *                  Situation section; blank values leave the placeholder in place.
+ * @returns The markdown text of the card, ending with a newline.
+ */
+export function lessonTemplate(title: string, today: string, situation?: string): string {
+    const heading = (title || 'Untitled lesson').replace(/\n/g, ' ').trim();
+    const trimmedSituation = situation ? situation.trim() : '';
+    const situationText = trimmedSituation !== '' ? trimmedSituation : '<무슨 작업/맥락이었는지>';
+
+    const frontmatter = [
+        '---',
+        'type: lesson',
+        `title: ${heading}`,
+        'applies-to: []',
+        'severity: medium',
+        'source: curated',
+        'occurrences: 1',
+        `last-seen: ${today}`,
+        '---',
+    ];
+    const sections = [
+        `# Lesson: ${heading}`,
+        '',
+        '## Situation',
+        situationText,
+        '',
+        '## Mistake / Risk',
+        '<무엇이 잘못됐거나 위험했는지>',
+        '',
+        '## Root Cause',
+        '<왜 그렇게 됐는지 — 표면 증상이 아니라 근본 원인>',
+        '',
+        '## Fix',
+        '<어떻게 고쳤는지>',
+        '',
+        '## Prevention Checklist',
+        '- <다음에 비슷한 작업을 할 때 반드시 확인할 것>',
+        '- ',
+        '',
+        '## Applies To',
+        '- <태그: 기능/영역 이름>',
+        '',
+    ];
+    return frontmatter.concat([''], sections).join('\n');
+}
+
+/**
+ * Derive a filesystem-safe slug for a lesson filename.
+ *
+ * The title is lower-cased, every run of characters other than `a-z`, `0-9` and Hangul syllables
+ * becomes a single `-`, the result is cut to 60 characters and surrounding hyphens are removed.
+ *
+ * @param title Lesson title; an empty title, or one that reduces to nothing, yields 'lesson'.
+ * @returns The slug (never empty).
+ */
+export function lessonSlug(title: string): string {
+    const lowered = (title || 'lesson').toLowerCase();
+    const hyphenated = lowered.replace(/[^a-z0-9가-힣]+/g, '-');
+    const clipped = hyphenated.slice(0, 60);
+    const slug = clipped.replace(/^-+|-+$/g, '');
+    return slug === '' ? 'lesson' : slug;
+}
+
+// ── QA-feedback (regression complaint) detection ─────────────────────────────
+
+/**
+ * Korean and English patterns that suggest the user is complaining about a repeated or regressed
+ * problem ("broke again", "same mistake", "why does this keep happening"). A single match is
+ * enough for `isQaRegressionFeedback` to answer true.
+ */
+const QA_REGRESSION_PATTERNS: RegExp[] = [
+    /또\s*(안\s*돼|안되|이래|발생|터졌|깨졌|망가졌)/,
+    /(다시|또)\s*같은\s*(실수|문제|버그|에러|오류)/,
+    /(비슷한|똑같은)\s*(실수|문제|버그|이슈|패턴)/,
+    /왜\s*(자꾸|계속|반복|또)/,
+    /(고쳤는데|수정했는데|패치했는데|바꿨는데)\s*(또|다시|여전히|아직).{0,20}(안|깨|망|문제|에러|오류|실패|broke|broken)/i,
+    /(여전히|아직도)\s*(안\s*돼|안되|버그|깨|문제|실패)/,
+    /regress(ion|ed)?/i,
+    /\b(broke|broken|failing|still\s+broken|same\s+(bug|mistake|issue|error)|again)\b.{0,40}\b(again|still|repeat|recurr)/i,
+    /\bwhy\b.{0,30}\b(keep|again|repeatedly|recurr)/i,
+];
+
+/**
+ * Heuristic: does this user message read like a complaint about a recurring or regressed problem?
+ * When it does, the host offers to record a lesson. Kept conservative on purpose: a false positive
+ * only shows a dismissible prompt, but nagging is worse.
+ *
+ * @param prompt The raw user message.
+ * @returns true when the trimmed message is 4 to 4000 characters long and matches any pattern.
+ */
+export function isQaRegressionFeedback(prompt: string): boolean {
+    if (!prompt) return false;
+    const text = prompt.trim();
+    const plausibleLength = text.length >= 4 && text.length <= 4000;
+    if (!plausibleLength) return false;
+    for (const pattern of QA_REGRESSION_PATTERNS) {
+        if (pattern.test(text)) return true;
+    }
+    return false;
+}
+
+// ── Lesson frontmatter parse / occurrences bump (for dedup-merge) ────────────
+
+/**
+ * Subset of a lesson card's frontmatter extracted by `parseLessonFrontmatter`. Every field is
+ * optional because the corresponding key may be absent from the card.
+ */
+export interface LessonFrontmatter {
+    /** Value of `type:`, lower-cased. */
+    type?: string;
+    /** Value of `title:` with surrounding quotes removed. */
+    title?: string;
+    /** Value of `occurrences:` when it parses as a finite number. */
+    occurrences?: number;
+    /** Entries of the inline `applies-to: [a, b]` list. */
+    appliesTo?: string[];
+}
+
+/**
+ * Parse the leading `--- ... ---` frontmatter block of a lesson card.
+ *
+ * Only the first 2000 characters are inspected, so the closing `---` must appear within that
+ * window. A leading byte-order mark (U+FEFF) is tolerated; it is spelled as an explicit `\uFEFF`
+ * escape because an invisible literal BOM in the pattern is easily lost, leaving the invalid `^?`.
+ *
+ * Keys are matched one line at a time: only spaces and tabs are allowed around the colon, so a key
+ * with an empty value (`title:`) is reported as missing instead of picking up the following line.
+ *
+ * @param content Full text of the card.
+ * @returns The recognised fields; {} when there is no frontmatter.
+ */
+export function parseLessonFrontmatter(content: string): LessonFrontmatter {
+    if (!content) return {};
+    const head = content.slice(0, 2000);
+    if (!/^\uFEFF?---\s*\n/.test(head)) return {};
+    const end = head.indexOf('\n---', 4);
+    if (end < 0) return {};
+    const block = head.slice(0, end);
+    // Read `key: value` from a single line, stripping one pair of surrounding quotes.
+    const get = (key: string): string | undefined => {
+        const m = block.match(new RegExp(`^[ \\t]*${key}[ \\t]*:[ \\t]*(.+?)[ \\t]*$`, 'm'));
+        return m ? m[1].replace(/^["']|["']$/g, '').trim() : undefined;
+    };
+    const occ = get('occurrences');
+    const tags = get('applies-to');
+    let appliesTo: string[] | undefined;
+    if (tags) {
+        // Inline list form only: `[a, "b"]`.
+        const inner = tags.replace(/^\[|\]$/g, '').trim();
+        appliesTo = inner ? inner.split(',').map((s) => s.trim().replace(/^["']|["']$/g, '').trim()).filter(Boolean) : [];
+    }
+    return {
+        type: get('type')?.toLowerCase(),
+        title: get('title'),
+        occurrences: occ !== undefined && Number.isFinite(Number(occ)) ? Number(occ) : undefined,
+        appliesTo,
+    };
+}
+
+/**
+ * Reduce a lesson title to a comparison key: lower-case it and keep only `a-z`, `0-9` and Hangul
+ * syllables, so titles differing only in case, spacing or punctuation compare equal.
+ *
+ * @param title Title to normalise; an empty value yields ''.
+ * @returns The comparison key.
+ */
+export function normalizeLessonTitle(title: string): string {
+    const lowered = (title || '').toLowerCase();
+    return lowered.replace(/[^a-z0-9가-힣]+/g, '');
+}
+
+/**
+ * Return `content` with the frontmatter's `occurrences:` incremented by 1 and `last-seen:` set to
+ * `today`. Keys that are missing are appended just inside the frontmatter block. If there is no
+ * frontmatter at all, `content` is returned unchanged (the caller decides what to do).
+ *
+ * Implementation notes:
+ * - A leading byte-order mark (U+FEFF) is tolerated; it is spelled `\uFEFF` because an invisible
+ *   literal BOM in the pattern is easily lost, leaving the invalid `^?`.
+ * - Replacements go through replacer functions, so neither the new count nor `today` is parsed as
+ *   a `$`-pattern (a string such as `$1` followed by digits is otherwise ambiguous).
+ * - Only spaces and tabs are accepted around the colon, so a key with an empty value is rewritten
+ *   on its own line instead of overwriting the line that follows it.
+ *
+ * @param content Full text of the lesson card.
+ * @param today Date string to store in `last-seen`.
+ * @returns The updated text, or `content` itself when no frontmatter block is found.
+ */
+export function bumpLessonOccurrences(content: string, today: string): string {
+    if (!/^\uFEFF?---\s*\n/.test(content)) return content;
+    const end = content.indexOf('\n---', 4);
+    if (end < 0) return content;
+    let block = content.slice(0, end);
+    const rest = content.slice(end);
+    // A card without a readable count is treated as having been seen once.
+    const next = (parseLessonFrontmatter(content).occurrences ?? 1) + 1;
+
+    const occurrencesLine = /^([ \t]*occurrences[ \t]*:[ \t]*).*$/m;
+    block = occurrencesLine.test(block)
+        ? block.replace(occurrencesLine, (_line: string, prefix: string) => `${prefix}${next}`)
+        : `${block}\noccurrences: ${next}`;
+
+    const lastSeenLine = /^([ \t]*last-seen[ \t]*:[ \t]*).*$/m;
+    block = lastSeenLine.test(block)
+        ? block.replace(lastSeenLine, (_line: string, prefix: string) => `${prefix}${today}`)
+        : `${block}\nlast-seen: ${today}`;
+
+    return block + rest;
+}
+
+// ── Post-answer checklist coverage (non-blocking flag) ──────────────────────
+
+/**
+ * Distinct tokens of a checklist item that are at least two characters long. Items that start
+ * with `<` (template placeholders such as "<다음에 확인할 것>") yield no terms.
+ */
+function checklistItemTerms(item: string): string[] {
+    const isPlaceholder = item.trim().startsWith('<');
+    if (isPlaceholder) return [];
+    const distinct = new Set(tokenize(item));
+    return Array.from(distinct).filter((term) => term.length >= 2);
+}
+
+/**
+ * List Prevention-Checklist items from the injected lesson cards that the assistant's answer does
+ * not visibly address, i.e. none of the item's significant terms occurs among the answer's tokens.
+ *
+ * Deliberately conservative: placeholder items and items with fewer than two significant terms are
+ * never flagged, and an item already flagged (compared by normalised text) is not reported twice.
+ *
+ * @param answer The assistant's answer text.
+ * @param lessonContents Text of each lesson card injected this turn.
+ * @param max Collection stops as soon as this many items have been flagged (default 3).
+ * @returns The flagged items in encounter order; [] when there is nothing to check.
+ */
+export function findUnaddressedChecklistItems(answer: string, lessonContents: string[], max = 3): string[] {
+    if (!answer || !lessonContents || lessonContents.length === 0) return [];
+
+    const answerVocabulary = new Set(tokenize(answer));
+    const flagged: string[] = [];
+    const flaggedKeys = new Set<string>();
+
+    for (const card of lessonContents) {
+        for (const item of extractPreventionChecklist(card)) {
+            const dedupeKey = normalizeLessonTitle(item);
+            if (dedupeKey === '' || flaggedKeys.has(dedupeKey)) continue;
+
+            const terms = checklistItemTerms(item);
+            const judgeable = terms.length >= 2; // fewer terms: too vague to judge
+            if (!judgeable) continue;
+
+            const mentioned = terms.some((term) => answerVocabulary.has(term));
+            if (mentioned) continue;
+
+            flagged.push(item);
+            flaggedKeys.add(dedupeKey);
+            if (flagged.length >= max) return flagged;
+        }
+    }
+    return flagged;
+}
@@ -31,11 +31,17 @@ export interface RetrievalChunk {
        category?: string;
        isProjectEvidence?: boolean;
        lastUpdated?: number;
-        
+
        // --- Scoring Intelligence (v2.75.0+) ---
        conflictDetected?: boolean;
        conflictSeverity?: ConflictSeverity;
        informationDensity?: number;
+
+        // --- Experience Memory ---
+        /** True when this chunk comes from a lesson / playbook / qa-finding card in the brain. */
+        isLesson?: boolean;
+        /** 'lesson' | 'playbook' | 'qa-finding' when isLesson is true. */
+        lessonKind?: string;
    };
 }

@@ -44,6 +50,8 @@ export interface RetrievalResult {
    totalChunks: number;
    selectedChunks: RetrievalChunk[];
    droppedChunks: RetrievalChunk[];
+    /** Lesson/playbook/qa-finding chunks that survived the budget — pulled out so callers can inject them prominently. */
+    lessonChunks: RetrievalChunk[];
    totalTokensUsed: number;
    contextBudget: number;
    fusionLog: string[];      // 디버그용 융합 로그