/**
 * ============================================================
 * Lesson / Experience Memory — pure helpers (no vscode dependency)
 *
 * "Lesson" = a markdown file in the active brain that captures a past mistake/risk and how to avoid
 * repeating it. Identified by a `lessons/` / `playbooks/` / `qa-findings/` path segment, or by
 * frontmatter `type: lesson|playbook|qa-finding`. These are retrieved like any other brain file but
 * boosted and injected as a prominent "verify before finalizing" checklist (see EXPERIENCE_MEMORY_PLAN.md).
 * ============================================================
 */

import { tokenize } from './scoring';

/** Path segments that mark a file as lesson-like. */
export const LESSON_DIR_RE = /(^|[\\/])(lessons?|playbooks?|qa[-_]?findings?)([\\/]|$)/i;

export type LessonKind = 'lesson' | 'playbook' | 'qa-finding';

/**
 * Opening frontmatter fence: `---` on the very first line, optionally preceded by a byte-order mark.
 * The BOM is written as an escape so it stays visible and cannot be silently stripped by tooling.
 */
const FRONTMATTER_OPEN_RE = /^\uFEFF?---\s*\n/;

/** A body that consists of nothing but an untouched `<...>` template placeholder. */
const PLACEHOLDER_RE = /^<[^>]+>$/;

/** Heading that opens the "Prevention Checklist" section (English or Korean spelling). */
const PREVENTION_HEADING_RE = /^#{1,6}\s*(?:prevention\s*checklist|prevention|체크리스트|예방\s*체크리스트)\s*$/im;

/** Sections kept by `extractLessonEssence`: canonical heading to emit + pattern that finds it. */
const ESSENCE_SECTIONS: ReadonlyArray<readonly [string, RegExp]> = [
  ['## Mistake / Risk', /^#{1,6}\s*(?:mistake\s*\/?\s*risk|mistake|risk|실수|문제)\s*$/im],
  ['## Root Cause', /^#{1,6}\s*(?:root\s*cause|근본\s*원인|원인)\s*$/im],
  ['## Fix', /^#{1,6}\s*(?:fix|해결|수정)\s*$/im],
  ['## Prevention Checklist', PREVENTION_HEADING_RE],
];

/**
 * Locate the end of a leading `--- ... ---` frontmatter block.
 *
 * @returns index of the `\n---` that closes the block, or -1 when `text` does not start with a
 *          frontmatter fence or the fence is never closed.
 */
function frontmatterEnd(text: string): number {
  if (!FRONTMATTER_OPEN_RE.test(text)) return -1;
  // Start searching past the opening fence so it is not mistaken for the closing one.
  return text.indexOf('\n---', 4);
}

/**
 * Decide whether a brain file is a lesson (and which kind). Cheap — only looks at the relative path
 * and, if present, the YAML-ish frontmatter at the top of `content`.
 *
 * @param relativePath path of the file relative to the brain root (either slash style).
 * @param content      file text; only its leading frontmatter is inspected.
 * @returns the kind string, or '' for an ordinary note.
 */
export function detectLessonKind(relativePath: string, content: string): LessonKind | '' {
  // 1) An explicit frontmatter `type:` wins over anything the path implies.
  switch (parseFrontmatterType(content)) {
    case 'lesson':
      return 'lesson';
    case 'playbook':
      return 'playbook';
    case 'qa-finding':
      return 'qa-finding';
    default:
      break;
  }

  // 2) Otherwise infer the kind from the directory segment.
  const hit = LESSON_DIR_RE.exec(relativePath || '');
  if (!hit) return '';
  const segment = (hit[2] ?? '').toLowerCase();
  if (segment.startsWith('playbook')) return 'playbook';
  if (segment.startsWith('qa')) return 'qa-finding';
  return 'lesson';
}

/** Pull the `type:` value out of a leading `--- ... ---` frontmatter block. Returns '' if absent. */
function parseFrontmatterType(content: string): string {
  if (!content) return '';
  // Only a short prefix is scanned; the `type:` key is expected near the top of the file.
  const head = content.slice(0, 800);
  const end = frontmatterEnd(head);
  if (end < 0) return '';
  // `[ \t]*` (not `\s*`) keeps the match on a single line, so an empty `type:` cannot pick up
  // the following line as its value.
  const m = head.slice(0, end).match(/^[ \t]*type[ \t]*:[ \t]*["']?([a-zA-Z-]+)["']?[ \t]*$/m);
  const value = m?.[1];
  return value ? value.trim().toLowerCase() : '';
}

/**
 * Pull a specific markdown section ("## NAME ... up to the next heading") from a lesson card.
 * Returns trimmed body text, or '' if the heading isn't found.
 */
function extractSection(content: string, headingRe: RegExp): string {
  const m = content.match(headingRe);
  if (!m || m.index === undefined) return '';
  const after = content.slice(m.index + m[0].length);
  // The section ends at the next heading of any level.
  const stop = after.search(/\n#{1,6}\s/);
  return (stop >= 0 ? after.slice(0, stop) : after).trim();
}

/** Trim `text`; when it is longer than `maxLen` characters, cut it there and append an ellipsis line. */
function clipText(text: string, maxLen: number): string {
  return text.length <= maxLen ? text.trim() : `${text.slice(0, maxLen).trim()}\n…`;
}

/**
 * Slim a lesson card down to the sections that actually matter for guardrails:
 * Mistake / Risk, Root Cause, Fix, and Prevention Checklist. Drops Situation,
 * Applies-To, and any verbose narrative. Returned text is markdown-compatible
 * with the original headings so the model still sees the structure.
 *
 * Falls back to the original content (clipped to `maxLen`) if no recognised
 * sections are found — keeps backwards-compat for old lessons that don't
 * follow the current template.
 *
 * Why: lesson cards are loaded at 2500 chars each and three cards can eat
 * ~11K tokens. The essence sections are usually <600 chars total per card,
 * which trims retrieval tokens by ~70% without losing the signal.
 *
 * @param content full lesson card markdown.
 * @param maxLen  maximum length (in UTF-16 code units) of the returned text before the ellipsis.
 */
export function extractLessonEssence(content: string, maxLen = 1200): string {
  if (!content) return '';

  const kept: string[] = [];
  for (const [heading, pattern] of ESSENCE_SECTIONS) {
    const body = extractSection(content, pattern);
    // Skip sections that are missing or still hold only the `<...>` template placeholder.
    if (body && !PLACEHOLDER_RE.test(body)) kept.push(`${heading}\n${body}`);
  }

  return clipText(kept.length > 0 ? kept.join('\n\n') : content, maxLen);
}

/**
 * Extract the "## Prevention Checklist" bullet list from a lesson card, if present.
 *
 * Only `-` / `*` bullets are recognised. A leading task-list marker (`[ ]`, `[x]`) is removed so
 * that callers which render items as `- [ ] item` do not end up with a doubled checkbox.
 *
 * @returns the bullet texts in document order; empty when the section is missing or has no bullets.
 */
export function extractPreventionChecklist(content: string): string[] {
  if (!content) return [];
  return extractSection(content, PREVENTION_HEADING_RE)
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => /^[-*]\s+/.test(line))
    .map((line) =>
      line
        .replace(/^[-*]\s+/, '')
        // Require whitespace (or end of line) after the box so `[x](url)` links are left alone.
        .replace(/^\[[ xX]\](?:\s+|$)/, '')
        .trim(),
    )
    .filter(Boolean);
}

export interface LessonChunkLite {
  title: string; // relative path / display title
  content: string; // excerpt or full card text
}

/**
 * Build the prompt block injected ahead of the regular RAG context. Kept compact; if a card has a
 * parseable Prevention Checklist we surface just that, otherwise the card text.
 *
 * @returns the full block, or '' when there are no chunks.
 */
export function buildLessonChecklistBlock(chunks: LessonChunkLite[]): string {
  if (!chunks || chunks.length === 0) return '';

  const sections = chunks.map((chunk) => {
    const checklist = extractPreventionChecklist(chunk.content);
    const body =
      checklist.length > 0 ? checklist.map((item) => `- [ ] ${item}`).join('\n') : chunk.content.trim();
    return `### ${chunk.title}\n${body}`;
  });

  return [
    '[⚠ ACTIVE LESSONS — verify these BEFORE finalizing your answer]',
    'These are recorded lessons from past work on this project. Read them first and make sure you are NOT',
    'about to repeat any of the mistakes / skip any of the precautions below. If a checklist item is relevant',
    'to the current request, explicitly confirm it in your answer. If a lesson conflicts with the user, prefer',
    'the user but flag the conflict.',
    '',
    sections.join('\n\n'),
    '',
    '[END ACTIVE LESSONS]',
  ].join('\n');
}

/**
 * A starter lesson card written by the `g1nation.lesson.create` / `…fromConversation` commands for
 * the user to fill in. If `situation` is given (e.g. captured from the recent chat turn), it pre-fills
 * the Situation section.
 *
 * @param title     lesson title; line breaks are flattened to spaces so it stays on one frontmatter line.
 * @param today     date string written verbatim to `last-seen:`.
 * @param situation optional text for the Situation section.
 */
export function lessonTemplate(title: string, today: string, situation?: string): string {
  // Replace every line terminator the multiline-regex frontmatter parsers recognise, not just `\n`,
  // so the title can never spill onto (or fake) another frontmatter line.
  const flattened = (title || '').replace(/[\r\n\u2028\u2029]/g, ' ').trim();
  const safeTitle = flattened || 'Untitled lesson';
  const situationBody = situation?.trim() || '<무슨 작업/맥락이었는지>';

  return [
    '---',
    'type: lesson',
    `title: ${safeTitle}`,
    'applies-to: []',
    'severity: medium',
    'source: curated',
    'occurrences: 1',
    `last-seen: ${today}`,
    '---',
    '',
    `# Lesson: ${safeTitle}`,
    '',
    '## Situation',
    situationBody,
    '',
    '## Mistake / Risk',
    '<무엇이 잘못됐거나 위험했는지>',
    '',
    '## Root Cause',
    '<왜 그렇게 됐는지 — 표면 증상이 아니라 근본 원인>',
    '',
    '## Fix',
    '<어떻게 고쳤는지>',
    '',
    '## Prevention Checklist',
    '- <다음에 비슷한 작업을 할 때 반드시 확인할 것>',
    '- ',
    '',
    '## Applies To',
    '- <태그: 기능/영역 이름>',
    '',
  ].join('\n');
}

/**
 * Filesystem-safe slug for a lesson filename: lowercase, runs of anything other than ASCII
 * letters/digits or Hangul syllables collapsed to `-`, capped at 60 characters, no edge dashes.
 * Falls back to 'lesson' when nothing usable remains.
 */
export function lessonSlug(title: string): string {
  const slug = (title || 'lesson')
    .toLowerCase()
    .replace(/[^a-z0-9가-힣]+/g, '-')
    .slice(0, 60)
    .replace(/^-+|-+$/g, '');
  return slug || 'lesson';
}

// ── QA-feedback (regression complaint) detection ─────────────────────────────

/** Korean and English phrasings of "this broke again / same mistake / why does this keep happening". */
const QA_REGRESSION_PATTERNS: readonly RegExp[] = [
  /또\s*(안\s*돼|안되|이래|발생|터졌|깨졌|망가졌)/,
  /(다시|또)\s*같은\s*(실수|문제|버그|에러|오류)/,
  /(비슷한|똑같은)\s*(실수|문제|버그|이슈|패턴)/,
  /왜\s*(자꾸|계속|반복|또)/,
  /(고쳤는데|수정했는데|패치했는데|바꿨는데)\s*(또|다시|여전히|아직).{0,20}(안|깨|망|문제|에러|오류|실패|broke|broken)/i,
  /(여전히|아직도)\s*(안\s*돼|안되|버그|깨|문제|실패)/,
  /regress(ion|ed)?/i,
  /\b(broke|broken|failing|still\s+broken|same\s+(bug|mistake|issue|error)|again)\b.{0,40}\b(again|still|repeat|recurr)/i,
  /\bwhy\b.{0,30}\b(keep|again|repeatedly|recurr)/i,
];

/**
 * Heuristic: does this user message look like "you broke something again / same mistake / why does
 * this keep happening"? If so, the host offers to record a lesson. Deliberately conservative — false
 * positives just show a dismissible prompt, but we'd rather not nag.
 *
 * Messages shorter than 4 or longer than 4000 characters (after trimming) are never flagged.
 */
export function isQaRegressionFeedback(prompt: string): boolean {
  if (!prompt) return false;
  const text = prompt.trim();
  if (text.length < 4 || text.length > 4000) return false;
  return QA_REGRESSION_PATTERNS.some((re) => re.test(text));
}

// ── Lesson frontmatter parse / occurrences bump (for dedup-merge) ────────────

export interface LessonFrontmatter {
  type?: string;
  title?: string;
  occurrences?: number;
  appliesTo?: string[];
}

/**
 * Parse the leading `--- ... ---` block. Returns {} when there is no frontmatter.
 *
 * Only the first 2000 characters are inspected. Values are read line by line with a lenient regex
 * (not a YAML parser): surrounding quotes are stripped, `applies-to` is read as a flat `[a, b]`
 * list, and `occurrences` is kept only when it parses to a finite number.
 */
export function parseLessonFrontmatter(content: string): LessonFrontmatter {
  if (!content) return {};
  const head = content.slice(0, 2000);
  const end = frontmatterEnd(head);
  if (end < 0) return {};
  const block = head.slice(0, end);

  const stripQuotes = (s: string): string => s.trim().replace(/^["']|["']$/g, '').trim();

  const read = (key: string): string | undefined => {
    // `[ \t]*` instead of `\s*`: the value must sit on the key's own line, otherwise an empty
    // `key:` would capture the next frontmatter line as its value.
    const m = block.match(new RegExp(`^[ \\t]*${key}[ \\t]*:[ \\t]*(.+?)[ \\t]*$`, 'm'));
    const raw = m?.[1];
    return raw === undefined ? undefined : stripQuotes(raw);
  };

  const rawCount = read('occurrences');
  const count = rawCount === undefined ? Number.NaN : Number(rawCount);

  const rawTags = read('applies-to');
  let appliesTo: string[] | undefined;
  if (rawTags) {
    const inner = rawTags.replace(/^\[|\]$/g, '').trim();
    appliesTo = inner ? inner.split(',').map(stripQuotes).filter(Boolean) : [];
  }

  return {
    type: read('type')?.toLowerCase(),
    title: read('title'),
    occurrences: Number.isFinite(count) ? count : undefined,
    appliesTo,
  };
}

/** Normalize a lesson title for equality matching (lowercase, strip punctuation/whitespace). */
export function normalizeLessonTitle(title: string): string {
  return (title || '').toLowerCase().replace(/[^a-z0-9가-힣]+/g, '');
}

/**
 * Set `key: value` inside a frontmatter block: rewrite the existing line (keeping its indentation
 * and spacing) or append a new line at the end of the block when the key is absent.
 */
function setFrontmatterValue(block: string, key: string, value: string): string {
  const line = new RegExp(`^([ \\t]*${key}[ \\t]*:[ \\t]*).*$`, 'm');
  if (!line.test(block)) return `${block}\n${key}: ${value}`;
  // A replacer function is used so `value` is inserted literally — a replacement *string* would
  // interpret `$` sequences and run the group reference into following digits (`$1` + `2024…`).
  return block.replace(line, (_match: string, prefix: string) => {
    const lead = prefix.endsWith(':') ? `${prefix} ` : prefix;
    return `${lead}${value}`;
  });
}

/**
 * Return `content` with the frontmatter's `occurrences:` incremented by 1 and `last-seen:` set to
 * `today`. If the keys are missing they're inserted just inside the frontmatter block. If there is
 * no frontmatter at all, `content` is returned unchanged (caller decides what to do).
 *
 * A missing or unparseable `occurrences` counts as 1, so the first bump writes 2.
 */
export function bumpLessonOccurrences(content: string, today: string): string {
  const end = frontmatterEnd(content);
  if (end < 0) return content;

  const rest = content.slice(end);
  const next = (parseLessonFrontmatter(content).occurrences ?? 1) + 1;

  let block = content.slice(0, end);
  block = setFrontmatterValue(block, 'occurrences', String(next));
  block = setFrontmatterValue(block, 'last-seen', today);
  return block + rest;
}

// ── Post-answer checklist coverage (non-blocking flag) ──────────────────────

/**
 * "Significant" words of a checklist item — drops placeholders, punctuation, very short tokens.
 *
 * An item that is nothing but a `<...>` template placeholder yields no terms. Otherwise the item is
 * tokenized with `tokenize` and the distinct tokens of length >= 2 are returned.
 */
function checklistItemTerms(item: string): string[] {
  // Same placeholder shape that `extractLessonEssence` ignores for whole sections.
  if (PLACEHOLDER_RE.test(item.trim())) return [];
  return Array.from(new Set(tokenize(item))).filter((t) => t.length >= 2);
}

/**
 * Given the assistant's answer and the lesson cards injected this turn, return Prevention-Checklist
 * items that the answer does not visibly address (zero of their significant terms appear). Conservative
 * by design — only flags items with at least 2 significant terms and a real, non-placeholder body.
 * Capped at `max` items so the footer doesn't get noisy.
 *
 * @param answer         the assistant's answer text.
 * @param lessonContents full text of each lesson card that was injected.
 * @param max            maximum number of items to return.
 */
export function findUnaddressedChecklistItems(answer: string, lessonContents: string[], max = 3): string[] {
  if (!answer || !lessonContents || lessonContents.length === 0) return [];

  const answerTerms = new Set(tokenize(answer));
  const flagged: string[] = [];
  const seen = new Set<string>(); // normalized text of items already flagged, for de-duplication

  for (const content of lessonContents) {
    for (const item of extractPreventionChecklist(content)) {
      const key = normalizeLessonTitle(item);
      if (!key || seen.has(key)) continue;

      const terms = checklistItemTerms(item);
      if (terms.length < 2) continue; // too vague to judge

      const covered = terms.some((t) => answerTerms.has(t));
      if (covered) continue;

      flagged.push(item);
      seen.add(key);
      if (flagged.length >= max) return flagged;
    }
  }
  return flagged;
}