326 lines
14 KiB
TypeScript
326 lines
14 KiB
TypeScript
/**
|
||
* ============================================================
|
||
* Lesson / Experience Memory — pure helpers (no vscode dependency)
|
||
*
|
||
* "Lesson" = a markdown file in the active brain that captures a past mistake/risk and how to avoid
|
||
* repeating it. Identified by a `lessons/` / `playbooks/` / `qa-findings/` path segment, or by
|
||
* frontmatter `type: lesson|playbook|qa-finding`. These are retrieved like any other brain file but
|
||
* boosted and injected as a prominent "verify before finalizing" checklist (see EXPERIENCE_MEMORY_PLAN.md).
|
||
* ============================================================
|
||
*/
|
||
|
||
import { tokenize } from './scoring';
|
||
|
||
/** Path segments that mark a file as lesson-like. */
|
||
// Capture group 2 holds the matched directory name: `lesson(s)`, `playbook(s)` or `qa-finding(s)`
// (`qa_finding(s)` and `qafinding(s)` are accepted too). The name must be a whole path segment,
// bounded by `/`, `\`, or the start/end of the string; matching is case-insensitive.
export const LESSON_DIR_RE = /(^|[\\/])(lessons?|playbooks?|qa[-_]?findings?)([\\/]|$)/i;
|
||
|
||
// Kinds of lesson-like cards; these are the frontmatter `type:` values accepted by `detectLessonKind`.
export type LessonKind = 'lesson' | 'playbook' | 'qa-finding';
|
||
|
||
/**
 * Classify a brain file as a lesson-like card.
 *
 * An explicit frontmatter `type:` of `lesson`, `playbook` or `qa-finding` takes precedence;
 * otherwise the kind is inferred from the directory segment matched by `LESSON_DIR_RE`.
 * Only the relative path and the leading frontmatter of `content` are inspected.
 *
 * @param relativePath relative path of the brain file (an empty value is treated as '')
 * @param content      file text, consulted only for its leading frontmatter block
 * @returns the detected kind, or '' when the file is an ordinary note
 */
export function detectLessonKind(relativePath: string, content: string): LessonKind | '' {
  // Frontmatter declaration wins over the path.
  const declared = parseFrontmatterType(content);
  switch (declared) {
    case 'lesson':
    case 'playbook':
    case 'qa-finding':
      return declared;
    default:
      break;
  }

  // No usable declaration: fall back to the directory name in the path.
  const hit = LESSON_DIR_RE.exec(relativePath || '');
  if (hit === null) return '';

  const dirName = hit[2].toLowerCase();
  if (dirName.startsWith('playbook')) return 'playbook';
  return dirName.startsWith('qa') ? 'qa-finding' : 'lesson';
}
|
||
|
||
/**
 * Read the `type:` value from a leading `--- ... ---` frontmatter block.
 *
 * Only the first 800 characters of `content` are scanned, so a block whose closing `---` lies
 * beyond that window is treated as absent. An optional byte-order mark (U+FEFF) before the
 * opening fence is tolerated.
 *
 * @param content full text of the file
 * @returns the lower-cased type value, or '' when there is no frontmatter or no `type:` key
 */
function parseFrontmatterType(content: string): string {
  if (!content) return '';
  const head = content.slice(0, 800);

  // The BOM is spelled as an escape so the pattern does not depend on an invisible character
  // surviving editors and copy/paste.
  const opening = /^\uFEFF?---\s*\n/.exec(head);
  if (!opening) return '';

  // Search from the newline that terminates the opening fence, so an empty block (`---\n---`)
  // is closed by its own fence rather than by a later `---` rule in the body.
  const end = head.indexOf('\n---', opening[0].length - 1);
  if (end < 0) return '';

  const block = head.slice(0, end);
  const m = /^\s*type\s*:\s*["']?([a-zA-Z-]+)["']?\s*$/m.exec(block);
  return m ? m[1].trim().toLowerCase() : '';
}
|
||
|
||
/**
 * Return the body of one markdown section of a lesson card.
 *
 * The section begins right after the first match of `headingRe` and runs until the next line
 * that opens with one to six `#` characters followed by whitespace, or to the end of the text.
 *
 * @param content   markdown text to search
 * @param headingRe pattern matching the heading line of the wanted section
 * @returns the trimmed section body, or '' when the heading is not found
 */
function extractSection(content: string, headingRe: RegExp): string {
  const found = content.match(headingRe);
  if (found === null || typeof found.index !== 'number') return '';

  const tail = content.slice(found.index + found[0].length);
  const nextHeading = tail.search(/\n#{1,6}\s/);
  const body = nextHeading < 0 ? tail : tail.slice(0, nextHeading);
  return body.trim();
}
|
||
|
||
/**
 * Reduce a lesson card to the sections that matter for guardrails: Mistake / Risk, Root Cause,
 * Fix and Prevention Checklist. Other sections (Situation, Applies To, free narrative) are
 * dropped. Each kept section is re-emitted under its canonical `## ...` heading so the model
 * still sees the card structure.
 *
 * A section whose whole body is a single `<...>` placeholder is skipped. When no recognised
 * section remains, the original content is returned instead (trimmed, and clipped to `maxLen`
 * with a trailing `…` line when longer) so cards that predate the template keep working.
 *
 * Rationale carried over from the original note: full cards are large, while the essence
 * sections are usually short, so this cuts retrieval tokens substantially.
 *
 * @param content full lesson card text
 * @param maxLen  maximum number of characters kept before the `…` marker is appended
 * @returns the slimmed card text, or '' for empty input
 */
export function extractLessonEssence(content: string, maxLen = 1200): string {
  if (!content) return '';

  // Canonical heading to emit, paired with the pattern that recognises it in a card.
  const wanted: Array<[string, RegExp]> = [
    ['## Mistake / Risk', /^#{1,6}\s*(?:mistake\s*\/?\s*risk|mistake|risk|실수|문제)\s*$/im],
    ['## Root Cause', /^#{1,6}\s*(?:root\s*cause|근본\s*원인|원인)\s*$/im],
    ['## Fix', /^#{1,6}\s*(?:fix|해결|수정)\s*$/im],
    ['## Prevention Checklist', /^#{1,6}\s*(?:prevention\s*checklist|prevention|체크리스트|예방\s*체크리스트)\s*$/im],
  ];

  const parts: string[] = [];
  for (const [heading, pattern] of wanted) {
    const body = extractSection(content, pattern);
    if (!body) continue;
    if (/^<[^>]+>$/.test(body)) continue; // untouched template placeholder
    parts.push(`${heading}\n${body}`);
  }

  if (parts.length === 0) {
    // Nothing recognised: fall back to the raw card.
    if (content.length > maxLen) return content.slice(0, maxLen).trim() + '\n…';
    return content.trim();
  }

  const assembled = parts.join('\n\n');
  return assembled.length > maxLen ? assembled.slice(0, maxLen).trim() + '\n…' : assembled;
}
|
||
|
||
/**
 * Extract the bullet items of a lesson card's "Prevention Checklist" section.
 *
 * The section runs from the first matching heading to the next markdown heading (or the end
 * of the text). Lines starting with a `-`, `*` or `+` bullet are returned without the bullet.
 * A leading task-list checkbox (`[ ]`, `[x]`, `[X]`) is removed as well, so callers that
 * re-render items as `- [ ] item` do not end up with a doubled checkbox.
 *
 * @param content full lesson card text
 * @returns the checklist items in document order; empty when the section or its bullets are absent
 */
export function extractPreventionChecklist(content: string): string[] {
  if (!content) return [];

  const heading = content.match(/^#{1,6}\s*(?:prevention\s*checklist|prevention|체크리스트|예방\s*체크리스트)\s*$/im);
  if (!heading || heading.index === undefined) return [];

  const after = content.slice(heading.index + heading[0].length);
  // Stop at the next heading.
  const stop = after.search(/\n#{1,6}\s/);
  const section = stop >= 0 ? after.slice(0, stop) : after;

  const bullet = /^[-*+]\s+/;
  // A checkbox only counts when followed by whitespace or the end of the line, so an item that
  // merely starts with bracketed text (e.g. "[x]y") is left alone.
  const checkbox = /^\[[ xX]\](?:\s+|$)/;

  return section
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => bullet.test(line))
    .map((line) => line.replace(bullet, '').replace(checkbox, '').trim())
    .filter(Boolean);
}
|
||
|
||
/** Minimal description of a lesson chunk, as consumed by `buildLessonChecklistBlock`. */
export interface LessonChunkLite {
  /** Relative path or display title of the card. */
  title: string;
  /** Excerpt or full text of the card. */
  content: string;
}
|
||
|
||
/**
 * Render the "active lessons" prompt block that is placed ahead of the regular RAG context.
 *
 * Each chunk becomes a `### <title>` entry. When the card has a parseable Prevention Checklist
 * only those items are shown, each as a `- [ ]` task; otherwise the trimmed card text is used.
 *
 * @param chunks lesson chunks to surface
 * @returns the complete block, or '' when there are no chunks
 */
export function buildLessonChecklistBlock(chunks: LessonChunkLite[]): string {
  if (!chunks || chunks.length === 0) return '';

  const cards = chunks.map((chunk) => {
    const items = extractPreventionChecklist(chunk.content);
    const body = items.length === 0
      ? chunk.content.trim()
      : items.map((item) => `- [ ] ${item}`).join('\n');
    return `### ${chunk.title}\n${body}`;
  });

  const preamble = [
    '[⚠ ACTIVE LESSONS — verify these BEFORE finalizing your answer]',
    'These are recorded lessons from past work on this project. Read them first and make sure you are NOT',
    'about to repeat any of the mistakes / skip any of the precautions below. If a checklist item is relevant',
    'to the current request, explicitly confirm it in your answer. If a lesson conflicts with the user, prefer',
    'the user but flag the conflict.',
  ].join('\n');

  // A blank line separates the preamble, the cards and the closing marker.
  return `${preamble}\n\n${cards.join('\n\n')}\n\n[END ACTIVE LESSONS]`;
}
|
||
|
||
/**
 * Build the starter lesson card written by the `g1nation.lesson.create` / `…fromConversation`
 * commands for the user to fill in.
 *
 * The title is flattened to a single line because it is written into the one-line `title:`
 * frontmatter field and into the `# Lesson:` heading. Every line terminator (CRLF, LF, CR,
 * U+2028, U+2029) becomes a space. A missing or blank title falls back to "Untitled lesson".
 *
 * @param title     human-readable lesson title
 * @param today     date string written to `last-seen:` (inserted verbatim)
 * @param situation optional text that pre-fills the Situation section, e.g. captured from the
 *                  recent chat turn; a placeholder is used when absent or blank
 * @returns the markdown card, ending with a newline
 */
export function lessonTemplate(title: string, today: string, situation?: string): string {
  const flattened = (title ?? '').replace(/\r\n|[\r\n\u2028\u2029]/g, ' ').trim();
  const safeTitle = flattened || 'Untitled lesson';
  const trimmedSituation = situation ? situation.trim() : '';
  const situationBody = trimmedSituation || '<무슨 작업/맥락이었는지>';

  return [
    '---',
    'type: lesson',
    `title: ${safeTitle}`,
    'applies-to: []',
    'severity: medium',
    'source: curated',
    'occurrences: 1',
    `last-seen: ${today}`,
    '---',
    '',
    `# Lesson: ${safeTitle}`,
    '',
    '## Situation',
    situationBody,
    '',
    '## Mistake / Risk',
    '<무엇이 잘못됐거나 위험했는지>',
    '',
    '## Root Cause',
    '<왜 그렇게 됐는지 — 표면 증상이 아니라 근본 원인>',
    '',
    '## Fix',
    '<어떻게 고쳤는지>',
    '',
    '## Prevention Checklist',
    '- <다음에 비슷한 작업을 할 때 반드시 확인할 것>',
    '- ',
    '',
    '## Applies To',
    '- <태그: 기능/영역 이름>',
    '',
  ].join('\n');
}
|
||
|
||
/**
 * Derive a filesystem-safe slug for a lesson filename.
 *
 * The title is lower-cased, every run of characters other than `a-z`, `0-9` and Hangul
 * syllables becomes a single `-`, the result is cut to 60 characters, and leading/trailing
 * dashes are removed.
 *
 * @param title lesson title
 * @returns the slug, or 'lesson' when the title is empty or yields nothing usable
 */
export function lessonSlug(title: string): string {
  const source = title || 'lesson';
  const dashed = source.toLowerCase().replace(/[^a-z0-9가-힣]+/g, '-');
  // Clip first, then trim, so a dash exposed by the cut is removed as well.
  const clipped = dashed.slice(0, 60);
  const slug = clipped.replace(/^-+|-+$/g, '');
  return slug === '' ? 'lesson' : slug;
}
|
||
|
||
// ── QA-feedback (regression complaint) detection ─────────────────────────────
|
||
|
||
/**
 * Patterns (Korean and English) that suggest a regression complaint: "broke again", "same
 * mistake", "why does this keep happening", and similar. None of them is global or sticky, so
 * `test` keeps no state between calls.
 */
const QA_REGRESSION_PATTERNS: RegExp[] = [
  /또\s*(안\s*돼|안되|이래|발생|터졌|깨졌|망가졌)/,
  /(다시|또)\s*같은\s*(실수|문제|버그|에러|오류)/,
  /(비슷한|똑같은)\s*(실수|문제|버그|이슈|패턴)/,
  /왜\s*(자꾸|계속|반복|또)/,
  /(고쳤는데|수정했는데|패치했는데|바꿨는데)\s*(또|다시|여전히|아직).{0,20}(안|깨|망|문제|에러|오류|실패|broke|broken)/i,
  /(여전히|아직도)\s*(안\s*돼|안되|버그|깨|문제|실패)/,
  /regress(ion|ed)?/i,
  /\b(broke|broken|failing|still\s+broken|same\s+(bug|mistake|issue|error)|again)\b.{0,40}\b(again|still|repeat|recurr)/i,
  /\bwhy\b.{0,30}\b(keep|again|repeatedly|recurr)/i,
];

/**
 * Heuristic check for whether a user message reads like a regression complaint. The host
 * uses a positive result to offer recording a lesson, so the check is deliberately
 * conservative: a false positive only shows a dismissible prompt, but nagging is unwanted.
 *
 * @param prompt raw user message
 * @returns true when the trimmed message is 4 to 4000 characters long and matches a pattern
 */
export function isQaRegressionFeedback(prompt: string): boolean {
  if (!prompt) return false;

  const text = prompt.trim();
  const lengthOk = text.length >= 4 && text.length <= 4000;
  if (!lengthOk) return false;

  for (const pattern of QA_REGRESSION_PATTERNS) {
    if (pattern.test(text)) return true;
  }
  return false;
}
|
||
|
||
// ── Lesson frontmatter parse / occurrences bump (for dedup-merge) ────────────
|
||
|
||
/** Subset of a lesson card's frontmatter, as produced by `parseLessonFrontmatter`. */
export interface LessonFrontmatter {
  /** Value of `type:`, lower-cased. */
  type?: string;
  /** Value of `title:` with surrounding quotes removed. */
  title?: string;
  /** Value of `occurrences:` when it parses as a finite number. */
  occurrences?: number;
  /** Entries of the inline `applies-to:` list. */
  appliesTo?: string[];
}
|
||
|
||
/**
 * Parse the leading `--- ... ---` frontmatter block of a lesson card.
 *
 * Only the first 2000 characters are scanned. This is a line-based reader, not a YAML parser:
 * each key is read from a single `key: value` line, and `applies-to` is expected as an inline
 * list (`[a, b]`). A key with nothing after the colon is reported as absent rather than
 * picking up the following line. An optional byte-order mark (U+FEFF) before the opening
 * fence is tolerated.
 *
 * @param content full lesson card text
 * @returns the recognised fields; `{}` when there is no frontmatter
 */
export function parseLessonFrontmatter(content: string): LessonFrontmatter {
  if (!content) return {};
  const head = content.slice(0, 2000);

  // The BOM is spelled as an escape so the pattern does not depend on an invisible character.
  const opening = /^\uFEFF?---\s*\n/.exec(head);
  if (!opening) return {};

  // Search from the newline ending the opening fence so an empty block closes on its own fence.
  const end = head.indexOf('\n---', opening[0].length - 1);
  if (end < 0) return {};
  const block = head.slice(0, end);

  // Whitespace around the colon is limited to spaces/tabs and the value must start with a
  // non-blank character, so an empty `key:` can never capture the next line as its value.
  const get = (key: string): string | undefined => {
    const m = block.match(new RegExp(`^[ \\t]*${key}[ \\t]*:[ \\t]*(\\S.*?)[ \\t]*$`, 'm'));
    return m ? m[1].replace(/^["']|["']$/g, '').trim() : undefined;
  };

  const occ = get('occurrences');
  const occurrences =
    occ !== undefined && occ !== '' && Number.isFinite(Number(occ)) ? Number(occ) : undefined;

  const tags = get('applies-to');
  let appliesTo: string[] | undefined;
  if (tags) {
    const inner = tags.replace(/^\[|\]$/g, '').trim();
    appliesTo = inner
      ? inner.split(',').map((s) => s.trim().replace(/^["']|["']$/g, '').trim()).filter(Boolean)
      : [];
  }

  return {
    type: get('type')?.toLowerCase(),
    title: get('title'),
    occurrences,
    appliesTo,
  };
}
|
||
|
||
/**
 * Reduce a lesson title to a comparison key: lower-cased, with every character other than
 * `a-z`, `0-9` and Hangul syllables removed.
 *
 * @param title lesson title (an empty value yields '')
 * @returns the normalized key
 */
export function normalizeLessonTitle(title: string): string {
  if (!title) return '';
  const lowered = title.toLowerCase();
  return lowered.replace(/[^a-z0-9가-힣]+/g, '');
}
|
||
|
||
/**
 * Return `content` with the frontmatter's `occurrences:` incremented by 1 and `last-seen:` set
 * to `today`. Keys that are missing are appended at the end of the frontmatter block. When
 * there is no frontmatter at all, `content` is returned unchanged (the caller decides what to
 * do).
 *
 * The current count is read from the frontmatter block located here, so it is always the same
 * line that gets rewritten. A missing, empty or non-numeric value counts as 1.
 *
 * @param content full lesson card text
 * @param today   date string to store in `last-seen:` (inserted verbatim)
 * @returns the updated card text
 */
export function bumpLessonOccurrences(content: string, today: string): string {
  // The BOM is spelled as an escape so the pattern does not depend on an invisible character.
  const opening = /^\uFEFF?---\s*\n/.exec(content);
  if (!opening) return content;

  // Search from the newline ending the opening fence so an empty block closes on its own fence.
  const end = content.indexOf('\n---', opening[0].length - 1);
  if (end < 0) return content;

  let block = content.slice(0, end);
  const rest = content.slice(end);

  // Only spaces/tabs are allowed around the colon: with `\s*` an empty `occurrences:` would
  // extend into the next line and the replacement would overwrite that line.
  const occurrencesLine = /^([ \t]*occurrences[ \t]*:[ \t]*)(.*)$/m;
  const lastSeenLine = /^([ \t]*last-seen[ \t]*:[ \t]*)(.*)$/m;

  // Keep the existing spacing, but make sure a bare `key:` gets a separating space.
  const withValue = (prefix: string, value: string | number): string =>
    `${prefix}${prefix.endsWith(':') ? ' ' : ''}${value}`;

  const occMatch = occurrencesLine.exec(block);
  const rawCount = (occMatch?.[2] ?? '').trim().replace(/^["']|["']$/g, '').trim();
  const parsedCount = rawCount === '' ? Number.NaN : Number(rawCount);
  const next = (Number.isFinite(parsedCount) ? parsedCount : 1) + 1;

  // Replacer functions are used so `$` sequences in the inserted text are never interpreted.
  block = occMatch
    ? block.replace(occurrencesLine, (_line: string, prefix: string) => withValue(prefix, next))
    : `${block}\noccurrences: ${next}`;

  block = lastSeenLine.test(block)
    ? block.replace(lastSeenLine, (_line: string, prefix: string) => withValue(prefix, today))
    : `${block}\nlast-seen: ${today}`;

  return block + rest;
}
|
||
|
||
// ── Post-answer checklist coverage (non-blocking flag) ──────────────────────
|
||
|
||
/**
 * Collect the distinct terms of a checklist item that are worth matching against an answer.
 *
 * @param item checklist item text
 * @returns the unique tokens from `tokenize` that are at least two characters long; empty for
 *          an item that starts with `<` after trimming (an unfilled template placeholder)
 */
function checklistItemTerms(item: string): string[] {
  const isPlaceholder = item.trim().startsWith('<');
  if (isPlaceholder) return []; // template placeholder like "<다음에 확인할 것>"

  const distinct = new Set(tokenize(item));
  return Array.from(distinct).filter((term) => term.length >= 2);
}
|
||
|
||
/**
 * Given the assistant's answer and the lesson cards injected this turn, return the
 * Prevention-Checklist items that the answer does not visibly address, i.e. none of the
 * item's significant terms appears among the answer's tokens.
 *
 * Conservative by design: items with fewer than two significant terms (including template
 * placeholders) are never flagged, and an item is flagged at most once across all cards.
 *
 * @param answer         the assistant's answer text
 * @param lessonContents text of each lesson card injected this turn
 * @param max            upper bound on the number of items returned; a value of 0 or less
 *                       yields an empty result
 * @returns the flagged items in encounter order, at most `max` of them
 */
export function findUnaddressedChecklistItems(answer: string, lessonContents: string[], max = 3): string[] {
  if (!answer || !lessonContents || lessonContents.length === 0) return [];

  const out: string[] = [];
  // Enforce the cap before anything is collected; checking only after a push would let a
  // non-positive `max` still return one item.
  if (max <= 0) return out;

  const answerTerms = new Set(tokenize(answer));
  const seen = new Set<string>();

  for (const content of lessonContents) {
    for (const item of extractPreventionChecklist(content)) {
      const key = normalizeLessonTitle(item);
      if (!key || seen.has(key)) continue;

      const terms = checklistItemTerms(item);
      if (terms.length < 2) continue; // too vague to judge

      const covered = terms.some((t) => answerTerms.has(t));
      if (covered) continue;

      out.push(item);
      seen.add(key);
      if (out.length >= max) return out;
    }
  }
  return out;
}
|