refactor: optimize core engine and retrieval logic for v2.80.43

This commit is contained in:
2026-05-13 19:23:57 +09:00
parent c4260466b9
commit 089abf22db
17 changed files with 1311 additions and 88 deletions
+48
View File
@@ -47,6 +47,54 @@ function parseFrontmatterType(content: string): string {
return m ? m[1].trim().toLowerCase() : '';
}
/**
 * Pull one markdown section out of a lesson card: everything after the first
 * line matched by `headingRe`, up to (not including) the next heading line.
 *
 * Heading-looking lines inside a fenced code block (``` or ~~~) do not end the
 * section; these are typically `# comments` in shell or Python snippets. If a
 * fence is opened but never closed, the card is treated as malformed and the
 * first heading-looking line ends the section, fences ignored.
 *
 * Note: the search for the section's own heading (`headingRe`) is not
 * fence-aware; only the end-of-section detection is.
 *
 * @param content   Full markdown text of the card.
 * @param headingRe Pattern locating the section heading. Only the first match
 *                  is used; a global (`g`) pattern yields no match index and
 *                  therefore returns ''.
 * @returns The trimmed section body, or '' when the heading is not found.
 */
function extractSection(content: string, headingRe: RegExp): string {
  const m = content.match(headingRe);
  if (!m || m.index === undefined) return '';
  const after = content.slice(m.index + m[0].length);

  // Visits, in document order, every line start that is either a code-fence
  // marker (captured in group 1) or an ATX heading marker. The whitespace after
  // the hashes is a lookahead so it is never consumed: otherwise a bare "#"
  // line could swallow the newline in front of a closing fence.
  const marker = /\n(?: {0,3}(`{3,}|~{3,})|#{1,6}(?=\s))/g;

  let openFence: string | null = null; // marker text of the currently open fence
  let firstHeadingLike = -1; // first heading-looking line, fenced or not
  let stop = -1;

  for (let hit = marker.exec(after); hit !== null; hit = marker.exec(after)) {
    const fence: string | undefined = hit[1];
    if (fence !== undefined) {
      if (openFence === null) {
        openFence = fence;
      } else if (fence[0] === openFence[0] && fence.length >= openFence.length) {
        // A fence closes only on the same character, at least as long as the opener.
        openFence = null;
      }
      continue;
    }
    if (firstHeadingLike < 0) firstHeadingLike = hit.index;
    if (openFence === null) {
      stop = hit.index;
      break;
    }
  }

  // Unclosed fence: don't let it swallow the rest of the card.
  if (stop < 0 && openFence !== null) stop = firstHeadingLike;

  const section = stop >= 0 ? after.slice(0, stop) : after;
  return section.trim();
}
/**
 * Sections kept by `extractLessonEssence`, in output order: the canonical
 * heading to emit, and the pattern recognising that heading (English or Korean,
 * any heading level) in a card. The patterns are non-global, so they carry no
 * `lastIndex` state and are safe to share across calls.
 */
const LESSON_ESSENCE_SECTIONS: ReadonlyArray<readonly [string, RegExp]> = [
  ['## Mistake / Risk', /^#{1,6}\s*(?:mistake\s*\/?\s*risk|mistake|risk|실수|문제)\s*$/im],
  ['## Root Cause', /^#{1,6}\s*(?:root\s*cause|근본\s*원인|원인)\s*$/im],
  ['## Fix', /^#{1,6}\s*(?:fix|해결|수정)\s*$/im],
  ['## Prevention Checklist', /^#{1,6}\s*(?:prevention\s*checklist|prevention|체크리스트|예방\s*체크리스트)\s*$/im],
];

/** Marker appended to text that had to be cut short. */
const LESSON_CLIP_MARKER = '\n…';

/**
 * Trim `text` and cap it at `maxLen` UTF-16 code units, truncation marker
 * included. Text that fits once trimmed is returned without a marker.
 */
function clipLessonText(text: string, maxLen: number): string {
  const clean = text.trim();
  if (clean.length <= maxLen) return clean;

  // NaN and negative limits collapse to 0; a negative value handed straight to
  // slice() would count from the end and return almost the whole text.
  const limit = maxLen > 0 ? Math.floor(maxLen) : 0;
  if (limit < LESSON_CLIP_MARKER.length) return '';

  let cut = limit - LESSON_CLIP_MARKER.length;
  // Never cut between the two halves of a surrogate pair.
  const lastUnit = clean.charCodeAt(cut - 1);
  if (cut > 0 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;

  return clean.slice(0, cut).trim() + LESSON_CLIP_MARKER;
}

/**
 * Slim a lesson card down to the sections that matter for guardrails:
 * Mistake / Risk, Root Cause, Fix, and Prevention Checklist. Everything else
 * (Situation, Applies-To, free narrative) is dropped. Kept sections are
 * re-emitted under canonical `##` headings so the model still sees the
 * structure, in the fixed order above regardless of their order in the card.
 *
 * A section whose whole body is a single `<placeholder>` tag is treated as an
 * unfilled template slot and skipped.
 *
 * If no recognised section is found, the original content is returned instead
 * (clipped), which keeps older cards that predate the template usable.
 *
 * Why: per the original author's note, the essence sections are usually a small
 * fraction of a full card, so this cuts retrieval tokens substantially without
 * losing the signal (figures not verified here).
 *
 * @param content Full markdown text of the lesson card.
 * @param maxLen  Upper bound on the returned length in UTF-16 code units,
 *                including the trailing "…" marker when the text is clipped.
 * @returns The slimmed card, or '' for empty input.
 */
export function extractLessonEssence(content: string, maxLen = 1200): string {
  if (!content) return '';

  const kept: string[] = [];
  for (const [heading, re] of LESSON_ESSENCE_SECTIONS) {
    const body = extractSection(content, re);
    if (body && !/^<[^>]+>$/.test(body)) kept.push(`${heading}\n${body}`);
  }

  const text = kept.length > 0 ? kept.join('\n\n') : content;
  return clipLessonText(text, maxLen);
}
/** Extract the "## Prevention Checklist" bullet list from a lesson card, if present. */
export function extractPreventionChecklist(content: string): string[] {
if (!content) return [];