refactor: optimize core engine and retrieval logic for v2.80.43
This commit is contained in:
@@ -47,6 +47,54 @@ function parseFrontmatterType(content: string): string {
|
||||
return m ? m[1].trim().toLowerCase() : '';
|
||||
}
|
||||
|
||||
/**
 * Pull a specific markdown section ("## NAME ... up to the next heading") from a lesson card.
 *
 * The section starts right after the first match of `headingRe` and ends at the
 * next ATX heading line (1–6 `#` followed by whitespace or end of line).
 * Heading-looking lines inside a fenced code block (``` or ~~~) do not end the
 * section, so a shell snippet containing `# comment` lines is kept intact.
 * If a fence is opened but never closed, the first heading-looking line ends
 * the section regardless of fences, so a malformed card cannot swallow the
 * rest of the document.
 *
 * @param content   Full markdown text of the card.
 * @param headingRe Pattern that matches the wanted heading line. It must not
 *                  carry the `g` flag: a global match has no `index`, and the
 *                  function then returns ''.
 * @returns The trimmed section body, or '' if the heading isn't found.
 */
function extractSection(content: string, headingRe: RegExp): string {
  const m = content.match(headingRe);
  if (!m || m.index === undefined) return '';
  const after = content.slice(m.index + m[0].length);

  const headingLine = /^#{1,6}(?:\s|$)/;
  const fenceLine = /^ {0,3}(`{3,}|~{3,})/;

  let openFence = ''; // marker of the currently open code fence, '' when outside a fence
  let naiveStop = -1; // first heading-looking line, ignoring fences (fallback)
  let stop = -1; // first heading line found outside any fence
  let offset = 0; // index in `after` where the current line starts
  let first = true;

  for (const line of after.split('\n')) {
    const fence = fenceLine.exec(line);
    if (fence) {
      const marker = fence[1];
      if (!openFence) {
        openFence = marker;
      } else if (
        marker.charAt(0) === openFence.charAt(0) &&
        marker.length >= openFence.length &&
        line.slice(fence[0].length).trim() === ''
      ) {
        // A closing fence uses the same character, is at least as long as the
        // opener, and carries no info string.
        openFence = '';
      }
    } else if (!first && headingLine.test(line)) {
      // The first chunk is the remainder of the matched heading line itself,
      // so it is never treated as the next heading.
      if (naiveStop < 0) naiveStop = offset - 1;
      if (!openFence) {
        stop = offset - 1; // index of the '\n' that precedes this heading
        break;
      }
    }
    first = false;
    offset += line.length + 1;
  }

  // Unclosed fence: fall back to the fence-agnostic boundary.
  if (openFence) stop = naiveStop;

  const section = stop >= 0 ? after.slice(0, stop) : after;
  return section.trim();
}
|
||||
|
||||
/**
 * Canonical output heading paired with the pattern that recognises that
 * section's heading line in a lesson card (English and Korean variants).
 * The patterns carry no `g` flag, so they hold no match state and can be shared.
 */
const LESSON_ESSENCE_SECTIONS: ReadonlyArray<readonly [string, RegExp]> = [
  ['## Mistake / Risk', /^#{1,6}\s*(?:mistake\s*\/?\s*risk|mistake|risk|실수|문제)\s*$/im],
  ['## Root Cause', /^#{1,6}\s*(?:root\s*cause|근본\s*원인|원인)\s*$/im],
  ['## Fix', /^#{1,6}\s*(?:fix|해결|수정)\s*$/im],
  ['## Prevention Checklist', /^#{1,6}\s*(?:prevention\s*checklist|prevention|체크리스트|예방\s*체크리스트)\s*$/im],
];

/**
 * Slim a lesson card down to the sections that actually matter for guardrails:
 * Mistake / Risk, Root Cause, Fix, and Prevention Checklist. Everything else
 * (Situation, Applies-To, verbose narrative) is dropped. Each kept section is
 * emitted under a canonical `##` heading so the model still sees the structure.
 *
 * A section is skipped when its body is empty or is a single `<placeholder>`
 * left over from the card template.
 *
 * If no recognised section is found, the original content is returned instead,
 * which keeps backwards-compat for old lessons that don't follow the current
 * template.
 *
 * Either result is clipped to `maxLen` UTF-16 code units. When clipping
 * happens, a newline and an ellipsis ("\n…") are appended, so a clipped result
 * can be up to two characters longer than `maxLen`. The cut never lands in the
 * middle of a surrogate pair, so the output contains no lone surrogate that
 * was not already in the input.
 *
 * Why: lesson cards are loaded at 2500 chars each and three cards can eat
 * ~11K tokens. The essence sections are usually <600 chars total per card,
 * which trims retrieval tokens by ~70% without losing the signal.
 *
 * @param content Full markdown text of the lesson card.
 * @param maxLen  Maximum number of characters kept before the ellipsis;
 *                negative values are treated as 0.
 * @returns The slimmed (or fallback) markdown, or '' for empty input.
 */
export function extractLessonEssence(content: string, maxLen = 1200): string {
  if (!content) return '';

  // A negative limit would make slice() count from the end of the string.
  const budget = Math.max(0, maxLen);

  const clip = (text: string): string => {
    if (text.length <= budget) return text.trim();
    let end = budget;
    const last = text.charCodeAt(end - 1);
    // Do not cut between the two halves of a surrogate pair.
    if (last >= 0xd800 && last <= 0xdbff) end -= 1;
    return text.slice(0, end).trim() + '\n…';
  };

  const sections: Array<{ heading: string; body: string }> = [];
  for (const [heading, re] of LESSON_ESSENCE_SECTIONS) {
    const body = extractSection(content, re);
    // Ignore unfilled template placeholders such as "<describe the fix>".
    if (body && !/^<[^>]+>$/.test(body)) sections.push({ heading, body });
  }

  if (sections.length === 0) return clip(content);

  return clip(sections.map((s) => `${s.heading}\n${s.body}`).join('\n\n'));
}
|
||||
|
||||
/** Extract the "## Prevention Checklist" bullet list from a lesson card, if present. */
|
||||
export function extractPreventionChecklist(content: string): string[] {
|
||||
if (!content) return [];
|
||||
|
||||
Reference in New Issue
Block a user