feat: Self-Evolving Digital Employee OS P0~P6 + 캘린더 충돌 게이트

신뢰성 코어 (P1~P2): - Requirement Graph: 업무 유형(회의록/시장조사/업무조사/일정) 필수 요소 주입 + 커버리지 hook - Confidence Engine(0~100 결정론적) / Escalation Engine(검토 요청) / Epistemic Guard(모름·추정·확실 3분류) - Provenance: citationTrace 에 출처 수정일·오래됨 경고 - Critic Loop: 문제 신호 turn 만 LLM 검수 1회 + 보완 카드 성장 루프 (P3): - Gap Detector(Requirement-Knowledge) / Need Engine(30/25/20/15/10 공식) / Knowledge Inventory - Learning Queue(proposed 전용 병합 — 승인은 사람만) / Decision Journal / Reflection 기록 - 반복 누락 요소(3회+)는 다음 turn 체크리스트에 자동 강조 (T5 루프) 지식 운영 (P4) + 기억 (P5) + 학습 실행 (P6): - Knowledge Validation + Belief Revision(중복 reject·충돌 시 update/add 권고) - Knowledge Decay(분야별 반감기 감사) / Knowledge Debt(blocked x impact) - Organizational Memory(.astra/organization.md 상시 주입) - Research Agent(approved 큐 -> 조사 브리프+추정 라벨 초안+Validation 게이트 -> proposals/) - Skill Score(전/후반 추세) + Success Pattern DB(전요소충족+확신도90+ 자동 적재) 병렬 트랙: - 캘린더 충돌 게이트: conflictCheck + 구조화 이벤트 캐시 + create_calendar_event 차단(force 는 사용자 승인 후) - Task Eval Harness: 회의록 골든셋 자동 채점 명령 + 성장 리포트/학습 큐/노후 점검 명령 신규 모듈 17종(src/intelligence/), VS Code 명령 5종, 설정 11종, 테스트 +89건(전체 508 통과). 설계 문서: docs/SELF_EVOLVING_OS_MASTER_PLAN.md Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 13:42:09 +09:00
parent cbc2558550
commit 2afd1ac589
41 changed files with 4364 additions and 2 deletions
@@ -0,0 +1,165 @@
+/**
+ * Confidence Engine — 답변 확신도 0~100 결정론적 산출.
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 2 / Track 1-1. 신뢰 조건 T4
+ * "확신이 없으면 사람에게 묻는다" 의 측정 기반 — Escalation Engine 의 입력.
+ *
+ * 설계 원칙 (termValidator 와 동일): LLM 호출 없음. 검색 그라운딩 신호(턴 컨텍스트)와
+ * 답변 텍스트 신호(정규식)만으로 산출 — 매 turn 안전 실행, latency 0.
+ *
+ * 점수는 "모델이 얼마나 자신 있나" 가 아니라 "이 답변을 검증 없이 신뢰해도 되는
+ * 근거가 얼마나 갖춰졌나" 를 측정한다. 따라서 모델이 솔직하게 "(확인 필요)" 를
+ * 표시하면 점수가 *내려가는* 것이 올바른 동작 — 사용자 검토를 유도해야 하므로.
+ *
+ * 구간 (설계서 7.5):
+ *   90+    높음        — 그대로 신뢰 가능
+ *   70~89  보통        — 일반 업무 통과
+ *   50~69  낮음        — 업무 산출물이면 검토 권장
+ *   <50    매우 낮음   — 추가 조사 / 인간 검토 필요 (Escalation)
+ */
+
+/** 검색(pre-answer) 신호 — memoryContext 가 turn 마다 채움. */
+export interface RetrievalConfidenceSignals {
+    /** 선택된 검색 청크 수 (brain-trace 제외). */
+    chunkCount: number;
+    /** 최고 청크 score (0~1 정규화). 청크 없으면 0. */
+    topScore: number;
+    /** conflictSeverity 가 NONE 이 아닌 청크 수. */
+    conflictCount: number;
+    /** Intent Clarification 이 모호성을 감지했는가. */
+    ambiguityDetected: boolean;
+}
+
+export interface ConfidenceFactor {
+    /** 점수에 기여한 요인 설명 (footer 표시용). */
+    label: string;
+    /** 기여 점수 (±). */
+    delta: number;
+}
+
+export type ConfidenceBand = 'high' | 'medium' | 'low' | 'very-low';
+
+export interface ConfidenceResult {
+    score: number;            // 0~100
+    band: ConfidenceBand;
+    bandLabel: string;        // 높음/보통/낮음/매우 낮음
+    factors: ConfidenceFactor[];
+}
+
+const BAND_LABELS: Record<ConfidenceBand, string> = {
+    'high': '높음',
+    'medium': '보통',
+    'low': '낮음',
+    'very-low': '매우 낮음',
+};
+
+/**
+ * Maps a confidence score to its band.
+ *
+ * Lower bounds: 90 -> 'high', 70 -> 'medium', 50 -> 'low'. Anything that reaches
+ * none of them (including NaN, which fails every comparison) is 'very-low'.
+ *
+ * @param score Confidence score, nominally 0-100.
+ * @returns The first band whose lower bound the score reaches.
+ */
+export function toBand(score: number): ConfidenceBand {
+    // Ordered from the highest floor down; the first floor reached wins.
+    const floors: Array<[number, ConfidenceBand]> = [
+        [90, 'high'],
+        [70, 'medium'],
+        [50, 'low'],
+    ];
+    for (const [floor, band] of floors) {
+        if (score >= floor) {
+            return band;
+        }
+    }
+    return 'very-low';
+}
+
+/** 답변 텍스트에서 추출하는 신호. */
+export interface AnswerConfidenceSignals {
+    /** 헤지 마커 수 — "(확인 필요)", "추정", "확실하지 않" 등. */
+    hedgeCount: number;
+    /** 답변 끝 출처 라인이 검색 출처를 인용하는가. */
+    hasCitation: boolean;
+    /** 출처 라인이 "모델 지식" 만 표기하는가 (검색 출처 미사용). */
+    modelKnowledgeOnly: boolean;
+    /** Requirement 커버리지 — 검사 안 했으면 null. */
+    coverageMissing: number | null;
+}
+
+/**
+ * Hedge markers counted as explicit uncertainty in an answer: the literal
+ * placeholders "(확인 필요)", "(담당자 미정)", "(기한 미정)" plus estimate /
+ * "not certain" / "don't know" phrasings. Global flag so `match` returns every hit.
+ */
+const HEDGE_PATTERN = /\(확인 필요\)|\(담당자 미정\)|\(기한 미정\)|추정(?:치|입니다|됩니다)?|확실하지 않|정확하지 않을 수|모르겠|알 수 없/g;
+
+/**
+ * Derives the answer-side confidence signals from the answer text (regex only,
+ * deterministic).
+ *
+ * @param assistantAnswer Answer text; a falsy value is treated as the empty string.
+ * @param coverageMissing Passed through unchanged — supplied by the caller.
+ * @returns Hedge count plus the two source-line flags and the given coverage value.
+ */
+export function extractAnswerSignals(assistantAnswer: string, coverageMissing: number | null): AnswerConfidenceSignals {
+    const answerText = assistantAnswer ? assistantAnswer : '';
+    const hedgeHits = answerText.match(HEDGE_PATTERN) ?? [];
+
+    // NOTE(review): the pattern is unanchored, so it picks the FIRST "출처" anywhere
+    // in the text (colon optional), not necessarily a trailing source line.
+    const sourceMatch = answerText.match(/\*?출처:?\*?\s*(.+)/);
+    const sourceBody = sourceMatch === null ? '' : sourceMatch[1];
+    const citesModelKnowledge = sourceBody.includes('모델 지식');
+
+    return {
+        hedgeCount: hedgeHits.length,
+        // A source line that mentions "모델 지식" does not count as a retrieval citation.
+        hasCitation: sourceMatch !== null && !citesModelKnowledge,
+        modelKnowledgeOnly: citesModelKnowledge,
+        coverageMissing,
+    };
+}
+
+/**
+ * Computes the confidence result from retrieval and answer signals.
+ *
+ * Starts at a baseline of 55 and adds one signed delta per applicable factor,
+ * in this order: grounding, citation, source conflicts, request ambiguity,
+ * requirement coverage, hedge markers. The sum is rounded and clamped to 0-100,
+ * then mapped to a band. The weights are v1 heuristics.
+ *
+ * @param retrieval Pre-answer retrieval signals.
+ * @param answer Signals extracted from the answer text.
+ * @returns Score, band, band label and the list of contributing factors.
+ */
+export function computeConfidence(
+    retrieval: RetrievalConfidenceSignals,
+    answer: AnswerConfidenceSignals,
+): ConfidenceResult {
+    const BASELINE = 55;
+    const factors: ConfidenceFactor[] = [];
+    const record = (label: string, delta: number): void => {
+        factors.push({ label, delta });
+    };
+
+    const { chunkCount, topScore, conflictCount, ambiguityDetected } = retrieval;
+    const { hasCitation, modelKnowledgeOnly, coverageMissing, hedgeCount } = answer;
+
+    // Grounding: +25 strong (3+ chunks and top score >= 0.5), +12 any chunk, -15 none.
+    const stronglyGrounded = chunkCount >= 3 && topScore >= 0.5;
+    if (stronglyGrounded) {
+        record(`검색 근거 ${chunkCount}건(강)`, 25);
+    } else if (chunkCount >= 1) {
+        record(`검색 근거 ${chunkCount}건`, 12);
+    } else {
+        record('검색 근거 없음 (모델 일반 지식)', -15);
+    }
+
+    // Citation: +8 when a retrieval source is cited, -5 when only model knowledge is declared.
+    if (hasCitation) {
+        record('출처 인용 있음', 8);
+    } else if (modelKnowledgeOnly) {
+        record('모델 지식만 사용 명시', -5);
+    }
+
+    // Source conflicts: -8 each, capped at -16.
+    if (conflictCount > 0) {
+        record(`출처 간 충돌 ${conflictCount}건`, -Math.min(16, conflictCount * 8));
+    }
+
+    // Ambiguous request: flat -10.
+    if (ambiguityDetected) {
+        record('요청 모호성 감지', -10);
+    }
+
+    // Requirement coverage (skipped when null): +10 if nothing is missing,
+    // otherwise -6 per missing element, capped at -18.
+    if (coverageMissing === 0) {
+        record('필수 요소 전부 충족', 10);
+    } else if (coverageMissing !== null) {
+        record(`필수 요소 ${coverageMissing}개 누락 가능`, -Math.min(18, coverageMissing * 6));
+    }
+
+    // Hedge markers: -4 each, capped at -12. An honest uncertainty marker lowers
+    // the score on purpose so that the user is nudged to review.
+    if (hedgeCount > 0) {
+        record(`불확실 표시 ${hedgeCount}곳`, -Math.min(12, hedgeCount * 4));
+    }
+
+    const rawTotal = factors.reduce((sum, factor) => sum + factor.delta, BASELINE);
+    const score = Math.max(0, Math.min(100, Math.round(rawTotal)));
+    const band = toBand(score);
+    return { score, band, bandLabel: BAND_LABELS[band], factors };
+}
+
+/**
+ * Renders the one-line confidence footer appended to an answer.
+ *
+ * Shows a band icon, the score, the band label and the labels of the (up to)
+ * three factors with the largest absolute delta, joined by " · ".
+ *
+ * @param result Confidence result to render; its `factors` array is not mutated.
+ * @returns A markdown blockquote line preceded by a blank line.
+ */
+export function formatConfidenceFooter(result: ConfidenceResult): string {
+    const iconByBand: Record<string, string> = {
+        'high': '🟢',
+        'medium': '🔵',
+        'low': '🟡',
+    };
+    // Any band without an entry above (i.e. 'very-low') gets the red icon.
+    const icon = iconByBand[result.band] ?? '🔴';
+
+    // Sort a copy so the caller's factor order stays intact.
+    const byImpact = [...result.factors].sort(
+        (left, right) => Math.abs(right.delta) - Math.abs(left.delta),
+    );
+    const labels: string[] = [];
+    for (const factor of byImpact.slice(0, 3)) {
+        labels.push(factor.label);
+    }
+
+    return `\n\n> ${icon} **확신도 ${result.score}/100 (${result.bandLabel})** — ${labels.join(' · ')}`;
+}
@@ -0,0 +1,174 @@
+/**
+ * Critic Agent + Debate Loop (v1) — 제출된 업무 산출물의 LLM 검수.
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 1 / Track 2-3. 신뢰 조건 T3 의 LLM 계층:
+ * Requirement Coverage(결정론적, 정규식) 가 "요소가 *언급* 됐는가" 만 보면,
+ * Critic 은 "내용이 *충실* 한가 + 결정/미결 구분이 맞는가 + 근거 없는 단정이
+ * 없는가" 를 본다.
+ *
+ * Debate Loop 원형은 작성→비판→재작성→재검토지만, 로컬 Gemma 의 latency 비용
+ * 때문에 v1 은 *조건부 1-pass 검수* — 결정론적 검사(커버리지 누락 또는 확신도
+ * <70)가 문제를 신호할 때만 Critic LLM 1회 호출, 결과를 답변 아래 보완 카드로
+ * 표시. 전면 다회전 debate 는 config knob(maxRounds) 만 준비해 두고 후속 증분.
+ *
+ * 모든 LLM 의존은 주입(critique caller) — 모듈 자체는 순수, 테스트 가능.
+ */
+
+import type { TaskRequirement } from './requirementGraph';
+
+export interface CriticIssue {
+    severity: 'major' | 'minor';
+    description: string;
+}
+
+export interface CritiqueResult {
+    /** true = 검수 통과 (보완 불필요). */
+    pass: boolean;
+    issues: CriticIssue[];
+    /** 누락 요소를 보완하는 추가 섹션 제안 (Critic 이 생성 가능했을 때만). */
+    supplement: string;
+    /** 디버그용 원문 (파싱 실패 분석). */
+    raw?: string;
+}
+
+/** 주입형 LLM caller — agent.ts 의 callNonStreaming 또는 평가 하니스의 단순 호출. */
+export type CritiqueLlmCall = (system: string, user: string, maxTokens: number) => Promise<string>;
+
+export interface CriticOptions {
+    /** 검수 대상 초안 최대 길이 (chars) — 초과분 잘라서 전달. 기본 12000. */
+    maxDraftChars: number;
+    /** Critic 응답 max tokens. 기본 700. */
+    maxTokens: number;
+}
+
+export const DEFAULT_CRITIC_OPTIONS: CriticOptions = {
+    maxDraftChars: 12000,
+    maxTokens: 700,
+};
+
+/**
+ * Builds the system and user prompts for one Critic review call.
+ *
+ * The user prompt has three sections separated by blank lines: the original
+ * request, the review context (task type, required elements, deterministic
+ * check outcome) and the draft under review. Drafts longer than
+ * `opts.maxDraftChars` are cut at that length and marked as truncated.
+ *
+ * @param userPrompt The user's original request.
+ * @param draft The draft to review.
+ * @param requirement Detected task requirement, or null when unclassified.
+ * @param missingLabels Elements the deterministic coverage check flagged as possibly missing.
+ * @param opts Critic options; only `maxDraftChars` is read here.
+ * @returns The `system` and `user` prompt strings.
+ */
+export function buildCritiquePrompt(
+    userPrompt: string,
+    draft: string,
+    requirement: TaskRequirement | null,
+    missingLabels: string[],
+    opts: CriticOptions = DEFAULT_CRITIC_OPTIONS,
+): { system: string; user: string } {
+    // Review context, part 1: task type and its required elements.
+    const requirementLines: string[] = [];
+    if (requirement) {
+        requirementLines.push(`업무 유형: ${requirement.label}`, '필수 요소:');
+        for (const element of requirement.elements) {
+            requirementLines.push(`- ${element.label}: ${element.hint}`);
+        }
+    } else {
+        requirementLines.push('업무 유형: (미분류)');
+    }
+
+    // Review context, part 2: what the deterministic check reported.
+    let coverageLine = '결정론적 검사 통과 (참고용 재확인)';
+    if (missingLabels.length > 0) {
+        coverageLine = `결정론적 검사가 누락 가능성을 표시한 요소: ${missingLabels.join(', ')}`;
+    }
+
+    const systemLines = [
+        '너는 업무 산출물 검수자(Critic)다. 동료가 작성한 초안을 비판적으로 검토한다.',
+        '검수 기준:',
+        '1. 필수 요소가 *내용으로* 충실한가 (단어만 등장 ≠ 충족).',
+        '2. 결정사항과 미결(논의만 된 것)이 구분돼 있는가.',
+        '3. 근거 없는 단정·지어낸 수치/이름/날짜가 없는가. 원문에 없는 내용 발견 시 major.',
+        '4. 정보가 없는 항목은 "(확인 필요)" 로 솔직히 표시했는가.',
+        '',
+        '반드시 아래 JSON *만* 출력 (다른 텍스트 금지):',
+        '{"pass": true|false, "issues": [{"severity": "major"|"minor", "description": "..."}], "supplement": "누락 보완 텍스트 (보완 불가능하면 빈 문자열)"}',
+        'supplement 는 초안에 *실제로 추가할 수 있는* 마크다운 섹션만. 원문에 없는 내용을 지어내 보완하는 것은 금지 — 그 경우 "(확인 필요)" 항목으로 작성.',
+    ];
+
+    // Cap the draft so an oversized answer cannot blow up the critic prompt.
+    let reviewedDraft = draft;
+    if (draft.length > opts.maxDraftChars) {
+        reviewedDraft = draft.slice(0, opts.maxDraftChars) + '\n…(잘림)';
+    }
+
+    const userSections = [
+        `[원래 요청]\n${userPrompt}`,
+        `[검수 기준 컨텍스트]\n${requirementLines.join('\n')}\n${coverageLine}`,
+        `[검수 대상 초안]\n${reviewedDraft}`,
+    ];
+
+    return { system: systemLines.join('\n'), user: userSections.join('\n\n') };
+}
+
+/**
+ * Returns the index of the `}` that closes the object opened at `start`, or -1
+ * when the braces never balance. Braces inside JSON string literals are ignored
+ * and backslash escapes are honoured.
+ */
+function findBalancedObjectEnd(raw: string, start: number): number {
+    let depth = 0;
+    let inString = false;
+    let escaped = false;
+    for (let i = start; i < raw.length; i++) {
+        const ch = raw[i];
+        if (escaped) { escaped = false; continue; }
+        if (ch === '\\') { escaped = true; continue; }
+        if (ch === '"') { inString = !inString; continue; }
+        if (inString) continue;
+        if (ch === '{') {
+            depth++;
+        } else if (ch === '}') {
+            depth--;
+            if (depth === 0) return i;
+        }
+    }
+    return -1;
+}
+
+/**
+ * Extracts the critique JSON from a Critic LLM response that may be wrapped in
+ * code fences or surrounded by chatter.
+ *
+ * The first balanced `{...}` block is tried first. If it is unbalanced or is not
+ * valid JSON (e.g. a stray brace in a preamble), later `{` positions are tried
+ * as well; those fallback candidates are accepted only when they look like a
+ * critique (boolean `pass` or array `issues`), so a nested issue object from a
+ * truncated response is never mistaken for the result.
+ *
+ * Normalisation: issues without a string `description` are dropped, any severity
+ * other than 'major' becomes 'minor', descriptions are capped at 500 chars,
+ * `supplement` at 4000 chars, and `pass` is true only when the model said so
+ * AND no issues remain.
+ *
+ * @param raw Raw LLM response text.
+ * @returns The normalised critique, or null when no usable JSON object is found.
+ */
+export function parseCritique(raw: string): CritiqueResult | null {
+    if (!raw || !raw.trim()) return null;
+    const firstStart = raw.indexOf('{');
+    for (let start = firstStart; start !== -1; start = raw.indexOf('{', start + 1)) {
+        const end = findBalancedObjectEnd(raw, start);
+        if (end === -1) continue;
+
+        let obj: any;
+        try {
+            obj = JSON.parse(raw.slice(start, end + 1));
+        } catch {
+            continue; // not JSON — try the next candidate
+        }
+
+        // The first candidate is accepted as-is (previous behaviour); fallback
+        // candidates must carry at least one recognisable critique field.
+        const looksLikeCritique = typeof obj.pass === 'boolean' || Array.isArray(obj.issues);
+        if (start !== firstStart && !looksLikeCritique) continue;
+
+        const issues: CriticIssue[] = Array.isArray(obj.issues)
+            ? obj.issues
+                .filter((i: any) => i && typeof i.description === 'string')
+                .map((i: any) => ({
+                    severity: i.severity === 'major' ? 'major' : 'minor',
+                    description: String(i.description).slice(0, 500),
+                }))
+            : [];
+        return {
+            pass: obj.pass === true && issues.length === 0,
+            issues,
+            supplement: typeof obj.supplement === 'string' ? obj.supplement.slice(0, 4000) : '',
+            raw: raw.slice(0, 200),
+        };
+    }
+    return null;
+}
+
+/**
+ * Runs a single Critic review pass.
+ *
+ * Builds the critique prompt, invokes the injected LLM caller once and parses
+ * the response. If the LLM call rejects, or the response cannot be parsed, the
+ * result is null so that the caller can skip the review silently instead of
+ * failing the main turn.
+ *
+ * @param params Request, draft, requirement context, the injected LLM caller
+ *   and optional overrides merged over `DEFAULT_CRITIC_OPTIONS`.
+ * @returns The parsed critique, or null on LLM or parse failure.
+ */
+export async function runCriticReview(params: {
+    userPrompt: string;
+    draft: string;
+    requirement: TaskRequirement | null;
+    missingLabels: string[];
+    callLlm: CritiqueLlmCall;
+    options?: Partial<CriticOptions>;
+}): Promise<CritiqueResult | null> {
+    const { userPrompt, draft, requirement, missingLabels, callLlm, options } = params;
+    const effective: CriticOptions = { ...DEFAULT_CRITIC_OPTIONS, ...(options || {}) };
+    const prompt = buildCritiquePrompt(userPrompt, draft, requirement, missingLabels, effective);
+
+    let response: string;
+    try {
+        response = await callLlm(prompt.system, prompt.user, effective.maxTokens);
+    } catch {
+        // Best effort by design: a failed critic call must not surface as an error.
+        return null;
+    }
+    return parseCritique(response);
+}
+
+/**
+ * Renders the Critic footer card shown under an answer.
+ *
+ * A passing critique renders as the empty string. Otherwise the card lists up
+ * to six issues (red marker for major, yellow for anything else) and, when the
+ * supplement is non-blank, the trimmed supplement quoted line by line.
+ *
+ * @param critique Parsed critique result.
+ * @returns Markdown blockquote text, or '' when the critique passed.
+ */
+export function formatCriticFooter(critique: CritiqueResult): string {
+    if (critique.pass) {
+        return '';
+    }
+
+    const issueLines = critique.issues
+        .slice(0, 6)
+        .map((issue) => `> - ${issue.severity === 'major' ? '🔴' : '🟡'} ${issue.description}`);
+
+    const supplementText = critique.supplement ? critique.supplement.trim() : '';
+    const supplementLines = supplementText
+        ? ['>', '> **보완 제안:**', ...supplementText.split('\n').map((line) => `> ${line}`)]
+        : [];
+
+    return [
+        '\n\n> 🔁 **검수 (Critic)** — 초안에서 발견된 문제:',
+        ...issueLines,
+        ...supplementLines,
+    ].join('\n');
+}
@@ -0,0 +1,45 @@
+/**
+ * Epistemic Guard — 모름/추정/확실 3분류 강제 시스템 프롬프트 블록.
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 2 / Track 1-3 (Anti-Hallucination Layer).
+ * 신뢰 조건 T1 "모르면 모른다고 말한다" 담당.
+ *
+ * CoVe(coveBlock) 와의 분업:
+ *   - CoVe: 검색 *출처가 있을 때* 주장-출처 매핑을 검증 (그라운딩 점검)
+ *   - Epistemic Guard: 출처 유무와 *무관하게* 모든 주장의 인식론적 등급 표시를 강제,
+ *     특히 검색 근거가 *없는* turn 에서 단정 금지 + 역질문 우선 지시
+ *
+ * 즉 CoVe 가 못 덮는 "검색 결과 0건인데 모델이 그럴듯하게 지어내는" 케이스가
+ * 이 블록의 주 타깃. 검색 근거가 약할수록 지시가 강해진다.
+ */
+
+export interface EpistemicGuardSignals {
+    /** 선택된 검색 청크 수 (brain-trace 제외). */
+    chunkCount: number;
+    /** 업무 유형 감지됨 (Requirement Graph) — 업무 산출물은 더 엄격하게. */
+    taskDetected: boolean;
+}
+
+/**
+ * Builds the [EPISTEMIC GUARD] system-prompt block.
+ *
+ * The base block always states the three epistemic grades and the ban on
+ * unsupported specifics. When the turn has zero retrieved chunks, a warning
+ * section is appended; if a task type was also detected, that section
+ * additionally tells the model to consider asking for source material first.
+ *
+ * @param signals Chunk count and task-detection flag for the current turn.
+ * @returns The block as newline-joined text, including opening and closing tags.
+ */
+export function buildEpistemicGuardBlock(signals: EpistemicGuardSignals): string {
+    const baseRules = [
+        '[EPISTEMIC GUARD]',
+        '모든 사실성 주장은 다음 3등급 중 하나로 인식하고, 등급이 낮으면 표시할 것:',
+        '',
+        '- **확실** — 검색 출처 또는 명백한 사실이 직접 지지. 표시 불필요.',
+        '- **추정** — 근거가 간접적이거나 일반화. 문장에 "~로 추정", "일반적으로" 명시.',
+        '- **모름 / 확인 필요** — 근거 없음. *지어내지 말고* "(확인 필요)" 표시 또는 솔직히 모른다고 말할 것.',
+        '',
+        '금지: 근거 없는 수치·날짜·고유명사·인용을 사실처럼 제시하는 것. 모름을 인정하는 답변이 그럴듯한 오답보다 항상 낫다.',
+    ];
+
+    // Extra section only for turns with no retrieval grounding at all.
+    const ungroundedWarning: string[] = [];
+    const hasNoGrounding = signals.chunkCount === 0;
+    if (hasNoGrounding) {
+        ungroundedWarning.push(
+            '',
+            '⚠️ 이번 턴은 *검색 근거가 없음* — 모델 일반 지식만으로 답하는 상태다.',
+            '- 구체적 수치·최신 정보·사용자 고유 정보(일정, 과거 회의 등)는 단정하지 말 것.',
+        );
+    }
+    if (hasNoGrounding && signals.taskDetected) {
+        ungroundedWarning.push(
+            '- 업무 산출물 요청인데 근거가 없으므로, 필요한 원자료(회의 메모, 조사 범위 등)를 먼저 *질문*하는 것을 우선 고려할 것.',
+        );
+    }
+
+    return [...baseRules, ...ungroundedWarning, '[/EPISTEMIC GUARD]'].join('\n');
+}
@@ -0,0 +1,74 @@
+/**
+ * Escalation Engine — 인간 개입 필요성 판단.
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 2 / Track 1-2. 신뢰 조건 T4 의 행동 부분:
+ * Confidence Engine 이 "얼마나 확실한가" 를 재면, 이 모듈은 "그래서 사람에게
+ * 물어야 하는가" 를 결정한다.
+ *
+ * 설계서 13장 조건: 확신도 낮음 / 영향도 높음 / 정보 부족 / 규칙 충돌 → 인간 검토.
+ *
+ * v1 은 결정론적 규칙 (LLM 호출 없음). 출력은 답변 아래 footer — 사용자에게
+ * "이 부분은 검토해 달라" 고 명시적으로 요청한다. 외부 액션 차단(승인 게이트)은
+ * Track 6-3 / approvalQueue 영역으로 분리.
+ */
+
+import type { ConfidenceResult } from './confidenceEngine';
+import type { CoverageResult } from './requirementGraph';
+
+export interface EscalationDecision {
+    escalate: boolean;
+    /** 검토 요청 이유 (사용자에게 그대로 표시). */
+    reasons: string[];
+}
+
+export interface EscalationInputs {
+    confidence: ConfidenceResult;
+    /** Requirement 커버리지 결과 (업무 미감지 시 ran=false). */
+    coverage: CoverageResult;
+    /** conflictSeverity != NONE 청크 수. */
+    conflictCount: number;
+}
+
+/** Tasks whose output must be especially trustworthy — reviewed whenever confidence is below 'medium'. */
+const HIGH_IMPACT_TASKS = new Set(['meeting-minutes', 'market-research', 'schedule']);
+
+/** Research tasks where a missing '출처' (source) element alone is grounds for escalation. */
+const SOURCE_REQUIRED_TASKS = new Set(['market-research', 'work-research']);
+
+/**
+ * Decides whether a human should review the answer, using four deterministic rules:
+ *
+ *  1. Confidence band is 'very-low' — always escalate, whatever the task.
+ *  2. High-impact task with a score below 70 (and not already covered by rule 1).
+ *  3. Source-required research task whose coverage check reports '출처' missing.
+ *  4. At least one source conflict while the score is below 90.
+ *
+ * @param inputs Confidence result, coverage result and conflict count for the turn.
+ * @returns `escalate` is true when any rule fired; `reasons` holds one user-facing line per fired rule, in rule order.
+ */
+export function decideEscalation(inputs: EscalationInputs): EscalationDecision {
+    const { confidence, coverage, conflictCount } = inputs;
+    const { score, band } = confidence;
+    const reasons: string[] = [];
+
+    // Rule 1
+    const isVeryLow = band === 'very-low';
+    if (isVeryLow) {
+        reasons.push(`확신도 매우 낮음 (${score}/100) — 추가 조사 또는 정보 제공 필요`);
+    }
+
+    // Rule 2 — skipped for 'very-low' so the same condition is not reported twice.
+    const belowMedium = score < 70 && !isVeryLow;
+    if (belowMedium && coverage.ran !== false && coverage.taskId && HIGH_IMPACT_TASKS.has(coverage.taskId)) {
+        reasons.push(`${coverage.taskLabel} 업무인데 확신도 ${score}/100 — 사용 전 검토 권장`);
+    }
+
+    // Rule 3 — unsourced research output risks hallucinated figures.
+    if (coverage.ran && coverage.taskId && SOURCE_REQUIRED_TASKS.has(coverage.taskId)) {
+        if (coverage.missing.includes('출처')) {
+            reasons.push('조사 결과에 출처 표기가 없음 — 핵심 수치·주장 검증 필요');
+        }
+    }
+
+    // Rule 4 — the user has to pick which of the conflicting sources to trust.
+    if (conflictCount > 0 && score < 90) {
+        reasons.push(`출처 간 충돌 ${conflictCount}건 — 어느 정보를 기준으로 할지 확인 필요`);
+    }
+
+    return { escalate: reasons.length !== 0, reasons };
+}
+
+/**
+ * Renders the review-request footer.
+ *
+ * @param decision Escalation decision for the turn.
+ * @returns A blockquote with one bullet per reason, or '' when no escalation is needed.
+ */
+export function formatEscalationFooter(decision: EscalationDecision): string {
+    if (decision.escalate) {
+        const bullets: string[] = [];
+        for (const reason of decision.reasons) {
+            bullets.push(`> - ${reason}`);
+        }
+        return `\n\n> 🙋 **검토 요청** — 아래 사유로 사람 확인이 필요합니다:\n${bullets.join('\n')}`;
+    }
+    return '';
+}
@@ -0,0 +1,73 @@
+/**
+ * Gap Detector — Gap = Requirement − Knowledge (설계서 7.4).
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 3 / Track 3-2. 업무 turn 마다 "필요한 것"
+ * (Requirement Graph 의 필수 요소)과 "가진 것"(검색 그라운딩 + 산출물 커버리지)을
+ * 비교해 부족 지식·영향도·긴급도를 산출한다.
+ *
+ * v1 신호 체계 (결정론적, LLM 없음):
+ *   - 요소 갭: 커버리지 검사에서 누락된 필수 요소
+ *   - 그라운딩 갭: 검색 근거가 없거나(chunkCount=0) 약한(topScore 낮음) 상태에서
+ *     업무를 수행한 것 — "지식이 없어서 모델 일반 지식으로 때운" 신호
+ *   - 영향도: 업무 유형 가중치 (고영향 업무 누락 = 더 심각)
+ *   - 긴급도: 같은 갭의 반복 (Reflection 의 recurrentMisses 와 결합)
+ *
+ * 출력은 Reflection 에 기록되어 Need Engine 의 입력이 된다.
+ */
+
+import type { CoverageResult } from './requirementGraph';
+import type { RetrievalConfidenceSignals } from './confidenceEngine';
+
+export type GapSeverity = 'none' | 'low' | 'medium' | 'high';
+
+export interface GapReport {
+    /** 산출물에서 누락된 필수 요소 (요소 갭). */
+    missingElements: string[];
+    /** 검색 근거 없이/약하게 수행 — 지식 갭 신호. */
+    weakGrounding: boolean;
+    severity: GapSeverity;
+    /** 사람이 읽는 갭 설명 (Need Engine·리포트용). */
+    summary: string;
+}
+
+/** High-impact tasks — a gap on one of these is raised one severity level (only while grounding is weak, see detectGaps). */
+const HIGH_IMPACT_TASKS = new Set(['meeting-minutes', 'market-research', 'schedule']);
+
+/** Severity levels in ascending order. */
+const SEVERITY_ORDER: GapSeverity[] = ['none', 'low', 'medium', 'high'];
+
+/** Returns the next-higher severity; 'high' stays 'high'. */
+function bump(s: GapSeverity): GapSeverity {
+    const nextIndex = SEVERITY_ORDER.indexOf(s) + 1;
+    const lastIndex = SEVERITY_ORDER.length - 1;
+    return SEVERITY_ORDER[nextIndex > lastIndex ? lastIndex : nextIndex];
+}
+
+/**
+ * Compares what the task required with what was available and reports the gap.
+ *
+ * - Element gap: required elements the coverage check reported missing (only when it ran).
+ * - Grounding gap: no retrieved chunks, or chunks whose top score is below 0.3.
+ * - Severity: 3+ missing elements -> 'high'; 1-2 -> 'medium'; weak grounding
+ *   alone -> 'low'. Any non-'none' severity is raised one level when the task
+ *   is high-impact AND grounding is weak.
+ *
+ * @param inputs Coverage result, retrieval signals and the detected task id (or null).
+ * @returns Missing elements, grounding flag, severity and a human-readable summary.
+ */
+export function detectGaps(inputs: {
+    coverage: CoverageResult;
+    signals: RetrievalConfidenceSignals;
+    taskId: string | null;
+}): GapReport {
+    const { coverage, signals, taskId } = inputs;
+
+    const missingElements: string[] = coverage.ran ? [...coverage.missing] : [];
+    const missingCount = missingElements.length;
+
+    const noChunks = signals.chunkCount === 0;
+    const lowScoreChunks = signals.chunkCount > 0 && signals.topScore < 0.3;
+    const weakGrounding = noChunks || lowScoreChunks;
+
+    let severity: GapSeverity;
+    if (missingCount >= 3) {
+        severity = 'high';
+    } else if (missingCount > 0) {
+        severity = 'medium';
+    } else if (weakGrounding) {
+        severity = 'low';
+    } else {
+        severity = 'none';
+    }
+
+    const isHighImpact = Boolean(taskId) && HIGH_IMPACT_TASKS.has(taskId as string);
+    if (severity !== 'none' && isHighImpact && weakGrounding) {
+        severity = bump(severity);
+    }
+
+    const summaryParts: string[] = [];
+    if (missingCount > 0) {
+        summaryParts.push(`필수 요소 ${missingCount}개 누락(${missingElements.join(', ')})`);
+    }
+    if (weakGrounding) {
+        if (noChunks) {
+            summaryParts.push('검색 근거 0건 — 모델 일반 지식으로 수행');
+        } else {
+            summaryParts.push(`검색 근거 약함 (top score ${signals.topScore.toFixed(2)})`);
+        }
+    }
+
+    return {
+        missingElements,
+        weakGrounding,
+        severity,
+        summary: summaryParts.length === 0 ? '갭 없음' : summaryParts.join(' · '),
+    };
+}
@@ -0,0 +1,110 @@
+/**
+ * Knowledge Decay — 지식 노후 감쇠 점검 (설계서 10장, "인간처럼 잊어버리는 기능").
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 4 / Track 4-3. 분야별 반감기로 지식의
+ * 신선도 계수를 계산하고, 노후 지식을 보고서로 노출한다.
+ *
+ * v1 은 *비침습적 감사* — 검색 랭킹은 건드리지 않는다. RAG 평가 하니스로
+ * 튜닝된 검색 경로를 측정 없이 바꾸지 않기 위함 (decay 를 랭킹에 반영하려면
+ * 골든셋 A/B 로 효과를 증명한 뒤 별도 증분으로). citationTrace 의 Provenance
+ * 표시(180일+ 경고)와 상호 보완.
+ *
+ * 분야 분류는 경로/파일명 키워드 매칭 v1 — 설계서 예시(AI 30일 / SEO 90일 /
+ * 트렌드 180일)에 사용자 업무 도메인 규칙을 추가.
+ */
+
+export interface DecayRule {
+    label: string;
+    /** 경로(상대) 또는 파일명에 매치되는 패턴. */
+    match: RegExp;
+    halfLifeDays: number;
+}
+
+/**
+ * Default decay rules, applied top to bottom — the first match wins and the
+ * last entry is a catch-all.
+ *
+ * Short Latin keywords (ai, rag, seo, ...) are guarded with letter-boundary
+ * lookarounds so they only match as standalone tokens. Without the guards,
+ * paths such as "daily-report.md", "email/", "main.ts" or "storage/" were
+ * classified as AI/기술 (30-day half-life) and "seoul-…" as SEO. Digits,
+ * separators and Hangul next to a keyword still count as a boundary
+ * ("gpt4", "llm_eval", "AI모델"). Common compounds are listed explicitly
+ * (openai, genai, chatgpt, agents, agentic).
+ */
+export const DEFAULT_DECAY_RULES: DecayRule[] = [
+    {
+        label: 'AI/기술',
+        match: /(?<![a-z])(?:(?:open|gen)?ai|llms?|mcp|agent(?:s|ic)?|rag|(?:chat)?gpts?|claude|gemma)(?![a-z])|모델|에이전트/iu,
+        halfLifeDays: 30,
+    },
+    { label: 'SEO/마케팅', match: /(?<![a-z])seo(?![a-z])|마케팅|상위노출|키워드/iu, halfLifeDays: 90 },
+    { label: '시장/트렌드', match: /시장|트렌드|동향|경쟁사|market|trend/iu, halfLifeDays: 180 },
+    { label: '회의/프로젝트', match: /회의|meeting|프로젝트|일정/iu, halfLifeDays: 180 },
+    { label: '일반', match: /.*/, halfLifeDays: 365 },
+];
+
+export type DecayStatus = 'active' | 'aging' | 'stale';
+
+export interface DecayItem {
+    relPath: string;
+    category: string;
+    ageDays: number;
+    halfLifeDays: number;
+    /** 0.5^(age/halfLife) — 1.0 신선, 0.5 반감, ↓. */
+    factor: number;
+    status: DecayStatus;
+}
+
+/**
+ * Picks the decay rule for a path: the first rule whose pattern matches, else
+ * the last rule in the list (treated as the catch-all).
+ *
+ * Robustness:
+ *  - A rule regex with the `g` or `y` flag keeps `lastIndex` between `test()`
+ *    calls, so repeated classification could miss a real match; `lastIndex` is
+ *    reset before every test.
+ *  - An empty rule list used to yield `undefined` despite the declared return
+ *    type; it now yields a generic 365-day rule.
+ *
+ * @param relPath Relative path or file name to classify.
+ * @param rules Ordered rule list; defaults to `DEFAULT_DECAY_RULES`.
+ * @returns The matching rule, the list's last rule, or a generic fallback when the list is empty.
+ */
+export function classifyDecayRule(relPath: string, rules: DecayRule[] = DEFAULT_DECAY_RULES): DecayRule {
+    for (const rule of rules) {
+        rule.match.lastIndex = 0; // neutralise stateful (g/y) patterns
+        if (rule.match.test(relPath)) return rule;
+    }
+    if (rules.length === 0) {
+        return { label: '일반', match: /.*/, halfLifeDays: 365 };
+    }
+    return rules[rules.length - 1];
+}
+
+/**
+ * Freshness factor `0.5 ^ (ageDays / halfLifeDays)`: 1.0 for brand-new
+ * knowledge, 0.5 after one half-life, 0.25 after two, and so on.
+ *
+ * Age is clamped at zero, so a `lastUpdatedMs` in the future counts as fresh.
+ * A half-life that is zero, negative or NaN cannot be used in the formula (it
+ * would divide by zero or produce a factor above 1), so it is treated as
+ * instant decay: 1 when the age is zero, otherwise 0.
+ *
+ * @param lastUpdatedMs Last-update time, epoch milliseconds.
+ * @param halfLifeDays Half-life in days; expected to be positive.
+ * @param nowMs Reference "now", epoch milliseconds.
+ * @returns The decay factor, between 0 and 1 for finite timestamps.
+ */
+export function decayFactor(lastUpdatedMs: number, halfLifeDays: number, nowMs: number): number {
+    const ageDays = Math.max(0, (nowMs - lastUpdatedMs) / 86400000);
+    // `!(x > 0)` also catches NaN, unlike `x <= 0`.
+    if (!(halfLifeDays > 0)) {
+        return ageDays > 0 ? 0 : 1;
+    }
+    return Math.pow(0.5, ageDays / halfLifeDays);
+}
+
+/**
+ * Audits a list of knowledge files for staleness.
+ *
+ * Each file is classified by its path, its decay factor is computed from the
+ * rule's half-life, and a status is assigned: factor >= 0.5 'active' (within
+ * one half-life), >= 0.25 'aging' (one to two half-lives), otherwise 'stale'.
+ *
+ * @param files Files with relative path and last-update time (epoch ms).
+ * @param options Optional rule list and injected "now" (defaults: `DEFAULT_DECAY_RULES`, `Date.now()`).
+ * @returns One item per file, sorted by ascending factor so the most decayed come first.
+ */
+export function auditKnowledgeDecay(
+    files: Array<{ relPath: string; lastUpdated: number }>,
+    options: { rules?: DecayRule[]; nowMs?: number } = {},
+): DecayItem[] {
+    const activeRules = options.rules ?? DEFAULT_DECAY_RULES;
+    const referenceMs = options.nowMs ?? Date.now();
+
+    const report: DecayItem[] = [];
+    for (const file of files) {
+        const { label, halfLifeDays } = classifyDecayRule(file.relPath, activeRules);
+        const factor = decayFactor(file.lastUpdated, halfLifeDays, referenceMs);
+        const rawAgeDays = Math.max(0, (referenceMs - file.lastUpdated) / 86400000);
+
+        let status: DecayStatus = 'stale';
+        if (factor >= 0.5) {
+            status = 'active';
+        } else if (factor >= 0.25) {
+            status = 'aging';
+        }
+
+        report.push({
+            relPath: file.relPath,
+            category: label,
+            ageDays: Math.round(rawAgeDays),
+            halfLifeDays,
+            factor,
+            status,
+        });
+    }
+
+    // Lowest factor first: the top of the report is the most urgent review target.
+    report.sort((left, right) => left.factor - right.factor);
+    return report;
+}
+
+/**
+ * Renders the knowledge-decay audit as a markdown report.
+ *
+ * Layout: title, metadata bullets, a summary line with per-status counts, then
+ * either a "no stale knowledge" line or a table of the first 50 stale items
+ * (in the order given) followed by handling advice. The half-life bullet is
+ * fixed text and does not reflect custom rules.
+ *
+ * @param items Audit items, typically already sorted by ascending factor.
+ * @param meta Brain name and date string shown in the header.
+ * @returns The report as newline-joined markdown.
+ */
+export function formatDecayReport(items: DecayItem[], meta: { brainName: string; dateStr: string }): string {
+    const tally = { active: 0, aging: 0, stale: 0 } as Record<DecayStatus, number>;
+    items.forEach((item) => {
+        tally[item.status]++;
+    });
+
+    const out: string[] = [
+        '# 지식 노후 점검 (Knowledge Decay)',
+        '',
+        `- 두뇌: ${meta.brainName} · 일시: ${meta.dateStr}`,
+        '- 분야별 반감기: AI/기술 30일 · SEO 90일 · 시장/트렌드·회의 180일 · 일반 365일',
+        '',
+        `## 요약 — 신선 ${tally.active} · 노화 중 ${tally.aging} · **노후 ${tally.stale}**`,
+        '',
+    ];
+
+    const staleRows = items.filter((item) => item.status === 'stale').slice(0, 50);
+    if (staleRows.length > 0) {
+        out.push(
+            '## 노후 지식 — 갱신/보관/폐기 검토 대상 (factor 낮은 순, 최대 50)',
+            '',
+            '| 파일 | 분야 | 경과일 | 반감기 | factor |',
+            '|---|---|---|---|---|',
+        );
+        staleRows.forEach((row) => {
+            out.push(`| ${row.relPath} | ${row.category} | ${row.ageDays} | ${row.halfLifeDays} | ${row.factor.toFixed(2)} |`);
+        });
+        out.push(
+            '',
+            '> 처리 권고: 여전히 유효하면 파일을 한 번 갱신(저장)해 신선도를 리셋, 낡았으면 보관 폴더로 이동 또는 삭제.',
+            '> v1 은 보고만 한다 — 자동 이동/삭제 없음 (Human Override 원칙).',
+        );
+    } else {
+        out.push('노후(stale) 지식 없음.');
+    }
+
+    out.push('');
+    return out.join('\n');
+}
@@ -0,0 +1,168 @@
+/**
+ * Knowledge Validation + Belief Revision — 지식 저장 전 검증 (설계서 10장).
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 4 / Track 4-1 + 4-2. 새 지식 후보를
+ * 기존 지식과 비교해 수용/검토/거부를 판정하고, 충돌 시 Add/Update/Retire
+ * 권고를 만든다.
+ *
+ * Constitution 준수: 이 모듈은 *판정과 권고만* 한다 — 실제 저장·폐기는 승인
+ * 흐름(Learning Queue / 사용자)을 거친다 (Permission Based Learning).
+ *
+ * v1 은 결정론적 (LLM 없음):
+ *   - 중복: 토큰 Jaccard 유사도 ≥ 0.85 → reject
+ *   - 충돌/관련: 0.25(기본 conflictThreshold) ≤ 유사도 < 0.85 → review + Belief Revision 권고
+ *     (후보가 더 최신 → update / 기존이 더 최신·불명 → 병존 add 후 사람 판단)
+ *   - 출처 없음 → 자동 수용 금지 (최대 review) — Provenance 원칙
+ *   - 수집일이 오래됨 → review (낡은 지식 유입 방지)
+ *
+ * 사용처: Research Agent (P6) 가 수집한 지식의 저장 게이트. 지금은 라이브러리 +
+ * 테스트로 준비 — Research Agent 배선 시 그대로 연결.
+ */
+
+export interface KnowledgeCandidate {
+    title: string;
+    content: string;
+    /** 출처 (URL/문서명). 없으면 자동 수용 불가. */
+    source?: string;
+    /** 수집 시각 ISO. */
+    collectedAt?: string;
+}
+
+export interface ExistingKnowledgeRef {
+    title: string;
+    content: string;
+    /** epoch ms. */
+    lastUpdated?: number;
+    filePath?: string;
+}
+
+export type ValidationVerdict = 'accept' | 'review' | 'reject';
+export type BeliefRevisionAction = 'add' | 'update' | 'retire-old';
+
+export interface ValidationResult {
+    verdict: ValidationVerdict;
+    checks: {
+        hasSource: boolean;
+        freshness: 'fresh' | 'stale' | 'unknown';
+        /** 중복 판정된 기존 지식 title. */
+        duplicateOf: string | null;
+        /** 충돌/관련 판정된 기존 지식 title. */
+        conflictsWith: string | null;
+        similarity: number;
+    };
+    /** 충돌 시 권고 (검토자에게 표시). 충돌 없으면 'add'. */
+    beliefRevision: BeliefRevisionAction;
+    reasons: string[];
+}
+
+export interface ValidationOptions {
+    /** 이 일수보다 오래 전 수집된 후보는 stale. 기본 365. */
+    staleAfterDays: number;
+    /** 중복 임계 Jaccard. 기본 0.85. */
+    duplicateThreshold: number;
+    /**
+     * 관련/충돌 임계 Jaccard. 기본 0.25 — 한국어는 조사 변형(계산은/계산이) 때문에
+     * 같은 주제 문서도 토큰 Jaccard 가 낮게 나온다. 영어 위주 지식이면 0.35 권장.
+     */
+    conflictThreshold: number;
+    /** 테스트 주입용 현재 시각. */
+    nowMs?: number;
+}
+
+export const DEFAULT_VALIDATION_OPTIONS: ValidationOptions = {
+    staleAfterDays: 365,
+    duplicateThreshold: 0.85,
+    conflictThreshold: 0.25,
+};
+
+/**
+ * Splits text into a set of lowercase tokens (shared v1 tokenizer for Korean
+ * and English): every character other than ASCII word characters, Hangul
+ * syllables and whitespace becomes a separator, and tokens shorter than two
+ * characters are dropped.
+ */
+function tokenize(text: string): Set<string> {
+    const normalized = (text || '').toLowerCase().replace(/[^\w가-힣\s]/gu, ' ');
+    const tokens = new Set<string>();
+    for (const piece of normalized.split(/\s+/)) {
+        if (piece.length >= 2) {
+            tokens.add(piece);
+        }
+    }
+    return tokens;
+}
+
+/**
+ * Jaccard similarity of the token sets of two texts: |A ∩ B| / |A ∪ B|.
+ *
+ * @param a First text.
+ * @param b Second text.
+ * @returns A value in [0, 1]; 0 when either text yields no tokens.
+ */
+export function jaccardSimilarity(a: string, b: string): number {
+    const left = tokenize(a);
+    const right = tokenize(b);
+    if (left.size === 0 || right.size === 0) {
+        return 0;
+    }
+    const shared = [...left].filter((token) => right.has(token)).length;
+    const unionSize = left.size + right.size - shared;
+    return shared / unionSize;
+}
+
+/**
+ * Validates a knowledge candidate against existing knowledge and recommends a
+ * belief-revision action. Only judges and recommends — nothing is stored or
+ * retired here.
+ *
+ * Checks, in the order their reasons are reported:
+ *  1. Source: a blank or absent `source` blocks automatic acceptance.
+ *  2. Freshness: a parseable `collectedAt` older than `staleAfterDays` is
+ *     'stale'; a missing or unparseable date is 'unknown'.
+ *  3. Similarity: the single most similar existing entry (token Jaccard on
+ *     content, first one wins on ties) is a duplicate at or above
+ *     `duplicateThreshold`, or related/conflicting at or above `conflictThreshold`.
+ *     For a conflict, 'update' is recommended only when both timestamps are
+ *     known and the candidate is newer; otherwise 'add'.
+ *
+ * Verdict: duplicate -> 'reject'; no source, stale, or conflict -> 'review';
+ * otherwise 'accept'.
+ *
+ * @param candidate The new knowledge candidate.
+ * @param existing Existing knowledge to compare against.
+ * @param options Overrides merged over `DEFAULT_VALIDATION_OPTIONS`.
+ * @returns Verdict, individual check outcomes, recommended action and reasons.
+ */
+export function validateKnowledgeCandidate(
+    candidate: KnowledgeCandidate,
+    existing: ExistingKnowledgeRef[],
+    options: Partial<ValidationOptions> = {},
+): ValidationResult {
+    const settings: ValidationOptions = { ...DEFAULT_VALIDATION_OPTIONS, ...options };
+    const nowMs = settings.nowMs ?? Date.now();
+    const reasons: string[] = [];
+
+    // Provenance
+    const hasSource = Boolean(candidate.source && candidate.source.trim());
+    if (!hasSource) {
+        reasons.push('출처 없음 — 자동 수용 불가');
+    }
+
+    // Freshness — NaN stands for "no usable collection date".
+    const collectedMs = candidate.collectedAt ? Date.parse(candidate.collectedAt) : NaN;
+    let freshness: ValidationResult['checks']['freshness'] = 'unknown';
+    if (!isNaN(collectedMs)) {
+        const ageInDays = (nowMs - collectedMs) / 86400000;
+        if (ageInDays > settings.staleAfterDays) {
+            freshness = 'stale';
+            reasons.push(`수집일이 ${settings.staleAfterDays}일 이상 경과`);
+        } else {
+            freshness = 'fresh';
+        }
+    }
+
+    // Closest existing entry; strict '>' keeps the earliest entry on ties.
+    let closest: ExistingKnowledgeRef | null = null;
+    let closestSim = 0;
+    for (let idx = 0; idx < existing.length; idx++) {
+        const similarity = jaccardSimilarity(candidate.content, existing[idx].content);
+        if (similarity > closestSim) {
+            closestSim = similarity;
+            closest = existing[idx];
+        }
+    }
+
+    let duplicateOf: string | null = null;
+    let conflictsWith: string | null = null;
+    let beliefRevision: BeliefRevisionAction = 'add';
+
+    if (closest !== null && closestSim >= settings.duplicateThreshold) {
+        duplicateOf = closest.title;
+        reasons.push(`기존 지식과 중복 (유사도 ${closestSim.toFixed(2)}: ${closest.title})`);
+    } else if (closest !== null && closestSim >= settings.conflictThreshold) {
+        conflictsWith = closest.title;
+        // Belief revision: prefer the candidate only when it is provably newer.
+        const existingMs = closest.lastUpdated ?? NaN;
+        const candidateIsNewer = !isNaN(collectedMs) && !isNaN(existingMs) && collectedMs > existingMs;
+        if (candidateIsNewer) {
+            beliefRevision = 'update';
+            reasons.push(`기존 지식과 관련/충돌 — 후보가 더 최신 → 갱신(update) 권고, 기존은 폐기(retire) 검토 (${closest.title})`);
+        } else {
+            beliefRevision = 'add';
+            reasons.push(`기존 지식과 관련/충돌 — 신선도 우위 불명 → 병존(add) 후 사람 판단 (${closest.title})`);
+        }
+    }
+
+    // Overall verdict
+    const needsReview = !hasSource || freshness === 'stale' || Boolean(conflictsWith);
+    let verdict: ValidationVerdict;
+    if (duplicateOf) {
+        verdict = 'reject';
+    } else if (needsReview) {
+        verdict = 'review';
+    } else {
+        verdict = 'accept';
+        reasons.push('출처 있음 · 중복/충돌 없음');
+    }
+
+    return {
+        verdict,
+        checks: { hasSource, freshness, duplicateOf, conflictsWith, similarity: closestSim },
+        beliefRevision,
+        reasons,
+    };
+}
@@ -0,0 +1,118 @@
+/**
+ * Learning Queue — 승인 기반 학습 대기열 (설계서 9장).
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 3 / Track 3-5. Need Engine 의 우선순위를
+ * 사람이 승인 가능한 큐로 영속화한다.
+ *
+ * Constitution 준수 (Track 8-2, Permission Based Learning):
+ *   - 시스템은 항목을 *proposed* 로만 추가한다 — 승인은 사람만.
+ *   - 사용자가 파일에서 status 를 approved 로 바꾸면 학습 실행 대상이 된다
+ *     (실행은 Research Agent — Phase 6 후속 증분).
+ *   - mergeNeedsIntoQueue 는 proposed 항목만 갱신하고, 사람이 정한 상태
+ *     (approved/in-progress/done/rejected)는 절대 건드리지 않는다.
+ *
+ * 저장: <brain>/.astra/growth/learning-queue.json (사람이 직접 편집 가능하도록
+ * pretty-print JSON 단일 파일 — md-first ASTRA 철학과 동일한 "파일이 UI" 접근).
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+import type { NeedItem } from './needEngine';
+
+/** Location of the learning-queue file, relative to the brain root directory. */
+export const LEARNING_QUEUE_REL_PATH = path.join('.astra', 'growth', 'learning-queue.json');
+
+/** Lifecycle states a queue item can be in. */
+export type QueueStatus = 'proposed' | 'approved' | 'in-progress' | 'done' | 'rejected';
+
+/** One entry of the learning queue as persisted in the JSON file. */
+export interface QueueItem {
+    /** Stable key — derived from the task type (v1). Items sharing a key are kept as one entry and updated. */
+    id: string;
+    /** Human-readable learning topic. */
+    topic: string;
+    /** Need Score 0~100 — replaced with the latest value on update (proposed items only). */
+    priority: number;
+    /** Human-readable justification for the item. */
+    reason: string;
+    status: QueueStatus;
+    /** Timestamp string recorded when the item was first added. */
+    createdAt: string;
+    /** Timestamp string recorded on the most recent update. */
+    updatedAt: string;
+}
+
+/** Statuses accepted when loading the file; items with any other status are discarded by loadQueue. */
+const VALID_STATUSES: QueueStatus[] = ['proposed', 'approved', 'in-progress', 'done', 'rejected'];
+
+/**
+ * Loads the learning queue stored under `brainPath`.
+ *
+ * The file is meant to be edited by hand, so the content is treated as untrusted:
+ *   - entries without a string `id` or with an unknown `status` are skipped;
+ *   - `priority` is coerced to a finite number (numeric strings are accepted, anything
+ *     else becomes 0) so later sorting never compares NaN/undefined;
+ *   - missing text fields (`topic`, `reason`, `createdAt`, `updatedAt`) become ''.
+ * Any additional properties a user added to an entry are preserved.
+ *
+ * @param brainPath Brain root directory.
+ * @returns The valid queue items, or [] when the file is missing, unreadable,
+ *          not valid JSON, or not a JSON array. Never throws.
+ */
+export function loadQueue(brainPath: string): QueueItem[] {
+    const asText = (value: unknown): string => (typeof value === 'string' ? value : '');
+    const asPriority = (value: unknown): number => {
+        const n = typeof value === 'number'
+            ? value
+            : typeof value === 'string' && value.trim() !== '' ? Number(value) : NaN;
+        return Number.isFinite(n) ? n : 0;
+    };
+    try {
+        const file = path.join(brainPath, LEARNING_QUEUE_REL_PATH);
+        if (!fs.existsSync(file)) return [];
+        const parsed: unknown = JSON.parse(fs.readFileSync(file, 'utf8'));
+        if (!Array.isArray(parsed)) return [];
+        const items: QueueItem[] = [];
+        for (const it of parsed) {
+            if (!it || typeof it.id !== 'string' || !VALID_STATUSES.includes(it.status)) continue;
+            items.push({
+                ...it,
+                topic: asText(it.topic),
+                priority: asPriority(it.priority),
+                reason: asText(it.reason),
+                createdAt: asText(it.createdAt),
+                updatedAt: asText(it.updatedAt),
+            });
+        }
+        return items;
+    } catch {
+        // Best effort by design: a broken queue file must not break the caller.
+        return [];
+    }
+}
+
+/**
+ * Writes the queue to the learning-queue file under `brainPath` as pretty-printed JSON,
+ * creating the parent directory when needed. Items are stored highest priority first so
+ * the most urgent learning topic is at the top of the file. The input array is not reordered.
+ *
+ * @returns true when the file was written, false on any failure (never throws).
+ */
+export function saveQueue(brainPath: string, queue: QueueItem[]): boolean {
+    const highestFirst = (left: QueueItem, right: QueueItem): number => right.priority - left.priority;
+    try {
+        const target = path.join(brainPath, LEARNING_QUEUE_REL_PATH);
+        fs.mkdirSync(path.dirname(target), { recursive: true });
+        const ordered = queue.slice();
+        ordered.sort(highestFirst);
+        const payload = JSON.stringify(ordered, null, 2) + '\n';
+        fs.writeFileSync(target, payload, 'utf8');
+    } catch {
+        return false;
+    }
+    return true;
+}
+
+/**
+ * Merges Need Engine results into the queue and returns the merged list.
+ *   - new topic → appended as `proposed`
+ *   - existing `proposed` item → priority / reason / updatedAt refreshed
+ *   - any status set by a human (approved, in-progress, done, rejected) → left untouched
+ *     (Human Override)
+ *   - done / rejected items are not re-proposed even if the need rises again (v1 —
+ *     deliberately conservative; the user can delete the item to allow re-proposal)
+ *
+ * The function does not modify `queue` or the item objects inside it: refreshed items
+ * are returned as copies, so callers that still hold the loaded queue keep a true
+ * "before" snapshot.
+ *
+ * @param queue  Current queue items.
+ * @param needs  Need Engine output; each need maps to the id `need-<taskId>`.
+ * @param nowIso Timestamp string written to createdAt / updatedAt.
+ * @returns Merged items: existing ones in their original order, new ones appended.
+ */
+export function mergeNeedsIntoQueue(queue: QueueItem[], needs: NeedItem[], nowIso: string): QueueItem[] {
+    const byId = new Map(queue.map((q) => [q.id, q] as const));
+    for (const need of needs) {
+        const id = `need-${need.taskId}`;
+        const existing = byId.get(id);
+        if (!existing) {
+            byId.set(id, {
+                id,
+                topic: `${need.taskLabel} 역량 보강${need.topMisses.length ? ` (자주 누락: ${need.topMisses.join(', ')})` : ''}`,
+                priority: need.score,
+                reason: need.reason,
+                status: 'proposed',
+                createdAt: nowIso,
+                updatedAt: nowIso,
+            });
+        } else if (existing.status === 'proposed') {
+            // Replace the entry with a copy instead of mutating the caller's object.
+            // Map.set on an existing key keeps its position, so ordering is unchanged.
+            byId.set(id, { ...existing, priority: need.score, reason: need.reason, updatedAt: nowIso });
+        }
+        // approved / in-progress / done / rejected — decided by a human, never changed here.
+    }
+    return Array.from(byId.values());
+}
+
+/**
+ * Renders the queue as a markdown document: a short usage note followed by a table
+ * sorted by priority (highest first), or a one-line notice when the queue is empty.
+ *
+ * `topic` and `reason` are free text (and the backing file is hand-editable), so a `|`
+ * or a line break inside them would break the table row. Pipes are escaped as `\|` and
+ * line breaks are collapsed to a single space before rendering.
+ *
+ * @param queue Items to render; the array itself is not reordered.
+ * @returns Markdown text.
+ */
+export function formatQueueMarkdown(queue: QueueItem[]): string {
+    const cell = (value: unknown): string =>
+        String(value ?? '').replace(/\r?\n/g, ' ').replace(/\|/g, '\\|');
+
+    const lines: string[] = [];
+    lines.push('# Learning Queue');
+    lines.push('');
+    lines.push('상태 변경은 learning-queue.json 에서 직접: proposed → **approved** (학습 승인) / rejected.');
+    lines.push('approved 항목은 Research Agent(후속 증분)가 처리합니다. 시스템은 proposed 만 추가/갱신합니다.');
+    lines.push('');
+    if (queue.length === 0) {
+        lines.push('큐가 비어 있습니다.');
+        return lines.join('\n');
+    }
+    lines.push('| 우선순위 | 주제 | 상태 | 근거 |');
+    lines.push('|---|---|---|---|');
+    for (const q of queue.slice().sort((a, b) => b.priority - a.priority)) {
+        lines.push(`| ${q.priority} | ${cell(q.topic)} | ${q.status} | ${cell(q.reason)} |`);
+    }
+    lines.push('');
+    return lines.join('\n');
+}
@@ -0,0 +1,52 @@
+/**
+ * 단순 non-streaming LLM 호출 — Ollama / LM Studio(OpenAI 호환) 듀얼 엔드포인트.
+ *
+ * postHocSelfCheck 의 호출 패턴을 재사용 가능한 헬퍼로 분리. 평가 하니스·Critic 등
+ * AgentExecutor 밖에서 LLM 1회 호출이 필요한 곳이 사용한다 (확장 명령 등).
+ * agent turn 내부에서는 agent.ts 의 callNonStreaming 을 쓸 것 (cloud 라우팅 포함).
+ */
+
+/** Connection and sampling settings for simpleChatCompletion. */
+export interface SimpleChatOptions {
+    /** Server base URL; the chat endpoint path is appended to it. */
+    baseUrl: string;
+    /** Model name sent in the request body. */
+    model: string;
+    /** Sampling temperature; 0.2 when omitted. */
+    temperature?: number;
+    /** Maximum tokens to generate; 1200 when omitted. */
+    maxTokens?: number;
+    /** Milliseconds before the request is aborted; 120000 when omitted. */
+    timeoutMs?: number;
+}
+
+/**
+ * Sends one non-streaming chat request (a system message plus a user message) and
+ * returns the reply text.
+ *
+ * Endpoint selection: a base URL containing `:11434` or `ollama` is treated as Ollama
+ * (`<base>/api/chat`); anything else as an OpenAI-compatible server
+ * (`<base>/v1/chat/completions`). Trailing slashes on `baseUrl` are ignored, and an
+ * OpenAI-compatible base that already ends in `/v1` does not get a second `/v1`
+ * segment — both are common configuration slips that previously produced a wrong URL.
+ *
+ * @param system  System prompt.
+ * @param user    User message.
+ * @param options Connection and sampling settings.
+ * @returns The first reply text found in the response body (`message.content`,
+ *          `choices[0].message.content`, `choices[0].text`, `response`), or '' if none.
+ * @throws Error `HTTP <status>` for a non-OK response. Errors raised by fetch itself,
+ *         including the abort triggered when `timeoutMs` elapses, propagate unchanged.
+ */
+export async function simpleChatCompletion(
+    system: string,
+    user: string,
+    options: SimpleChatOptions,
+): Promise<string> {
+    const isOllama = options.baseUrl.includes(':11434') || options.baseUrl.includes('ollama');
+    const base = options.baseUrl.replace(/\/+$/, '');
+    const endpoint = isOllama
+        ? `${base}/api/chat`
+        : /\/v1$/.test(base) ? `${base}/chat/completions` : `${base}/v1/chat/completions`;
+    const controller = new AbortController();
+    const timer = setTimeout(() => controller.abort(), options.timeoutMs ?? 120000);
+    try {
+        const messages = [
+            { role: 'system', content: system },
+            { role: 'user', content: user },
+        ];
+        // Ollama nests sampling settings under `options`; OpenAI-compatible servers take them at the top level.
+        const body = isOllama
+            ? { model: options.model, stream: false, messages, options: { temperature: options.temperature ?? 0.2, num_predict: options.maxTokens ?? 1200 } }
+            : { model: options.model, stream: false, temperature: options.temperature ?? 0.2, max_tokens: options.maxTokens ?? 1200, messages };
+        const res = await fetch(endpoint, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(body),
+            signal: controller.signal,
+        });
+        if (!res.ok) throw new Error(`HTTP ${res.status}`);
+        const data: any = await res.json();
+        return String(
+            data?.message?.content ??
+            data?.choices?.[0]?.message?.content ??
+            data?.choices?.[0]?.text ??
+            data?.response ??
+            '',
+        );
+    } finally {
+        // Always release the timeout so a finished request cannot trigger a late abort.
+        clearTimeout(timer);
+    }
+}
@@ -0,0 +1,220 @@
+/**
+ * Need Engine — 학습 필요성 산출 (설계서 7.6) + Knowledge Inventory v1 (7.3).
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 3 / Track 3-3 + 3-1. Reflection 기록을
+ * 집계해 "무엇을 먼저 배워야 하는가" 를 점수로 산출한다 — 성장 루프의 두뇌.
+ *
+ * Need Score (설계서 공식, 0~100):
+ *   정보 부족도 × 30% + 실패율 × 25% + 업무 빈도 × 20% + 확신도 부족 × 15% + 사용자 피드백 × 10%
+ *
+ * v1 신호 매핑 (전부 Reflection 에서 결정론적으로):
+ *   - 정보 부족도: weakGrounding 비율 (검색 근거 없이 수행한 turn 비중)
+ *   - 실패율: 필수 요소 누락이 있었던 turn 비율
+ *   - 업무 빈도: 해당 업무 turn 수 / 전체 업무 turn 수
+ *   - 확신도 부족: (100 − 평균 확신도) / 100
+ *   - 사용자 피드백: v1 미수집 → 0 (필드는 유지, 후속 증분에서 연결)
+ *
+ * 출력은 Learning Queue 의 입력이 된다. 학습 실행은 승인 후 (Permission Based Learning).
+ */
+
+import type { ReflectionRecord } from './reflectionStore';
+
+/** Learning-need score for one task type, produced by computeNeeds. */
+export interface NeedItem {
+    /** Task type ID (v1 unit of a learning topic — later: finer element/topic granularity). */
+    taskId: string;
+    /** Human-readable task name; falls back to the task ID when no label was recorded. */
+    taskLabel: string;
+    /** 0~100. */
+    score: number;
+    /** Per-signal inputs to the weighted score (human-readable justification). */
+    breakdown: {
+        infoLack: number;    // 0~1
+        failRate: number;    // 0~1
+        frequency: number;   // 0~1
+        confidenceLack: number; // 0~1
+        feedback: number;    // 0~1 (v1 = 0)
+    };
+    /** Number of records aggregated. */
+    sampleCount: number;
+    /** Top 3 most frequently missed elements — used to make the learning topic concrete. */
+    topMisses: string[];
+    /** Human-readable summary of the signals behind the score. */
+    reason: string;
+}
+
+/** Weight of each signal in the Need Score; the five weights sum to 1. */
+export const NEED_WEIGHTS = {
+    infoLack: 0.30,
+    failRate: 0.25,
+    frequency: 0.20,
+    confidenceLack: 0.15,
+    feedback: 0.10,
+} as const;
+
+/**
+ * Groups reflection records by task type and scores how urgently each task type needs
+ * learning, using the NEED_WEIGHTS formula. Records without a task ID are ignored.
+ *
+ * Per task type:
+ *   - infoLack       share of turns flagged weakGrounding
+ *   - failRate       share of turns with at least one missing element
+ *   - frequency      this task's turns / all task turns
+ *   - confidenceLack (100 − average confidence) / 100, clamped to 0~1
+ *   - feedback       always 0 in v1
+ *
+ * @returns One NeedItem per task type, highest score first; [] when no record has a task ID.
+ */
+export function computeNeeds(records: ReflectionRecord[]): NeedItem[] {
+    const taskRecords = records.filter((rec) => rec.taskId);
+    if (taskRecords.length === 0) return [];
+
+    const grouped = new Map<string, ReflectionRecord[]>();
+    for (const rec of taskRecords) {
+        const bucket = grouped.get(rec.taskId!);
+        if (bucket) bucket.push(rec);
+        else grouped.set(rec.taskId!, [rec]);
+    }
+
+    const asPercent = (ratio: number): string => (ratio * 100).toFixed(0);
+    const result: NeedItem[] = [];
+
+    grouped.forEach((group, taskId) => {
+        // Single pass over the group collects every counter the formula needs.
+        let weakCount = 0;
+        let failedCount = 0;
+        let confidenceSum = 0;
+        const missTally = new Map<string, number>();
+        for (const rec of group) {
+            const misses = rec.missing || [];
+            if (rec.weakGrounding === true) weakCount += 1;
+            if (misses.length > 0) failedCount += 1;
+            confidenceSum += rec.confidenceScore || 0;
+            for (const label of misses) missTally.set(label, (missTally.get(label) || 0) + 1);
+        }
+
+        const infoLack = weakCount / group.length;
+        const failRate = failedCount / group.length;
+        const frequency = group.length / taskRecords.length;
+        const avgConf = confidenceSum / group.length;
+        const confidenceLack = Math.max(0, Math.min(1, (100 - avgConf) / 100));
+        const feedback = 0; // not collected in v1
+
+        const weighted =
+            infoLack * NEED_WEIGHTS.infoLack +
+            failRate * NEED_WEIGHTS.failRate +
+            frequency * NEED_WEIGHTS.frequency +
+            confidenceLack * NEED_WEIGHTS.confidenceLack +
+            feedback * NEED_WEIGHTS.feedback;
+        const score = Math.round(100 * weighted);
+
+        // Three most frequently missed elements.
+        const topMisses = Array.from(missTally.entries())
+            .sort((x, y) => y[1] - x[1])
+            .slice(0, 3)
+            .map((entry) => entry[0]);
+
+        const reasonParts = [
+            infoLack > 0.3 ? `근거 없는 수행 ${asPercent(infoLack)}%` : '',
+            failRate > 0.3 ? `요소 누락률 ${asPercent(failRate)}%` : '',
+            confidenceLack > 0.3 ? `평균 확신도 ${avgConf.toFixed(0)}` : '',
+            topMisses.length > 0 ? `자주 누락: ${topMisses.join(', ')}` : '',
+        ].filter((part) => part !== '');
+
+        result.push({
+            taskId,
+            taskLabel: group[0].taskLabel || taskId,
+            score,
+            breakdown: { infoLack, failRate, frequency, confidenceLack, feedback },
+            sampleCount: group.length,
+            topMisses,
+            reason: reasonParts.join(' · ') || '특이 신호 없음 (빈도 기반)',
+        });
+    });
+
+    return result.sort((x, y) => y.score - x.score);
+}
+
+/**
+ * Knowledge Inventory v1 (Track 3-1) — how well each task type is covered by knowledge.
+ * The three grades of the design doc (7.3) — held / lacking / none — are derived from
+ * retrieval grounding signals.
+ */
+export interface InventoryItem {
+    taskId: string;
+    /** Human-readable task name; falls back to the task ID. */
+    taskLabel: string;
+    /** 'sufficient' | 'partial' | 'missing' */
+    status: 'sufficient' | 'partial' | 'missing';
+    /** Mean number of retrieved chunks across the sampled turns. */
+    avgChunkCount: number;
+    /** Mean of the best retrieval score across the sampled turns. */
+    avgTopScore: number;
+    /** Number of records aggregated. */
+    sampleCount: number;
+}
+
+/**
+ * Builds the knowledge inventory: for every task type that has retrieval data, averages
+ * the retrieved chunk count and top score and grades the result.
+ *
+ * Grading: average chunks ≥ 3 and average top score ≥ 0.5 → 'sufficient';
+ * otherwise average chunks ≥ 1 → 'partial'; otherwise 'missing'.
+ *
+ * @returns One item per task type, weakest average top score first. Records without a
+ *          task ID or without retrieval data are ignored.
+ */
+export function knowledgeInventory(records: ReflectionRecord[]): InventoryItem[] {
+    const grouped = new Map<string, ReflectionRecord[]>();
+    records.forEach((rec) => {
+        if (!(rec.taskId && rec.retrieval)) return;
+        const bucket = grouped.get(rec.taskId);
+        if (bucket) bucket.push(rec);
+        else grouped.set(rec.taskId, [rec]);
+    });
+
+    const grade = (chunks: number, top: number): InventoryItem['status'] => {
+        if (chunks >= 3 && top >= 0.5) return 'sufficient';
+        if (chunks >= 1) return 'partial';
+        return 'missing';
+    };
+
+    const rows: InventoryItem[] = [];
+    grouped.forEach((group, taskId) => {
+        let chunkSum = 0;
+        let scoreSum = 0;
+        for (const rec of group) {
+            chunkSum += rec.retrieval!.chunkCount || 0;
+            scoreSum += rec.retrieval!.topScore || 0;
+        }
+        const avgChunkCount = chunkSum / group.length;
+        const avgTopScore = scoreSum / group.length;
+        rows.push({
+            taskId,
+            taskLabel: group[0].taskLabel || taskId,
+            status: grade(avgChunkCount, avgTopScore),
+            avgChunkCount,
+            avgTopScore,
+            sampleCount: group.length,
+        });
+    });
+    return rows.sort((x, y) => x.avgTopScore - y.avgTopScore);
+}
+
+/**
+ * Knowledge Debt (Track 4-4) — tally of the work that missing knowledge actually held back
+ * (design-doc example: "GA4 — Blocked Tasks 17, Impact 9"). The v1 unit is the task type:
+ * number of turns performed with no/weak grounding = blocked, and the mean gap severity of
+ * those turns = impact (0~10).
+ */
+export interface DebtItem {
+    taskId: string;
+    /** Human-readable task name; falls back to the task ID. */
+    taskLabel: string;
+    /** Number of task turns performed while knowledge was lacking. */
+    blockedTurns: number;
+    /** Mean gap severity, 0~10. */
+    impact: number;
+    /** blocked × impact — the sort key. */
+    debtScore: number;
+}
+
+/** Impact points (0~10) assigned to each gap-severity label. */
+const SEVERITY_SCORE: Record<string, number> = { none: 0, low: 3, medium: 6, high: 10 };
+
+/**
+ * Computes knowledge debt per task type from the turns flagged `weakGrounding`.
+ *
+ * For each task type: blockedTurns = number of such turns, impact = mean severity score
+ * of those turns (rounded to one decimal), debtScore = round(blockedTurns × unrounded impact).
+ * A turn with no severity, or with a label that is not one of none/low/medium/high,
+ * counts as 'low' (3).
+ *
+ * The severity label comes from persisted records, so it is looked up with an
+ * own-property check: a stray value such as "constructor" must not resolve to an
+ * inherited Object.prototype member and poison the average.
+ *
+ * @returns Debt items, highest debtScore first. Records without a task ID are ignored.
+ */
+export function computeKnowledgeDebt(records: ReflectionRecord[]): DebtItem[] {
+    const DEFAULT_SCORE = 3;
+    const severityPoints = (severity: unknown): number => {
+        const key = typeof severity === 'string' && severity ? severity : 'low';
+        return Object.prototype.hasOwnProperty.call(SEVERITY_SCORE, key) ? SEVERITY_SCORE[key] : DEFAULT_SCORE;
+    };
+
+    const blocked = records.filter((r) => r.taskId && r.weakGrounding === true);
+    const byTask = new Map<string, ReflectionRecord[]>();
+    for (const r of blocked) {
+        const arr = byTask.get(r.taskId!) || [];
+        arr.push(r);
+        byTask.set(r.taskId!, arr);
+    }
+    const items: DebtItem[] = [];
+    for (const [taskId, rs] of byTask) {
+        const impact = rs.reduce((s, r) => s + severityPoints(r.gapSeverity), 0) / rs.length;
+        items.push({
+            taskId,
+            taskLabel: rs[0].taskLabel || taskId,
+            blockedTurns: rs.length,
+            impact: Math.round(impact * 10) / 10,
+            debtScore: Math.round(rs.length * impact),
+        });
+    }
+    return items.sort((a, b) => b.debtScore - a.debtScore);
+}
+
+/**
+ * Renders the Need Engine report as markdown with three sections: learning needs
+ * (ranked in the order given), knowledge inventory, and knowledge debt. Each section
+ * shows a table, or a one-line placeholder when its list is empty.
+ *
+ * @param needs     Rows for the learning-need table; rank is the array position + 1.
+ * @param inventory Rows for the inventory table.
+ * @param debt      Rows for the debt table (defaults to none).
+ * @returns Markdown text ending with a newline.
+ */
+export function formatNeedsMarkdown(needs: NeedItem[], inventory: InventoryItem[], debt: DebtItem[] = []): string {
+    const needSection: string[] = needs.length === 0
+        ? ['Reflection 기록 없음 — 업무 turn 이 쌓이면 학습 우선순위가 산출됩니다.']
+        : [
+            '| 우선순위 | 업무 | Need Score | 표본 | 근거 |',
+            '|---|---|---|---|---|',
+            ...needs.map((item, rank) => `| ${rank + 1} | ${item.taskLabel} | **${item.score}** | ${item.sampleCount} | ${item.reason} |`),
+        ];
+
+    let inventorySection: string[];
+    if (inventory.length === 0) {
+        inventorySection = ['- 데이터 없음'];
+    } else {
+        const statusLabel = { sufficient: '보유', partial: '부족', missing: '없음' } as const;
+        inventorySection = [
+            '| 업무 | 상태 | 평균 근거 수 | 평균 top score |',
+            '|---|---|---|---|',
+            ...inventory.map((row) => `| ${row.taskLabel} | ${statusLabel[row.status]} | ${row.avgChunkCount.toFixed(1)} | ${row.avgTopScore.toFixed(2)} |`),
+        ];
+    }
+
+    const debtSection: string[] = debt.length === 0
+        ? ['- 부채 없음 — 지식 부족 상태로 수행된 업무가 없습니다.']
+        : [
+            '| 업무 | Blocked Turns | Impact (0~10) | Debt Score |',
+            '|---|---|---|---|',
+            ...debt.map((row) => `| ${row.taskLabel} | ${row.blockedTurns} | ${row.impact} | **${row.debtScore}** |`),
+        ];
+
+    return [
+        '# 학습 필요성 (Need Engine)',
+        '',
+        '공식: 정보부족 30% + 실패율 25% + 빈도 20% + 확신부족 15% + 피드백 10%',
+        '',
+        ...needSection,
+        '',
+        '## Knowledge Inventory (지식 보유 상태)',
+        '',
+        ...inventorySection,
+        '',
+        '## Knowledge Debt (지식 부채)',
+        '',
+        ...debtSection,
+        '',
+    ].join('\n');
+}
@@ -0,0 +1,68 @@
+/**
+ * Organizational Memory — 조직 규칙·프로세스·선호 방식 시스템 프롬프트 블록.
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 5 / Track 5-2 (설계서 11장 Organizational
+ * Memory). "이 회사는 속도 우선, 완벽주의 지양" 류의 조직 문화·업무 방식을
+ * 모든 업무 turn 에 주입한다.
+ *
+ * Terminology Dictionary 와 같은 "파일이 UI" 패턴 — 사용자가
+ * <brain>/.astra/organization.md 를 직접 편집하면 다음 turn 부터 반영.
+ * 파일이 없으면 no-op. (User Memory 는 기존 LongTermMemory 가 담당 — 이 블록은
+ * 검색 score 와 무관하게 *항상* 주입되어야 하는 불변 조직 규칙용.)
+ *
+ * 권장 파일 구조 (자유 형식 markdown):
+ *   ## 업무 방식  / ## 보고 형식  / ## 의사결정 원칙  / ## 금지 사항
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+
+/** Location of the organization-memory file, relative to the brain root directory. */
+export const ORG_MEMORY_REL_PATH = path.join('.astra', 'organization.md');
+
+/** Tuning options for buildOrgMemoryBlock. */
+export interface OrgMemoryBlockOptions {
+    /** Maximum body length (chars) — keeps the system prompt from bloating. Default 3000. */
+    maxBodyLength: number;
+}
+
+/** Defaults applied for any option the caller does not supply. */
+export const DEFAULT_ORG_MEMORY_OPTIONS: OrgMemoryBlockOptions = {
+    maxBodyLength: 3000,
+};
+
+/**
+ * Builds the [ORGANIZATIONAL MEMORY] system-prompt block from the organization.md file
+ * under `brainPath`. Returns '' when the file is missing, unreadable, or blank.
+ *
+ * The file is read directly on every call (no mtime cache): it is small, and picking up
+ * edits immediately matters more. A body longer than `maxBodyLength` is cut and followed
+ * by a note stating how many characters were dropped.
+ */
+export function buildOrgMemoryBlock(brainPath: string, options: Partial<OrgMemoryBlockOptions> = {}): string {
+    const { maxBodyLength }: OrgMemoryBlockOptions = { ...DEFAULT_ORG_MEMORY_OPTIONS, ...options };
+
+    const readTrimmed = (): string => {
+        try {
+            const source = path.join(brainPath, ORG_MEMORY_REL_PATH);
+            return fs.existsSync(source) ? fs.readFileSync(source, 'utf8').trim() : '';
+        } catch {
+            return '';
+        }
+    };
+
+    const raw = readTrimmed();
+    if (raw.length === 0) return '';
+
+    const overflow = raw.length > maxBodyLength;
+    const body = overflow ? raw.slice(0, maxBodyLength) : raw;
+    const truncationNote = overflow
+        ? ['', `_…(${raw.length - maxBodyLength}자 잘림 — 핵심 규칙을 앞쪽에 배치해 주세요)_`]
+        : [];
+
+    return [
+        '[ORGANIZATIONAL MEMORY]',
+        '아래는 이 조직의 업무 방식·규칙·선호다. 업무 산출물(회의록/조사/일정)은 이 방식을 *항상* 따를 것.',
+        '사용자 명시 지시와 충돌하면 사용자 지시 우선 (Human Override).',
+        '',
+        '---',
+        body,
+        ...truncationNote,
+        '---',
+        '[/ORGANIZATIONAL MEMORY]',
+    ].join('\n');
+}
@@ -0,0 +1,162 @@
+/**
+ * Reflection Store — 업무 turn 회고 기록 + Failure Pattern 집계.
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 1 / Track 2-4 (Reflection Engine v1) +
+ * Phase 3 / Track 3-6 (Failure Pattern DB v1 시드). 신뢰 조건 T5
+ * "같은 실수를 반복하지 않는다" 의 데이터 기반.
+ *
+ * v1 은 결정론적 신호만 기록 (LLM 회고 질문은 후속 증분):
+ *   업무 turn 종료 → {업무유형, 확신도, 누락 요소, 에스컬레이션 여부, Critic 이슈 수}
+ *   를 <brain>/.astra/growth/reflections.jsonl 에 append.
+ *
+ * 이 파일이 쌓이면:
+ *   - summarizeFailurePatterns() → "회의록·기한 누락 N회" 류 반복 실수 집계
+ *   - formatGrowthReport() → 기간별 확신도/누락률 추이 = *성장세 그래프의 원천*
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+
+/** Location of the reflections log (one JSON record per line), relative to the brain root directory. */
+export const REFLECTIONS_REL_PATH = path.join('.astra', 'growth', 'reflections.jsonl');
+
+/** One retrospective entry written at the end of a task turn. */
+export interface ReflectionRecord {
+    /** ISO timestamp. */
+    ts: string;
+    /** Task type ID, or null when no task type applies. */
+    taskId: string | null;
+    /** Human-readable task name, or null. */
+    taskLabel: string | null;
+    confidenceScore: number;
+    confidenceBand: string;
+    /** Labels of the required elements the coverage check reported missing. */
+    missing: string[];
+    escalated: boolean;
+    /** Number of issues found if the Critic review ran, null if it did not run. */
+    criticIssues: number | null;
+    /** Preview of the request (for debugging / retrospectives, 120 chars). */
+    promptPreview: string;
+
+    // ── Decision Journal v1 (Track 3-7) — fields for tracing back "why this confidence / judgement" ──
+    /** Labels of the factors that contributed to the confidence score (confidenceEngine factors). */
+    factors?: string[];
+    /** Titles of the top sources used in the answer (based on citation/selfCheckSources). */
+    usedSources?: string[];
+
+    // ── Gap Detector v1 (Track 3-2) — input signals for the Need Engine ──
+    /** Retrieval grounding: chunk count and best score. */
+    retrieval?: { chunkCount: number; topScore: number };
+    /** Task turn performed with no / weak retrieval grounding (knowledge-gap signal). */
+    weakGrounding?: boolean;
+    /** Gap severity (none/low/medium/high). */
+    gapSeverity?: string;
+}
+
+/**
+ * Appends one reflection record as a JSON line to the reflections log under `brainPath`,
+ * creating the parent directory when needed. Never throws — recording a retrospective
+ * must not block the turn.
+ *
+ * @returns true when the line was written; false when `brainPath` is empty or any step fails.
+ */
+export function appendReflection(brainPath: string, record: ReflectionRecord): boolean {
+    if (!brainPath) return false;
+    try {
+        const target = path.join(brainPath, REFLECTIONS_REL_PATH);
+        const parentDir = path.dirname(target);
+        fs.mkdirSync(parentDir, { recursive: true });
+        const line = JSON.stringify(record) + '\n';
+        fs.appendFileSync(target, line, 'utf8');
+    } catch {
+        return false;
+    }
+    return true;
+}
+
+/**
+ * Loads reflection records from the log under `brainPath`. Blank lines, lines that are
+ * not valid JSON, and objects without a string `ts` are skipped.
+ *
+ * @param limit When given and positive, only the most recent `limit` records are returned.
+ * @returns Records in file order; [] when the file is missing or cannot be read.
+ */
+export function loadReflections(brainPath: string, limit?: number): ReflectionRecord[] {
+    const parseLine = (line: string): ReflectionRecord | null => {
+        try {
+            const parsed = JSON.parse(line);
+            return parsed && typeof parsed.ts === 'string' ? (parsed as ReflectionRecord) : null;
+        } catch {
+            return null; // broken line — skip
+        }
+    };
+
+    try {
+        const source = path.join(brainPath, REFLECTIONS_REL_PATH);
+        if (!fs.existsSync(source)) return [];
+        const loaded: ReflectionRecord[] = [];
+        for (const line of fs.readFileSync(source, 'utf8').split('\n')) {
+            if (!line.trim()) continue;
+            const rec = parseLine(line);
+            if (rec !== null) loaded.push(rec);
+        }
+        if (limit && limit > 0) return loaded.slice(-limit);
+        return loaded;
+    } catch {
+        return [];
+    }
+}
+
+/** How often one required element was reported missing for one task type. */
+export interface FailurePattern {
+    taskId: string;
+    /** Human-readable task name; falls back to the task ID. */
+    taskLabel: string;
+    /** Label of the missed element. */
+    element: string;
+    /** Number of records in which the element was missing. */
+    count: number;
+}
+
+/**
+ * Failure Pattern tally — counts how many times each (task type × missing element) pair
+ * occurred and returns the pairs most frequent first. This is what exposes repeated
+ * mistakes such as "market size missed 27 times" as numbers (design doc, chapter 12).
+ * Records without a task ID are ignored.
+ */
+export function summarizeFailurePatterns(records: ReflectionRecord[]): FailurePattern[] {
+    const tally = new Map<string, FailurePattern>();
+    records.forEach((rec) => {
+        const { taskId } = rec;
+        if (!taskId) return;
+        (rec.missing || []).forEach((element) => {
+            const key = `${taskId}::${element}`;
+            const seen = tally.get(key);
+            if (seen === undefined) {
+                tally.set(key, { taskId, taskLabel: rec.taskLabel || taskId, element, count: 1 });
+                return;
+            }
+            seen.count += 1;
+        });
+    });
+    return [...tally.values()].sort((x, y) => y.count - x.count);
+}
+
+/**
+ * Repeated-mistake warning — returns the elements of the given task type that were
+ * missing at least `threshold` times, so they can be emphasised in the system prompt.
+ * The Requirement Graph block receives these and marks them as frequently missed
+ * elements (the first closing of the T5 loop).
+ *
+ * @param taskId    Task type to inspect.
+ * @param threshold Minimum number of misses to count as recurrent (default 3).
+ * @returns Element labels, most frequently missed first.
+ */
+export function recurrentMisses(records: ReflectionRecord[], taskId: string, threshold = 3): string[] {
+    const repeated: string[] = [];
+    for (const pattern of summarizeFailurePatterns(records)) {
+        if (pattern.taskId === taskId && pattern.count >= threshold) repeated.push(pattern.element);
+    }
+    return repeated;
+}
+
+/**
+ * Weekly growth report — per-week record count, average confidence, share of turns with
+ * missing elements and escalation count, followed by the ten most frequent failure patterns.
+ *
+ * Weeks start on Sunday and are computed entirely in UTC. The bucket key is the UTC date
+ * of the week start; deriving the week start from local-time getters (as before) while
+ * formatting the key with toISOString() split a single week into two rows on machines
+ * whose time zone is not UTC, depending on the time of day of each record.
+ *
+ * @param records Reflection records; those with an unparseable `ts` are left out of the
+ *                weekly table but still counted in the total line.
+ * @returns Markdown text ending with a newline.
+ */
+export function formatGrowthReport(records: ReflectionRecord[]): string {
+    if (records.length === 0) return '# 성장 리포트\n\n기록 없음 — 업무 turn 이 쌓이면 추이가 표시됩니다.\n';
+
+    // Weekly buckets keyed by the UTC date (YYYY-MM-DD) of the Sunday that starts the week.
+    const byWeek = new Map<string, ReflectionRecord[]>();
+    for (const r of records) {
+        const d = new Date(r.ts);
+        if (isNaN(d.getTime())) continue;
+        // Date.UTC normalises a zero/negative day-of-month into the previous month.
+        const weekStart = new Date(Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), d.getUTCDate() - d.getUTCDay()));
+        const key = weekStart.toISOString().slice(0, 10);
+        const arr = byWeek.get(key) || [];
+        arr.push(r);
+        byWeek.set(key, arr);
+    }
+
+    const lines: string[] = [];
+    lines.push('# ASTRA 성장 리포트 (Reflection 기반)');
+    lines.push('');
+    lines.push(`총 업무 turn: ${records.length}`);
+    lines.push('');
+    lines.push('| 주 (시작일) | 업무 수 | 평균 확신도 | 요소 누락률 | 에스컬레이션 |');
+    lines.push('|---|---|---|---|---|');
+    // ISO date keys sort chronologically as plain strings.
+    const weeks = Array.from(byWeek.keys()).sort();
+    for (const w of weeks) {
+        const rs = byWeek.get(w)!;
+        const avgConf = rs.reduce((s, r) => s + (r.confidenceScore || 0), 0) / rs.length;
+        const missRate = rs.filter((r) => (r.missing || []).length > 0).length / rs.length;
+        const escCount = rs.filter((r) => r.escalated).length;
+        lines.push(`| ${w} | ${rs.length} | ${avgConf.toFixed(0)} | ${(missRate * 100).toFixed(0)}% | ${escCount} |`);
+    }
+    lines.push('');
+    lines.push('## 반복 실수 Top (Failure Patterns)');
+    const patterns = summarizeFailurePatterns(records).slice(0, 10);
+    if (patterns.length === 0) {
+        lines.push('- 없음');
+    } else {
+        for (const p of patterns) lines.push(`- ${p.taskLabel} · **${p.element}** 누락 ${p.count}회`);
+    }
+    lines.push('');
+    return lines.join('\n');
+}
@@ -0,0 +1,273 @@
+/**
+ * Requirement Graph — 업무 유형별 필수 요소 정의 + 감지 + 커버리지 검사.
+ *
+ * Self-Evolving Digital Employee OS 마스터 플랜(docs/SELF_EVOLVING_OS_MASTER_PLAN.md)
+ * Phase 1 / Track 2-1. 신뢰 조건 T3 "품질이 일관적이다 — 필수 요소 누락 없음" 담당.
+ *
+ * 동작 2단계:
+ *   1. *Instructional* — 사용자 요청에서 업무 유형(회의록/시장조사/업무조사/일정) 감지 시
+ *      [TASK REQUIREMENTS] 블록을 시스템 프롬프트에 주입 → 모델이 필수 요소를 빠짐없이 작성.
+ *      정보가 없어 채울 수 없는 요소는 "(확인 필요)" 로 명시하게 강제 — 조용한 생략 금지
+ *      (Anti-Hallucination T1 과 연결).
+ *   2. *Deterministic* — 답변 완료 후 post-answer hook 이 필수 요소 커버리지를 정규식으로
+ *      스캔, 누락 가능 요소를 footer 로 표시 (termValidator 와 같은 패턴, LLM 호출 없음).
+ *
+ * Gap Detector (Phase 3) 가 이 모듈의 Requirement 정의를 입력으로 사용한다:
+ * Gap = Requirement − Knowledge.
+ */
+
+/** One required element of a task deliverable. */
+export interface RequirementElement {
+    /** Stable identifier (the Failure Pattern DB is planned to use it as the miss-count key). */
+    id: string;
+    /** Human-readable element name — shown in the prompt block and the footer. */
+    label: string;
+    /** Writing hint given to the model. */
+    hint: string;
+    /** Regex sources for the coverage check (OR-combined, i+u flags). */
+    detectPatterns: string[];
+}
+
+/** Definition of one task type: how to detect it and which elements it requires. */
+export interface TaskRequirement {
+    /** Task type ID (e.g. 'meeting-minutes'). */
+    id: string;
+    /** Human-readable task name (e.g. '회의록'). */
+    label: string;
+    /** Regex sources that detect the task type in the user request (OR-combined). */
+    detectKeywords: string[];
+    /**
+     * Whether the answer is coverage-checked. Set to false for tasks where a short
+     * confirmation-style reply is normal (e.g. scheduling) — prevents footer noise
+     * (false positives). The prompt block is injected regardless.
+     */
+    coverageCheck: boolean;
+    elements: RequirementElement[];
+}
+
+/** Outcome of the coverage check on one answer. */
+export interface CoverageResult {
+    /** false when the check was skipped; covered/missing are then empty. */
+    ran: boolean;
+    taskId?: string;
+    taskLabel?: string;
+    covered: string[];   // element labels
+    missing: string[];   // element labels
+}
+
+/**
+ * The four built-in task definitions. Array order = detection priority (specific types
+ * first, the generic 'work-research' type last — so that broad "research"-style keywords
+ * do not hijack a market-research request).
+ */
+export const DEFAULT_TASK_REQUIREMENTS: TaskRequirement[] = [
+    {
+        id: 'meeting-minutes',
+        label: '회의록',
+        detectKeywords: ['회의록', '회의 ?(내용|결과)? ?정리', '미팅 ?(노트|정리)', 'meeting (minutes|notes)'],
+        coverageCheck: true,
+        elements: [
+            {
+                id: 'attendees', label: '참석자',
+                hint: '회의 참석 인원 전원. 불명확하면 "(확인 필요)".',
+                detectPatterns: ['참석자', '참석 ?인원', 'attendees?'],
+            },
+            {
+                id: 'decisions', label: '결정사항',
+                hint: '회의에서 합의·확정된 사항. 논의만 되고 미결인 항목과 구분.',
+                detectPatterns: ['결정 ?사항', '결정된', '합의', '확정', 'decisions?'],
+            },
+            {
+                id: 'action-items', label: '액션 아이템',
+                hint: '후속 실행 항목. 각 항목에 담당자·기한 연결.',
+                detectPatterns: ['액션 ?아이템', 'action ?items?', '할 ?일', '후속 ?(조치|작업)', 'to-?do'],
+            },
+            {
+                id: 'owners', label: '담당자',
+                hint: '액션 아이템별 책임자. 미정이면 "(담당자 미정)" 명시.',
+                detectPatterns: ['담당자?', '책임자', 'owner'],
+            },
+            {
+                id: 'due-dates', label: '기한',
+                hint: '액션 아이템별 마감일. 미정이면 "(기한 미정)" 명시.',
+                detectPatterns: ['기한', '마감', '까지', 'due', '\\d{1,2}\\s*월\\s*\\d{1,2}\\s*일'],
+            },
+        ],
+    },
+    {
+        id: 'market-research',
+        label: '시장조사',
+        detectKeywords: ['시장 ?조사', '시장 ?분석', '시장 ?(규모|동향|현황)', 'market (research|analysis)'],
+        coverageCheck: true,
+        elements: [
+            {
+                id: 'market-size', label: '시장 규모',
+                hint: '금액/수량 기준 규모. 수치 출처 필수, 없으면 "(확인 필요)".',
+                detectPatterns: ['시장 ?규모', 'market ?size', '\\d+\\s*(억|조|만\\s*달러|billion|million)'],
+            },
+            {
+                id: 'growth', label: '성장률',
+                hint: '연 성장률(CAGR 등) 또는 성장 추세.',
+                detectPatterns: ['성장률', '성장세', 'CAGR', 'growth', '연평균'],
+            },
+            {
+                id: 'competitors', label: '경쟁사',
+                hint: '주요 플레이어와 각자의 포지션.',
+                detectPatterns: ['경쟁사', '경쟁 ?업체', '주요 ?(업체|기업|플레이어)', 'competitors?'],
+            },
+            {
+                id: 'pricing', label: '가격',
+                hint: '가격대·요금 구조.',
+                detectPatterns: ['가격', '요금', '단가', 'pricing', '원대', '달러'],
+            },
+            {
+                id: 'customer-needs', label: '고객 니즈',
+                hint: '고객 요구·페인 포인트.',
+                detectPatterns: ['니즈', '고객 ?(요구|수요)', '페인 ?포인트', 'needs', 'pain ?points?'],
+            },
+            {
+                id: 'trends', label: '트렌드',
+                hint: '시장 동향·변화 방향.',
+                detectPatterns: ['트렌드', '동향', '추세', 'trends?'],
+            },
+            {
+                id: 'sources', label: '출처',
+                hint: '핵심 수치·주장의 출처. 모델 일반 지식이면 그렇게 명시.',
+                detectPatterns: ['출처', '근거', 'source', '자료:', '참고'],
+            },
+        ],
+    },
+    {
+        id: 'schedule',
+        label: '일정 관리',
+        detectKeywords: ['일정 ?(등록|추가|확인|조회|정리|관리)', '스케줄', '캘린더', '미팅 ?잡', '약속 ?(등록|추가|잡)'],
+        coverageCheck: false, // short confirmation-style replies are normal here — a footer check would only add noise
+        elements: [
+            {
+                id: 'datetime', label: '일시',
+                hint: '날짜와 시간을 명시. 모호하면 되묻기.',
+                detectPatterns: ['\\d{1,2}\\s*[:시]', '날짜', '일시'],
+            },
+            {
+                id: 'title', label: '일정 제목',
+                hint: '무엇을 위한 일정인지.',
+                detectPatterns: ['제목', '일정명', '건명'],
+            },
+            {
+                id: 'conflict-check', label: '충돌 확인',
+                hint: '기존 일정과 겹침 여부 확인 결과 명시.',
+                detectPatterns: ['충돌', '겹치', '겹침'],
+            },
+        ],
+    },
+    {
+        id: 'work-research',
+        label: '업무조사',
+        detectKeywords: ['업무 ?조사', '조사해', '리서치', '알아봐\\s*줘?', '서치해', 'research'],
+        coverageCheck: true,
+        elements: [
+            {
+                id: 'purpose', label: '조사 목적',
+                hint: '무엇을 알기 위한 조사인지 한 줄 명시.',
+                detectPatterns: ['목적', '배경', '알아보기 위해'],
+            },
+            {
+                id: 'summary', label: '핵심 요약',
+                hint: '결론 먼저 — 3줄 이내 요약.',
+                detectPatterns: ['요약', '핵심', '결론부터', 'TL;?DR', 'summary'],
+            },
+            {
+                id: 'details', label: '세부 내용',
+                hint: '요약을 뒷받침하는 상세 조사 내용.',
+                detectPatterns: ['상세', '세부', '구체적', '자세히'],
+            },
+            {
+                id: 'sources', label: '출처',
+                hint: '핵심 주장의 출처. 모델 일반 지식이면 그렇게 명시.',
+                detectPatterns: ['출처', '근거', 'source', '참고'],
+            },
+            {
+                id: 'implications', label: '시사점·다음 단계',
+                hint: '조사 결과가 의미하는 것과 권장 다음 행동.',
+                detectPatterns: ['시사점', '다음 ?단계', '권장', '제안', '결론'],
+            },
+        ],
+    },
+];
+
+/**
+ * Compiles a list of regex sources into one case-insensitive, Unicode-aware regex that
+ * matches when any of the sources matches.
+ *
+ * Empty-string sources are dropped, and an empty list yields a regex that never matches.
+ * Joining an empty list (or a list containing '') would otherwise produce a pattern that
+ * matches every input: a task with no keywords would capture every prompt, and an element
+ * with no patterns would always be reported as covered.
+ *
+ * @throws SyntaxError when a source is not a valid regular expression.
+ */
+function toRegex(sources: string[]): RegExp {
+    const usable = sources.filter((source) => source !== '');
+    // `(?!)` is an empty negative lookahead — it fails at every position.
+    return new RegExp(usable.length > 0 ? usable.join('|') : '(?!)', 'iu');
+}
+
+/**
+ * Detects the task type of a user request: returns the first requirement (in array
+ * order) whose keywords match the prompt, or null when the prompt is blank or nothing
+ * matches. Short greetings and small talk fall out naturally because no keyword matches.
+ */
+export function detectTaskType(
+    userPrompt: string,
+    requirements: TaskRequirement[] = DEFAULT_TASK_REQUIREMENTS,
+): TaskRequirement | null {
+    const isBlank = !userPrompt || !userPrompt.trim();
+    if (isBlank) return null;
+    const firstHit = requirements.find((candidate) => toRegex(candidate.detectKeywords).test(userPrompt));
+    return firstHit ?? null;
+}
+
+/**
+ * Builds the [TASK REQUIREMENTS] system-prompt block for the detected task type: a
+ * checklist of every required element with its writing hint. Returns '' when no task
+ * type is detected, so the caller's join of dynamic blocks drops it automatically.
+ *
+ * @param emphasizeLabels Labels of elements that were frequently missed in the past
+ *                        (supplied by Reflection / Failure Pattern); matching checklist
+ *                        lines get a warning marker (T5: do not repeat the same mistake).
+ */
+export function buildRequirementGraphBlock(
+    userPrompt: string,
+    requirements: TaskRequirement[] = DEFAULT_TASK_REQUIREMENTS,
+    emphasizeLabels: string[] = [],
+): string {
+    const task = detectTaskType(userPrompt, requirements);
+    if (task === null) return '';
+
+    const flagged = new Set(emphasizeLabels);
+    const warning = ' ⚠️ *과거에 자주 누락된 요소 — 특히 주의*';
+    const checklist = task.elements.map(
+        (el) => `- [ ] **${el.label}** — ${el.hint}${flagged.has(el.label) ? warning : ''}`,
+    );
+
+    return [
+        `[TASK REQUIREMENTS — ${task.label}]`,
+        `이 요청은 '${task.label}' 업무로 감지됨. 아래 필수 요소를 *모두* 포함해 작성할 것.`,
+        '정보가 없어 채울 수 없는 요소는 조용히 생략하지 말고 "(확인 필요)" 로 명시 후 사용자에게 질문.',
+        '',
+        ...checklist,
+        '',
+        '제출 전 위 체크리스트를 스스로 점검하고, 누락 요소가 있으면 보완 후 답변할 것.',
+        '[/TASK REQUIREMENTS]',
+    ].join('\n');
+}
+
+/**
+ * Deterministic coverage check of an answer — an element is 'missing' when none of its
+ * detectPatterns occurs in the answer. Regex only, no LLM call, safe to run every turn.
+ *
+ * The check is skipped (`ran: false`) when no task type is detected, the task type has
+ * coverageCheck disabled, or the answer is blank.
+ *
+ * Limitation (deliberately conservative): a pattern match means "the element is
+ * mentioned", not "the content is thorough". Judging thoroughness is left to the
+ * Phase 3 Self Evaluation.
+ */
+export function checkRequirementCoverage(
+    userPrompt: string,
+    assistantAnswer: string,
+    requirements: TaskRequirement[] = DEFAULT_TASK_REQUIREMENTS,
+): CoverageResult {
+    const task = detectTaskType(userPrompt, requirements);
+    const answerIsBlank = !assistantAnswer || !assistantAnswer.trim();
+    if (!task || !task.coverageCheck || answerIsBlank) {
+        return { ran: false, covered: [], missing: [] };
+    }
+
+    const found: string[] = [];
+    const absent: string[] = [];
+    task.elements.forEach((el) => {
+        const bucket = toRegex(el.detectPatterns).test(assistantAnswer) ? found : absent;
+        bucket.push(el.label);
+    });
+    return { ran: true, taskId: task.id, taskLabel: task.label, covered: found, missing: absent };
+}
+
+/**
+ * Coverage footer — returns a warning line listing the possibly missing elements, or ''
+ * when the check did not run or nothing is missing (no noise when everything is covered).
+ * Shown in the same place as the termValidator footer (a streamChunk below the answer).
+ */
+export function formatRequirementCoverageFooter(result: CoverageResult): string {
+    const nothingToReport = !result.ran || result.missing.length === 0;
+    return nothingToReport
+        ? ''
+        : `\n\n> ⚠️ **Requirement Check (${result.taskLabel})** — 누락 가능 요소: ${result.missing.join(', ')}. 해당 내용이 없었다면 "(확인 필요)" 로 표시하거나 추가 정보를 요청하세요.`;
+}
@@ -0,0 +1,153 @@
+/**
+ * Research Agent — 학습 큐 approved 항목의 조사 실행 (설계서 9장).
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 6 / Track 7-1. 학습 루프의 마지막 구간:
+ *   Need Engine → Learning Queue(approved) → **Research Agent** → Validation 게이트 → 저장 제안
+ *
+ * v1 은 "조사 패키지 준비자" — 로컬 환경의 정직한 한계 안에서 동작한다:
+ *   1. 조사 브리프 생성 (LLM 1회): 핵심 질문·검색 키워드·권장 출처 유형
+ *   2. 내부 지식 현황: 두뇌 검색 상위 문서 수집 (이미 아는 것 파악)
+ *   3. 모델 지식 초안: *모든 내용에 추정 라벨* — 출처 없는 지식이므로
+ *   4. Knowledge Validation 게이트: 출처 없음 → 대부분 review (자동 저장 안 됨)
+ *   5. 다음 단계 안내: /research·/benchmark (Datacollect Bridge) 로 외부 근거 수집 후 승인
+ *
+ * 산출물은 <brain>/.astra/growth/proposals/<id>.md — 사람이 검토·보강·승인하는
+ * 제안 문서다. 두뇌 본문에 자동 저장하지 않는다 (Permission Based Learning).
+ */
+
+import type { QueueItem } from './learningQueue';
+import { validateKnowledgeCandidate, ExistingKnowledgeRef, ValidationResult } from './knowledgeValidation';
+
+/** Research plan for one learning topic (produced by the LLM or by fallbackBrief). */
+export interface ResearchBrief {
+    /** Core questions to investigate. parseBrief rejects a brief with none. */
+    questions: string[];
+    /** Search keywords. */
+    keywords: string[];
+    /** Recommended kinds of sources. */
+    sourceTypes: string[];
+}
+
+/** Everything runResearch assembles for a single learning-queue item. */
+export interface ResearchPackage {
+    /** Id of the queue item this package was built for. */
+    queueItemId: string;
+    /** Topic copied from the queue item. */
+    topic: string;
+    /** Research brief (questions, keywords, source types). */
+    brief: ResearchBrief;
+    /** Related internal (brain) documents — what is already known. */
+    internalRefs: ExistingKnowledgeRef[];
+    /** Draft from the model's own knowledge (carries "estimate" labels). */
+    draft: string;
+    /** Result of the knowledge validation gate applied to the draft. */
+    validation: ValidationResult;
+}
+
+/** Injected LLM call: (system prompt, user prompt, max tokens) resolving to the raw reply text. */
+export type ResearchLlmCall = (system: string, user: string, maxTokens: number) => Promise<string>;
+
+/**
+ * Parses a research brief out of a raw LLM reply.
+ *
+ * Simplified extraction: takes the text between the first `{` and the last `}`
+ * and parses it as JSON. Each field keeps only its string entries, capped at 8.
+ *
+ * @param raw Raw LLM output, possibly with text around the JSON object.
+ * @returns The brief, or null when no JSON object is found, parsing fails,
+ *          or the brief contains no questions.
+ */
+export function parseBrief(raw: string): ResearchBrief | null {
+    const open = raw.indexOf('{');
+    const close = raw.lastIndexOf('}');
+    if (open === -1 || close <= open) {
+        return null;
+    }
+
+    // Non-array values collapse to an empty list; non-string entries are dropped.
+    const stringList = (value: any): string[] => {
+        if (!Array.isArray(value)) {
+            return [];
+        }
+        return value.filter((entry) => typeof entry === 'string').slice(0, 8);
+    };
+
+    try {
+        const parsed = JSON.parse(raw.slice(open, close + 1));
+        const questions = stringList(parsed.questions);
+        const keywords = stringList(parsed.keywords);
+        const sourceTypes = stringList(parsed.sourceTypes);
+        if (questions.length === 0) {
+            return null;
+        }
+        return { questions, keywords, sourceTypes };
+    } catch {
+        return null;
+    }
+}
+
+/**
+ * Minimal topic-based brief, used so the loop keeps going when the LLM call
+ * fails or returns nothing parseable.
+ *
+ * @param topic Learning topic inserted into the template questions.
+ * @returns A brief with two generic questions, the topic as the only keyword,
+ *          and two default source types.
+ */
+export function fallbackBrief(topic: string): ResearchBrief {
+    const questions = [
+        `${topic} 의 핵심 개념과 현재 표준은 무엇인가?`,
+        `${topic} 에서 자주 발생하는 실수와 베스트 프랙티스는?`,
+    ];
+    const keywords = [topic];
+    const sourceTypes = ['공식 문서', '최근 1년 내 자료'];
+    return { questions, keywords, sourceTypes };
+}
+
+/**
+ * Prepares a research package for one learning-queue item.
+ *
+ * Steps, in order: (1) ask the LLM for a research brief, falling back to a
+ * topic-based brief on failure; (2) collect related internal documents;
+ * (3) ask the LLM for a draft whose every paragraph must carry an "estimate"
+ * label; (4) run the draft through the knowledge validation gate without a
+ * source. Failures in steps 1-3 are absorbed so the package is always produced.
+ *
+ * @param params.item Queue item to research.
+ * @param params.fetchInternalRefs Fetches existing brain documents related to the topic (injected by the orchestrator).
+ * @param params.callLlm Injected LLM call.
+ * @param params.nowIso Timestamp recorded as the candidate's collectedAt.
+ * @returns The assembled research package.
+ */
+export async function runResearch(params: {
+    item: QueueItem;
+    /** Fetches existing brain documents related to the topic (injected by the orchestrator). */
+    fetchInternalRefs: (topic: string) => Promise<ExistingKnowledgeRef[]>;
+    callLlm: ResearchLlmCall;
+    nowIso: string;
+}): Promise<ResearchPackage> {
+    const item = params.item;
+
+    // --- 1. Research brief (LLM; planning is a low-hallucination-risk use) ---
+    const planBrief = async (): Promise<ResearchBrief> => {
+        const system = [
+            '너는 조사 계획 수립자다. 주어진 학습 주제에 대한 조사 브리프를 만든다.',
+            '반드시 아래 JSON 만 출력:',
+            '{"questions": ["핵심 질문 3~5개"], "keywords": ["검색 키워드 3~6개"], "sourceTypes": ["권장 출처 유형 2~4개"]}',
+        ].join('\n');
+        try {
+            const reply = await params.callLlm(system, `학습 주제: ${item.topic}\n선정 사유: ${item.reason}`, 400);
+            return parseBrief(reply) ?? fallbackBrief(item.topic);
+        } catch {
+            return fallbackBrief(item.topic);
+        }
+    };
+    const brief = await planBrief();
+
+    // --- 2. Internal knowledge: a failed search proceeds with an empty list ---
+    const collectInternalRefs = async (): Promise<ExistingKnowledgeRef[]> => {
+        try {
+            return await params.fetchInternalRefs(item.topic);
+        } catch {
+            return [];
+        }
+    };
+    const internalRefs = await collectInternalRefs();
+
+    // --- 3. Model-knowledge draft: estimate labels are mandatory ---
+    const writeDraft = async (): Promise<string> => {
+        const system = [
+            '너는 학습 노트 초안 작성자다. 주어진 질문들에 대해 아는 것을 정리한다.',
+            '중요: 너의 일반 지식은 출처가 없다. 모든 단락 끝에 "(모델 지식 — 추정, 출처 확인 필요)" 를 붙일 것.',
+            '모르는 것은 "모름 — 외부 조사 필요" 로 솔직히 표시. 지어내기 금지.',
+            '마크다운 ## 섹션으로 질문별 정리.',
+        ].join('\n');
+        try {
+            const numbered = brief.questions.map((q, i) => `${i + 1}. ${q}`).join('\n');
+            return await params.callLlm(system, numbered, 1200);
+        } catch {
+            return '(초안 생성 실패 — 외부 조사로 직접 작성 필요)';
+        }
+    };
+    const draft = await writeDraft();
+
+    // --- 4. Validation gate: `source` is deliberately omitted for a source-less draft ---
+    const candidate = { title: item.topic, content: draft, collectedAt: params.nowIso };
+    const validation = validateKnowledgeCandidate(candidate, internalRefs);
+
+    return { queueItemId: item.id, topic: item.topic, brief, internalRefs, draft, validation };
+}
+
+/**
+ * Renders a research package as a markdown proposal document.
+ *
+ * Sections: header with the validation verdict, research brief, internal
+ * knowledge status, the model draft, and a fixed list of next steps.
+ *
+ * @param pkg Research package to render.
+ * @param meta Date string and model name shown in the header.
+ * @returns Markdown text ending with a trailing newline.
+ */
+export function formatProposalMarkdown(pkg: ResearchPackage, meta: { dateStr: string; modelName: string }): string {
+    const { brief, internalRefs, validation } = pkg;
+
+    const header = [
+        `# 학습 제안 — ${pkg.topic}`,
+        '',
+        `- 생성: ${meta.dateStr} · 모델: ${meta.modelName} · 큐 항목: ${pkg.queueItemId}`,
+        `- **검증 판정: ${validation.verdict}** — ${validation.reasons.join(' / ')}`,
+        '',
+    ];
+
+    const briefSection = [
+        '## 1. 조사 브리프',
+        '',
+        '**핵심 질문**',
+        ...brief.questions.map((q) => `- ${q}`),
+        '',
+        `**검색 키워드**: ${brief.keywords.join(', ')}`,
+        `**권장 출처**: ${brief.sourceTypes.join(', ')}`,
+        '',
+    ];
+
+    // Each reference shows its file path when present, otherwise its title.
+    const refLines = internalRefs.length === 0
+        ? ['- 관련 문서 없음 — 완전한 신규 영역']
+        : internalRefs.map((ref) => `- \`${ref.filePath || ref.title}\``);
+    const internalSection = ['## 2. 내부 지식 현황 (두뇌에 이미 있는 것)', '', ...refLines, ''];
+
+    const draftSection = ['## 3. 모델 지식 초안 (출처 없음 — 검증 전 사용 금지)', '', pkg.draft, ''];
+
+    const nextSteps = [
+        '## 4. 다음 단계',
+        '',
+        '1. 위 키워드로 외부 근거 수집 — ASTRA 채팅에서 `/research` 또는 `/benchmark` (Datacollect Bridge 필요)',
+        '2. 수집 근거로 초안을 보강·교정 (추정 라벨 제거는 출처 확보 후에만)',
+        '3. 완성본을 두뇌 적절한 폴더에 저장하면 다음 turn 부터 검색에 반영됨',
+        '4. learning-queue.json 에서 이 항목 status 를 done 으로 변경',
+        '',
+    ];
+
+    return [...header, ...briefSection, ...internalSection, ...draftSection, ...nextSteps].join('\n');
+}
@@ -0,0 +1,168 @@
+/**
+ * Skill Score + Success Pattern DB — 역량 점수와 성공 사례 축적 (설계서 12장).
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 6 / Track 7-3 + 7-4.
+ *
+ * Skill Score (0~100, 업무 유형 단위 v1):
+ *   최근 N건 reflection 의 (확신도 50% + 요소 충족률 30% + 비에스컬레이션율 20%).
+ *   전반기/후반기 비교로 추세(↑/→/↓)를 산출 — "SEO 52→81" 류 성장 표시의 원천.
+ *
+ * Success Pattern DB:
+ *   전 요소 충족 + 확신도 high 인 업무 turn 을 <brain>/.astra/growth/success-patterns.jsonl
+ *   에 적재. v1 은 기록·집계 (향후 증분: 신규 업무 turn 에 모범 사례로 주입).
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+import type { ReflectionRecord } from './reflectionStore';
+
+export const SUCCESS_PATTERNS_REL_PATH = path.join('.astra', 'growth', 'success-patterns.jsonl');
+
+// ─────────────────────────── Skill Score ───────────────────────────
+
+/** Skill score for one task type, as produced by computeSkillScores. */
+export interface SkillScoreItem {
+    taskId: string;
+    taskLabel: string;
+    /** 0-100. */
+    score: number;
+    /** Second half versus first half: 'up' | 'flat' | 'down'. Always 'flat' with fewer than 4 samples. */
+    trend: 'up' | 'flat' | 'down';
+    /** First-half / second-half scores (the basis for the trend). */
+    firstHalf: number;
+    secondHalf: number;
+    /** Number of reflection records that went into the score. */
+    sampleCount: number;
+}
+
+/**
+ * Scores a set of reflection records on a 0-100 scale.
+ *
+ * Weighted sum of: average confidence (50%), share of records with no missing
+ * elements (30%), and share of records that were not escalated (20%).
+ *
+ * @param rs Records to score.
+ * @returns Rounded score; 0 for an empty set.
+ */
+function scoreOf(rs: ReflectionRecord[]): number {
+    const total = rs.length;
+    if (total === 0) {
+        return 0;
+    }
+
+    let confidenceSum = 0;
+    let fullyCovered = 0;
+    let notEscalated = 0;
+    for (const rec of rs) {
+        confidenceSum += rec.confidenceScore || 0;
+        if ((rec.missing || []).length === 0) fullyCovered += 1;
+        if (!rec.escalated) notEscalated += 1;
+    }
+
+    const avgConf = confidenceSum / total;
+    const coverOk = fullyCovered / total;
+    const noEsc = notEscalated / total;
+    return Math.round(avgConf * 0.5 + coverOk * 100 * 0.3 + noEsc * 100 * 0.2);
+}
+
+/**
+ * Computes one skill score per task type from reflection records.
+ *
+ * Records without a taskId are ignored. Within each task the records are
+ * ordered by `ts` and split at the midpoint; the trend compares the two halves
+ * and is only reported with 4+ samples and a difference of at least 5 points.
+ *
+ * @param records Reflection records across all tasks.
+ * @returns Items sorted by overall score, highest first.
+ */
+export function computeSkillScores(records: ReflectionRecord[]): SkillScoreItem[] {
+    const grouped = new Map<string, ReflectionRecord[]>();
+    for (const rec of records) {
+        if (!rec.taskId) continue;
+        const bucket = grouped.get(rec.taskId);
+        if (bucket) {
+            bucket.push(rec);
+        } else {
+            grouped.set(rec.taskId, [rec]);
+        }
+    }
+
+    const items = Array.from(grouped.entries()).map(([taskId, group]): SkillScoreItem => {
+        // Order by timestamp, then compare the earlier half against the later half.
+        const ordered = [...group].sort((a, b) => a.ts.localeCompare(b.ts));
+        const half = Math.floor(ordered.length / 2);
+        const firstHalf = scoreOf(ordered.slice(0, half));
+        const secondHalf = scoreOf(ordered.slice(half));
+
+        let trend: SkillScoreItem['trend'] = 'flat';
+        if (ordered.length >= 4) {
+            if (secondHalf - firstHalf >= 5) {
+                trend = 'up';
+            } else if (firstHalf - secondHalf >= 5) {
+                trend = 'down';
+            }
+        }
+
+        return {
+            taskId,
+            taskLabel: ordered[0].taskLabel || taskId,
+            score: scoreOf(ordered),
+            trend,
+            firstHalf,
+            secondHalf,
+            sampleCount: ordered.length,
+        };
+    });
+
+    return items.sort((a, b) => b.score - a.score);
+}
+
+/**
+ * Renders skill scores as a markdown section.
+ *
+ * @param items Skill score items, rendered in the given order.
+ * @returns Heading and formula description followed by either a "no data"
+ *          bullet or one table row per item.
+ */
+export function formatSkillScoresMarkdown(items: SkillScoreItem[]): string {
+    const intro = [
+        '## Skill Score (역량 점수)',
+        '',
+        '확신도 50% + 요소 충족률 30% + 비에스컬레이션율 20%. 추세는 전/후반기 비교 (표본 4건+).',
+        '',
+    ];
+    if (items.length === 0) {
+        return [...intro, '- 데이터 없음'].join('\n');
+    }
+
+    const trendText = { up: '📈 상승', flat: '→ 유지', down: '📉 하락' } as const;
+    const rows = items.map(
+        (item) => `| ${item.taskLabel} | **${item.score}** | ${trendText[item.trend]} | ${item.firstHalf}→${item.secondHalf} | ${item.sampleCount} |`,
+    );
+    return [
+        ...intro,
+        '| 업무 | Score | 추세 | 전반기→후반기 | 표본 |',
+        '|---|---|---|---|---|',
+        ...rows,
+    ].join('\n');
+}
+
+// ─────────────────────── Success Pattern DB ───────────────────────
+
+/** One success-pattern entry, stored as a single JSON line in the success-patterns file. */
+export interface SuccessPattern {
+    /** Timestamp copied from the reflection record. */
+    ts: string;
+    taskId: string;
+    /** Task label; appendSuccessPattern falls back to the taskId when the record has none. */
+    taskLabel: string;
+    confidenceScore: number;
+    promptPreview: string;
+    /** Sources copied from the record (empty array when the record has none). */
+    usedSources: string[];
+}
+
+/**
+ * Success criterion: the turn has a task type, no missing elements, and a
+ * confidence score of 90 or higher.
+ */
+export function isSuccessTurn(record: ReflectionRecord): boolean {
+    if (!record.taskId) {
+        return false;
+    }
+    const missingCount = (record.missing || []).length;
+    if (missingCount !== 0) {
+        return false;
+    }
+    return record.confidenceScore >= 90;
+}
+
+/**
+ * Appends a reflection record to the success-pattern file when it qualifies
+ * as a success turn.
+ *
+ * Best effort: any error (including file-system failures) is swallowed and
+ * reported as `false`.
+ *
+ * @param brainPath Root directory of the brain.
+ * @param record Reflection record for the turn.
+ * @returns true when a line was appended; false when the turn does not qualify or writing failed.
+ */
+export function appendSuccessPattern(brainPath: string, record: ReflectionRecord): boolean {
+    try {
+        if (!isSuccessTurn(record)) {
+            return false;
+        }
+        const target = path.join(brainPath, SUCCESS_PATTERNS_REL_PATH);
+        fs.mkdirSync(path.dirname(target), { recursive: true });
+
+        const taskId = record.taskId!;
+        const entry: SuccessPattern = {
+            ts: record.ts,
+            taskId,
+            taskLabel: record.taskLabel || taskId,
+            confidenceScore: record.confidenceScore,
+            promptPreview: record.promptPreview,
+            usedSources: record.usedSources || [],
+        };
+        // One JSON object per line (JSONL).
+        fs.appendFileSync(target, `${JSON.stringify(entry)}\n`, 'utf8');
+        return true;
+    } catch {
+        return false;
+    }
+}
+
+/**
+ * Loads success patterns from the JSONL file under the brain directory.
+ *
+ * Blank lines, unparseable lines, and entries without string `ts` and `taskId`
+ * are skipped. Any other failure yields an empty list.
+ *
+ * @param brainPath Root directory of the brain.
+ * @param limit When positive, only the last `limit` entries are returned.
+ * @returns Patterns in file order.
+ */
+export function loadSuccessPatterns(brainPath: string, limit?: number): SuccessPattern[] {
+    try {
+        const file = path.join(brainPath, SUCCESS_PATTERNS_REL_PATH);
+        if (!fs.existsSync(file)) {
+            return [];
+        }
+
+        const loaded: SuccessPattern[] = [];
+        for (const rawLine of fs.readFileSync(file, 'utf8').split('\n')) {
+            if (!rawLine.trim()) continue;
+            let candidate: any;
+            try {
+                candidate = JSON.parse(rawLine);
+            } catch {
+                continue; // skip a corrupt line
+            }
+            if (candidate && typeof candidate.ts === 'string' && typeof candidate.taskId === 'string') {
+                loaded.push(candidate);
+            }
+        }
+
+        if (limit && limit > 0) {
+            return loaded.slice(-limit);
+        }
+        return loaded;
+    } catch {
+        return [];
+    }
+}
+
+/**
+ * Renders success patterns as a markdown section: a total with per-task counts,
+ * followed by the five most recent entries (newest first).
+ *
+ * Entries read back from the JSONL file are only guaranteed to have string
+ * `ts` and `taskId`, so the other fields are read defensively: a missing label
+ * falls back to the taskId, a missing prompt preview renders as an empty
+ * quote, and a non-numeric confidence renders as "?".
+ *
+ * @param patterns Patterns in chronological order.
+ * @returns Markdown text without a trailing newline.
+ */
+export function formatSuccessPatternsMarkdown(patterns: SuccessPattern[]): string {
+    const lines: string[] = [];
+    lines.push('## Success Patterns (성공 사례)');
+    lines.push('');
+    if (patterns.length === 0) {
+        lines.push('- 아직 없음 — 전 요소 충족 + 확신도 90+ 인 업무가 자동 축적됩니다.');
+        return lines.join('\n');
+    }
+    const labelOf = (p: SuccessPattern): string => p.taskLabel || p.taskId;
+    const byTask = new Map<string, number>();
+    for (const p of patterns) byTask.set(labelOf(p), (byTask.get(labelOf(p)) || 0) + 1);
+    lines.push(`총 ${patterns.length}건 — ${Array.from(byTask.entries()).map(([l, c]) => `${l} ${c}건`).join(' · ')}`);
+    lines.push('');
+    for (const p of patterns.slice(-5).reverse()) {
+        const preview = typeof p.promptPreview === 'string' ? p.promptPreview.slice(0, 60) : '';
+        const confidence = typeof p.confidenceScore === 'number' ? p.confidenceScore : '?';
+        lines.push(`- [${p.ts.slice(0, 10)}] ${labelOf(p)} (확신도 ${confidence}) — "${preview}"`);
+    }
+    return lines.join('\n');
+}
@@ -0,0 +1,185 @@
+/**
+ * Task Eval Harness — 업무 산출물 골든셋 자동 채점 (Self Evaluation v1).
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 3 / Track 3-4. "성장세를 숫자로 증명" 의 핵심:
+ * 같은 골든셋을 버전마다 돌려 점수 추이를 비교한다 (검색 평가 하니스가 recall@1
+ * 37.5%→75% 를 증명한 것과 같은 방법론을 업무 산출물에 적용).
+ *
+ * 골든셋: <brain>/.astra/eval/tasks/<task>.golden.jsonl
+ *   한 줄 = {"id","query","sourceFile","expectedElements":[label...],"reference","notes"}
+ *
+ * v1 채점은 결정론적 (LLM-judge 는 후속 증분):
+ *   - 요소 커버리지: expectedElements 의 detectPatterns 매치율 (requirementGraph 어휘 재사용)
+ *   - 정직성: "(확인 필요)" 류 표시 사용 여부 (지어내기 대신 모름 인정 — T1)
+ *   - 길이·구조: 섹션 헤딩 수
+ * LLM 호출(생성)은 주입(generate) — 하니스 자체는 순수, 테스트 가능.
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+import { DEFAULT_TASK_REQUIREMENTS, TaskRequirement } from './requirementGraph';
+
+export const TASK_GOLDEN_DIR = path.join('.astra', 'eval', 'tasks');
+
+/** One golden-set record (one JSON line of the `.golden.jsonl` file). */
+export interface TaskGoldenRecord {
+    /** Record identifier; reported as recordId in the scores. */
+    id: string;
+    query: string;
+    /** Source material reference, handed to the injected readSource by runTaskEval. */
+    sourceFile: string;
+    // NOTE(review): not read by the harness code in this file — presumably informational; confirm.
+    meetingTopic?: string;
+    /** Element labels the generated answer is expected to cover. */
+    expectedElements: string[];
+    // NOTE(review): `reference` and `notes` are not read by the deterministic scorer in this file.
+    reference: string;
+    notes?: string;
+}
+
+/** Result of loading a golden set. */
+export interface TaskGoldenLoadResult {
+    /** Records that parsed and passed the shape check. */
+    records: TaskGoldenRecord[];
+    /** Number of lines that failed to parse or had the wrong shape. */
+    parseErrors: number;
+    /** Path of the golden file that was looked up (set even when the file does not exist). */
+    sourcePath: string;
+}
+
+/**
+ * Loads a golden set from `<brain>/.astra/eval/tasks/<taskFileBase>.golden.jsonl`.
+ *
+ * Blank lines and lines starting with `//` are ignored. A line that is not
+ * valid JSON, or lacks a string `id`, a string `query`, or an array
+ * `expectedElements`, is only counted in `parseErrors`.
+ *
+ * @param brainPath Root directory of the brain.
+ * @param taskFileBase Base name of the golden file.
+ * @returns Loaded records, parse-error count, and the resolved path; empty when the file is absent.
+ */
+export function loadTaskGoldenSet(brainPath: string, taskFileBase = 'meeting-minutes'): TaskGoldenLoadResult {
+    const sourcePath = path.join(brainPath, TASK_GOLDEN_DIR, `${taskFileBase}.golden.jsonl`);
+    const result: TaskGoldenLoadResult = { records: [], parseErrors: 0, sourcePath };
+    if (!fs.existsSync(sourcePath)) {
+        return result;
+    }
+
+    const hasRecordShape = (value: any): boolean =>
+        Boolean(value)
+        && typeof value.id === 'string'
+        && typeof value.query === 'string'
+        && Array.isArray(value.expectedElements);
+
+    for (const rawLine of fs.readFileSync(sourcePath, 'utf8').split('\n')) {
+        const text = rawLine.trim();
+        if (!text || text.startsWith('//')) continue;
+
+        let parsed: any;
+        try {
+            parsed = JSON.parse(text);
+        } catch {
+            result.parseErrors++;
+            continue;
+        }
+        if (hasRecordShape(parsed)) {
+            result.records.push(parsed as TaskGoldenRecord);
+        } else {
+            result.parseErrors++;
+        }
+    }
+    return result;
+}
+
+/**
+ * Maps an element label to the regex used to detect it.
+ *
+ * Uses the detectPatterns of the first requirement element with that label
+ * (joined as alternatives); a label not defined in any requirement is matched
+ * as a literal with regex metacharacters escaped. Both forms are
+ * case-insensitive and Unicode-aware.
+ */
+function patternsForLabel(label: string, requirements: TaskRequirement[]): RegExp {
+    for (const requirement of requirements) {
+        const defined = requirement.elements.find((element) => element.label === label);
+        if (defined) {
+            return new RegExp(defined.detectPatterns.join('|'), 'iu');
+        }
+    }
+    // Custom element with no definition: match the label text itself.
+    const literal = label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+    return new RegExp(literal, 'iu');
+}
+
+/** Deterministic score for one generated answer. */
+export interface TaskScore {
+    /** Id of the golden record this score belongs to. */
+    recordId: string;
+    /** Element coverage, 0-1. */
+    coverageRate: number;
+    /** Expected element labels that were detected in the answer. */
+    covered: string[];
+    /** Expected element labels that were not detected. */
+    missing: string[];
+    /** Count of honesty markers such as "(확인 필요)" (a signal against fabrication). */
+    honestyMarkers: number;
+    /** Number of markdown section headings (degree of structure); scoreTaskAnswer counts heading levels 1-3. */
+    sectionCount: number;
+    /** Output length in characters. */
+    answerLength: number;
+    /** Error message when generation failed. */
+    error?: string;
+}
+
+/**
+ * Scores one answer against a golden record without calling an LLM.
+ *
+ * Measures element coverage (each expected label detected via its regex), the
+ * number of honesty markers, the number of markdown headings of level 1-3, and
+ * the answer length.
+ *
+ * @param answer Generated answer text.
+ * @param record Golden record supplying the id and the expected elements.
+ * @param requirements Requirement definitions used to resolve label patterns.
+ * @returns The score; coverageRate is 1 when the record expects no elements.
+ */
+export function scoreTaskAnswer(
+    answer: string,
+    record: TaskGoldenRecord,
+    requirements: TaskRequirement[] = DEFAULT_TASK_REQUIREMENTS,
+): TaskScore {
+    const covered: string[] = [];
+    const missing: string[] = [];
+    record.expectedElements.forEach((label) => {
+        const detected = patternsForLabel(label, requirements).test(answer);
+        (detected ? covered : missing).push(label);
+    });
+
+    const honestyHits = answer.match(/\(확인 필요\)|\(담당자? 미정\)|\(기한 미정\)/g) || [];
+    const headingHits = answer.match(/^#{1,3}\s+/gm) || [];
+    const expectedCount = record.expectedElements.length;
+
+    return {
+        recordId: record.id,
+        coverageRate: expectedCount === 0 ? 1 : covered.length / expectedCount,
+        covered,
+        missing,
+        honestyMarkers: honestyHits.length,
+        sectionCount: headingHits.length,
+        answerLength: answer.length,
+    };
+}
+
+/** Aggregate result of one golden-set evaluation run. */
+export interface TaskEvalRunResult {
+    /** One score per record, including failed records (those carry `error`). */
+    scores: TaskScore[];
+    /** Mean coverageRate over records without an error; 0 when there are none. */
+    avgCoverage: number;
+    /** Number of error-free records with coverageRate exactly 1. */
+    perfectCount: number;
+}
+
+/**
+ * Evaluates a whole golden set: for each record, reads the source material
+ * (injected readSource), generates an answer (injected generate), and scores it.
+ * A failure on one record is recorded as an error score and does not stop the run.
+ *
+ * @returns Per-record scores plus the average coverage and perfect count over
+ *          the records that did not fail.
+ */
+export async function runTaskEval(params: {
+    records: TaskGoldenRecord[];
+    /** Loads source file content — usually fs.readFileSync, a fake in tests. */
+    readSource: (sourceFile: string) => string;
+    /** Produces the answer for a record given its (possibly truncated) source content. */
+    generate: (record: TaskGoldenRecord, sourceContent: string) => Promise<string>;
+    /** Maximum source length in chars, protecting the local model's context. Default 20000. */
+    maxSourceChars?: number;
+    onProgress?: (done: number, total: number) => void;
+}): Promise<TaskEvalRunResult> {
+    const sourceLimit = params.maxSourceChars ?? 20000;
+    const scores: TaskScore[] = [];
+
+    for (const [index, record] of params.records.entries()) {
+        try {
+            const fullSource = params.readSource(record.sourceFile);
+            const source = fullSource.length > sourceLimit
+                ? fullSource.slice(0, sourceLimit) + '\n…(잘림)'
+                : fullSource;
+            const answer = await params.generate(record, source);
+            scores.push(scoreTaskAnswer(answer, record));
+        } catch (e: any) {
+            // A failed record counts every expected element as missing.
+            scores.push({
+                recordId: record.id,
+                coverageRate: 0,
+                covered: [],
+                missing: record.expectedElements.slice(),
+                honestyMarkers: 0,
+                sectionCount: 0,
+                answerLength: 0,
+                error: e?.message || String(e),
+            });
+        }
+        params.onProgress?.(index + 1, params.records.length);
+    }
+
+    // Aggregates only consider records that were generated and scored.
+    const succeeded = scores.filter((s) => !s.error);
+    const coverageSum = succeeded.reduce((sum, s) => sum + s.coverageRate, 0);
+    const avgCoverage = succeeded.length === 0 ? 0 : coverageSum / succeeded.length;
+    const perfectCount = succeeded.filter((s) => s.coverageRate === 1).length;
+    return { scores, avgCoverage, perfectCount };
+}
+
+/**
+ * Renders an evaluation run as a markdown report: metadata, a summary line,
+ * and one table row per record (failed records show the error, truncated to
+ * 60 characters).
+ *
+ * @param result Evaluation result to render.
+ * @param meta Report metadata; `notes` adds an extra metadata line when set.
+ * @returns Markdown text ending with a trailing newline.
+ */
+export function formatTaskEvalReport(
+    result: TaskEvalRunResult,
+    meta: { taskLabel: string; brainName: string; dateStr: string; modelName: string; notes?: string },
+): string {
+    const head: string[] = [
+        `# 업무 평가 리포트 — ${meta.taskLabel}`,
+        '',
+        `- 두뇌: ${meta.brainName}`,
+        `- 일시: ${meta.dateStr}`,
+        `- 모델: ${meta.modelName}`,
+    ];
+    if (meta.notes) {
+        head.push(`- 비고: ${meta.notes}`);
+    }
+
+    const rows = result.scores.map((s) => {
+        if (s.error) {
+            return `| ${s.recordId} | — | (실패: ${s.error.slice(0, 60)}) | — | — | — |`;
+        }
+        return `| ${s.recordId} | ${(s.coverageRate * 100).toFixed(0)}% | ${s.missing.join(', ') || '없음'} | ${s.honestyMarkers} | ${s.sectionCount} | ${s.answerLength} |`;
+    });
+
+    return [
+        ...head,
+        '',
+        `## 요약 — 평균 요소 커버리지 **${(result.avgCoverage * 100).toFixed(1)}%** · 전 요소 충족 ${result.perfectCount}/${result.scores.length}건`,
+        '',
+        '| 레코드 | 커버리지 | 누락 요소 | 정직성 표시 | 섹션 수 | 길이 |',
+        '|---|---|---|---|---|---|',
+        ...rows,
+        '',
+        '> 같은 골든셋으로 버전마다 측정해 커버리지 추이를 비교하세요 — 이 숫자의 상승이 곧 성장세입니다.',
+        '',
+    ].join('\n');
+}