feat: v2.2.3 - Stability, Self-Reflector & Intent Alignment

- 버전 2.2.3 상향 및 PATCHNOTES.md 업데이트
- [신규] src/features/selfReflector/ - 성찰 실행/검증/프롬프트 모듈 추가
- [신규] intentAlignment.ts, intentClassifier.ts - 의도 정렬 시스템 추가
- [신규] pixelOfficeState.ts - 픽셀 오피스 상태 관리 추가
- sidebarProvider, dispatcher, chatHandlers 핵심 로직 최적화
- astra-2.2.3.vsix 패키지 생성 완료 (298 tests PASS)
2026-05-15 14:16:14 +09:00
parent ed7e497194
commit 72412450c3
33 changed files with 4964 additions and 125 deletions
@@ -40,6 +40,7 @@ import {
    buildKnowledgeMixPolicy,
 } from '../../retrieval/knowledgeMix';
 import {
+    listActiveAgentsByCategory,
    modelForAgent, readCompanyState, resolveActivePipeline, resolveAgent, resolveCompanyKnowledgeMix,
 } from './companyConfig';
 import { runCeoPlanner } from './ceoPlanner';
@@ -64,7 +65,11 @@ import {
    writeResumeState,
 } from './resumeStore';
 import { buildTelegramReporter, formatCompanyTelegramReport } from './telegramReport';
-import { AgentTurnOutput, CompanyResumeState, CompanyState, CompanyTaskPlan, PipelineDef, PipelineStage, SessionResult } from './types';
+import {
+    AgentRoleCategory, AgentTurnOutput, CompanyResumeState, CompanyState, CompanyTaskPlan,
+    PipelineDef, PipelineStage, RequirementContract, ROLE_CATEGORY_LABELS, SessionResult,
+} from './types';
+import { formatContractForPrompt } from './intentAlignment';

 /** Trim length applied when an agent's output is fed into the next agent. */
 const PEER_OUTPUT_BUDGET = 1500;
@@ -105,6 +110,28 @@ export type CompanyTurnEvent =
    | { phase: 'awaiting-approval'; stageId: string; stageLabel: string; index: number; total: number }
    /** Resolved approval — purely informational for the chat log. */
    | { phase: 'approval-resolved'; stageId: string; decision: 'approve' | 'revise' | 'abort' }
+    /**
+     * 3-way 검수 사이클 시작 — 작업자 산출물 직후, 검수자/CEO 메타-판단을
+     * 돌리기 직전에 emit. webview는 stage 카드 안에 라운드 누적 영역을 연다.
+     */
+    | { phase: 'review-start'; stageId: string; stageLabel: string; maxRounds: number; inspectorAgentId: string }
+    /**
+     * 한 검수 라운드 결과. inspectorVerdict + ceoVerdict + 각자 코멘트를
+     * 묶어 한 이벤트로. 라운드를 chat에서 한 줄씩 누적 표시 가능하다.
+     */
+    | {
+        phase: 'review-round';
+        stageId: string;
+        round: number;
+        inspectorAgentId: string;
+        inspectorText: string;
+        inspectorVerdict: 'pass' | 'revise' | 'unclear';
+        ceoText: string;
+        ceoVerdict: 'pass' | 'revise' | 'abort' | 'unclear';
+        durationMs: number;
+    }
+    /** 검수 사이클 종료. final = 마지막 라운드 verdict. */
+    | { phase: 'review-end'; stageId: string; final: 'pass' | 'aborted' | 'maxed-out'; rounds: number }
    | { phase: 'report-start' }
    | { phase: 'report-done'; report: string; ok: boolean }
    /**
@@ -160,6 +187,22 @@ export interface DispatcherDeps {
     *      (so the dispatcher doesn't hang forever)
     */
    awaitApproval?: (ctx: { stageId: string; stageLabel: string }) => Promise<ApprovalDecision>;
+    /**
+     * 이번 turn 한정으로 활성 파이프라인을 *override*. 비어 있으면 평소대로
+     * `state.activePipelineId` 따른다. 의도 분류기의 `suggestedPipelineId` 또는
+     * 사용자 키워드(`[파이프라인:id]`) 검출 시 chatHandlers가 채워서 넘긴다.
+     * 알 수 없는 id면 dispatcher가 silent fallback해서 legacy 동작
+     * (state.activePipelineId 또는 CEO planner)로 진행.
+     */
+    pipelineIdOverride?: string;
+    /**
+     * Intent Alignment 단계에서 사용자와 합의된 Requirement Contract. 있으면
+     * CEO planner / specialist prompt / 검수자(inspector + CEO) prompt 전부에
+     * 같은 ground truth로 주입되어 에이전트들이 추측 대신 contract를 따른다.
+     * 없으면 legacy 동작 — alignment 단계를 거치지 않았거나 사용자 모드가
+     * 'off'였던 경우.
+     */
+    requirementContract?: RequirementContract;
 }

 /**
@@ -267,18 +310,34 @@ export async function runCompanyTurn(
        emit({ phase: 'plan-ready', plan, parsed: true, raw: '' });
    } else {
        emit({ phase: 'plan-start' });
-        pipeline = resolveActivePipeline(state);
+        // deps.pipelineIdOverride가 들어왔으면 *이번 turn만* 그 파이프라인을 쓴다.
+        // state.activePipelineId는 건드리지 않으므로 다음 라운드부턴 다시 사용자
+        // 설정 따른다. override id가 유효한 파이프라인을 못 가리키면 silent fallback.
+        const overrideId = deps.pipelineIdOverride;
+        pipeline = overrideId
+            ? (state.pipelines?.[overrideId] ?? resolveActivePipeline(state))
+            : resolveActivePipeline(state);
        if (pipeline) {
            // Pipeline mode: the user has authored a fixed sequence of stages.
            // We still surface a `plan` for the report writer and the session
            // summary — derived directly from the pipeline definition.
            plan = {
                brief: `[Pipeline: ${pipeline.name}] ${userPrompt.slice(0, 200)}`,
-                tasks: pipeline.stages.map((s) => ({ agent: s.agentId, task: s.label })),
+                // stage.agentId가 비어 있는 경우(CEO 동적 선택) 직군 라벨을 placeholder로
+                // 표시 — plan은 사전 요약용이므로 실제 dispatch는 _runPipeline에서 결정.
+                tasks: pipeline.stages.map((s) => ({
+                    agent: s.agentId || (s.roleCategory ? `[직군:${s.roleCategory}]` : '[미정]'),
+                    task: s.label,
+                })),
            };
        } else {
            const ceoModel = modelForAgent(state, 'ceo', deps.defaultModel);
-            const plannerResult = await runCeoPlanner(deps.ai, userPrompt, state, { model: ceoModel });
+            const plannerResult = await runCeoPlanner(deps.ai, userPrompt, state, {
+                model: ceoModel,
+                contractBlock: deps.requirementContract
+                    ? formatContractForPrompt(deps.requirementContract)
+                    : undefined,
+            });
            plan = plannerResult.plan;
            plannerRaw = plannerResult.raw;
            plannerParsed = plannerResult.parsed;
@@ -568,6 +627,11 @@ async function _dispatchOne(
        peerOutputs,
        brainContext,                          // injected as `[SECOND BRAIN CONTEXT]` block
        knowledgeMixPolicy: policyBlock,       // injected as `[KNOWLEDGE MIX POLICY]` block
+        // alignment 단계에서 도출된 contract가 deps에 있으면 모든 specialist의
+        // system 프롬프트에 같은 ground truth로 prepend된다. 추측 방지.
+        contractBlock: deps.requirementContract
+            ? formatContractForPrompt(deps.requirementContract)
+            : undefined,
    });
    // 우선순위: stage > agent > global default.
    const model = (stageModelOverride && stageModelOverride.trim())
@@ -580,7 +644,62 @@ async function _dispatchOne(
            user: task,
            model,
        });
-        const rawResponse = (result.content || '').trim();
+        let rawResponse = (result.content || '').trim();
+
+        // ── Self-Reflector Phase B — 외부 검증 + 1회 retry ──
+        // 사용자가 selfReflector.externalVerification 켰을 때만 동작. 검증 LLM이
+        // 'fail' 내면 issue를 task에 prepend해서 같은 specialist 1회 더 호출.
+        // 검증 자체가 실패하면(verifierError) 원본 응답을 그대로 보존하고 진행 — 안전망.
+        let verifierIssues: string[] = [];
+        let verifierSummary = '';
+        try {
+            // dynamic import — Phase B는 옵션이므로 미사용 시 모듈 자체를 안 로드.
+            const { getConfig } = await import('../../config');
+            const cfgRuntime = getConfig();
+            if (cfgRuntime.selfReflectorExternalEnabled && rawResponse) {
+                const { verifyResponse, formatIssuesForRetry } =
+                    await import('../selfReflector/selfReflectorVerifier');
+                const { formatContractForPrompt } = await import('./intentAlignment');
+                const contractBlock = deps.requirementContract
+                    ? formatContractForPrompt(deps.requirementContract)
+                    : undefined;
+                const verdict = await verifyResponse(deps.ai, {
+                    task,
+                    response: rawResponse,
+                    agentName: def.name,
+                    model,
+                    contractBlock,
+                });
+                verifierIssues = verdict.issues;
+                verifierSummary = verdict.summary;
+                logInfo('selfReflector.B: verdict.', {
+                    agentId, verdict: verdict.verdict, issuesCount: verdict.issues.length,
+                });
+                if (verdict.verdict === 'fail' && verdict.issues.length > 0) {
+                    const retryTask = `${formatIssuesForRetry(verdict.issues)}\n\n[원래 지시]\n${task}`;
+                    try {
+                        const retryRes = await deps.ai.chat({
+                            system, user: retryTask, model,
+                        });
+                        const retried = (retryRes.content || '').trim();
+                        if (retried) {
+                            rawResponse = retried;
+                            verifierSummary = `검증 fail → 1회 retry 적용 (${verdict.issues.length}개 지적 반영)`;
+                        }
+                    } catch (e: any) {
+                        logError('selfReflector.B: retry call failed; keeping original.', {
+                            agentId, error: e?.message ?? String(e),
+                        });
+                    }
+                }
+            }
+        } catch (e: any) {
+            // Phase B 전체가 실패해도 dispatch 자체는 계속.
+            logError('selfReflector.B: hook failed; continuing without verification.', {
+                agentId, error: e?.message ?? String(e),
+            });
+        }
+
        // Apply ConnectAI's action-tag executor so `<create_file>`,
        // `<run_command>`, `<edit_file>`, etc. emitted by the agent actually
        // hit disk / shell. The report (e.g. "✅ Created: foo.py") is
@@ -592,8 +711,93 @@ async function _dispatchOne(
            try {
                const report = await deps.executeActionTags(rawResponse);
                actionReport = report;
-                if (report.length > 0) {
-                    finalResponse = `${rawResponse}\n\n---\n**Action 실행 결과:**\n${report.map((r) => `- ${r}`).join('\n')}`;
+
+                // ── Self-Reflector Phase C — 생성/편집된 파일 syntax 체크 ──
+                // 사용자가 selfReflector.executionVerification 켰을 때만. 추가
+                // report 항목들을 actionReport에 append + finalResponse 첨부 본문에도 반영.
+                try {
+                    const { getConfig } = await import('../../config');
+                    const cfgRuntime = getConfig();
+                    if (cfgRuntime.selfReflectorExecutionEnabled && actionReport.length > 0) {
+                        const { verifyCreatedFiles } = await import('../selfReflector/selfReflectorExecution');
+                        const projectRoot = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath || '';
+                        if (projectRoot) {
+                            const extra = await verifyCreatedFiles(actionReport, projectRoot);
+                            if (extra.length > 0) {
+                                actionReport = [...actionReport, ...extra];
+                            }
+                        }
+                    }
+                } catch (e: any) {
+                    logError('selfReflector.C: hook failed; continuing without execution check.', {
+                        agentId, error: e?.message ?? String(e),
+                    });
+                }
+
+                // ── Self-Reflector Hollow Code Check (휴리스틱, LLM 콜 0) ──
+                // Phase C(syntax)가 잡지 못하는 *빈 깡통* 패턴을 정규식으로 잡는다.
+                // hollow 발견 → 1) actionReport에 ❌ 라인 추가 2) verifierIssues에
+                // 합류시켜 Phase B retry 트리거 (혹은 Phase B OFF면 사용자에게
+                // 경고만 표시). 작은 LLM이 가장 자주 만드는 실패 패턴이라
+                // selfReflectorEnabled가 켜져 있으면 *조건부 자동 활성화*.
+                try {
+                    const { getConfig } = await import('../../config');
+                    const cfgRuntime = getConfig();
+                    if (cfgRuntime.selfReflectorEnabled && actionReport.length > 0) {
+                        const { verifyHollow } = await import('../selfReflector/selfReflectorHollow');
+                        const projectRoot = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath || '';
+                        if (projectRoot) {
+                            const hollowRes = verifyHollow(actionReport, projectRoot);
+                            if (hollowRes.hasHollow) {
+                                actionReport = [...actionReport, ...hollowRes.extraLines];
+                                // verifier가 켜져 있고 아직 retry 안 했다면 hollow를 issue로
+                                // 격상해서 자동 재작업 트리거. 켜져 있지 않으면 사용자에게
+                                // 경고만 노출(이미 actionReport에 들어감).
+                                if (cfgRuntime.selfReflectorExternalEnabled && verifierIssues.length === 0) {
+                                    verifierIssues = hollowRes.hollowReasons.map((r) => `빈 깡통: ${r}`);
+                                    verifierSummary = `Hollow code 감지 — 자동 재시도 트리거`;
+                                    // 같은 specialist 1회 retry: 빈 깡통 지적을 task 앞에 prepend.
+                                    try {
+                                        const { formatIssuesForRetry } = await import('../selfReflector/selfReflectorVerifier');
+                                        const retryTask = `${formatIssuesForRetry(verifierIssues)}\n\n[원래 지시]\n${task}`;
+                                        const retryRes = await deps.ai.chat({ system, user: retryTask, model });
+                                        const retried = (retryRes.content || '').trim();
+                                        if (retried) {
+                                            // 재작업 결과로 본문 갱신 + action-tag 다시 실행.
+                                            rawResponse = retried;
+                                            if (deps.executeActionTags && _hasActionTag(retried)) {
+                                                const retryReport = await deps.executeActionTags(retried);
+                                                actionReport = retryReport;
+                                                // 재작업 결과도 hollow 한 번 더 검사.
+                                                const reCheck = verifyHollow(retryReport, projectRoot);
+                                                if (reCheck.hasHollow) {
+                                                    actionReport = [...actionReport, ...reCheck.extraLines];
+                                                    verifierSummary = `재작업 후에도 hollow 일부 잔존 — 사용자 확인 필요`;
+                                                } else {
+                                                    verifierSummary = `Hollow 감지 → 재작업으로 해결`;
+                                                }
+                                            }
+                                        }
+                                    } catch (e: any) {
+                                        logError('selfReflector.hollow: retry call failed.', {
+                                            agentId, error: e?.message ?? String(e),
+                                        });
+                                    }
+                                } else if (!cfgRuntime.selfReflectorExternalEnabled) {
+                                    // verifier OFF — 사용자에게 경고만.
+                                    verifierSummary = `⚠️ Hollow code 감지 — externalVerification 켜면 자동 재시도`;
+                                }
+                            }
+                        }
+                    }
+                } catch (e: any) {
+                    logError('selfReflector.hollow: check failed; continuing.', {
+                        agentId, error: e?.message ?? String(e),
+                    });
+                }
+
+                if (actionReport.length > 0) {
+                    finalResponse = `${rawResponse}\n\n---\n**Action 실행 결과:**\n${actionReport.map((r) => `- ${r}`).join('\n')}`;
                }
            } catch (e: any) {
                // Surface the failure but keep the agent's text — partial
@@ -619,6 +823,14 @@ async function _dispatchOne(
        // mark it as not-fully-successful so the CEO synthesis can read
        // the warning verbatim.
        const claimedButDidnt = rawResponse && !hasTag && _claimsFileCreation(rawResponse);
+        // 검증 요약을 response 끝에 한 줄로 첨부 — 사용자가 *어떻게 검증됐는지*
+        // 빠르게 보고 신뢰도 가늠. issues가 있으면 같이 노출.
+        if (verifierSummary) {
+            const issuesText = verifierIssues.length > 0
+                ? '\n' + verifierIssues.map((i) => `  - ${i}`).join('\n')
+                : '';
+            finalResponse = `${finalResponse}\n\n---\n**🔬 외부 검증:** ${verifierSummary}${issuesText}`;
+        }
        return {
            agentId, task,
            response: finalResponse,
@@ -663,6 +875,282 @@ interface PipelineSeed {
    startIndex: number;
 }

+/**
+ * Picks the agent that will execute `stage` for this run.
+ *
+ * Resolution order:
+ *   1. `stage.agentId` is set and `resolveAgent` finds it → that agent
+ *      (`source: 'pinned'`). A set-but-unresolvable id does not fail here;
+ *      it falls through to the role-category lookup below.
+ *   2. `stage.roleCategory` is set → take the active agents of that
+ *      category from `listActiveAgentsByCategory`. None → `null`. Exactly
+ *      one → `'sole-candidate'`, no LLM call. Several → a single JSON-only
+ *      LLM call on the CEO's model chooses among them (`'ceo-selected'`,
+ *      with the optional one-line `reason` from the reply).
+ *   3. No category → `null`; the caller decides how to record the stage.
+ *
+ * The CEO call never blocks the pipeline: if the call throws, or the reply
+ * does not name one of the candidate ids, the event is logged and the first
+ * candidate is returned with `source: 'fallback-first'`.
+ *
+ * @param stage    Pipeline stage being dispatched.
+ * @param taskText Task text shown to the CEO (only the first 600 chars are sent).
+ * @param state    Company state used for agent / model lookups.
+ * @param deps     Dispatcher dependencies (`ai`, `defaultModel`).
+ * @returns The chosen agent plus how it was chosen, or `null` when nobody can be assigned.
+ */
+async function _resolveStageAgent(
+    stage: PipelineStage,
+    taskText: string,
+    state: CompanyState,
+    deps: DispatcherDeps,
+): Promise<{ agentId: string; source: 'pinned' | 'sole-candidate' | 'ceo-selected' | 'fallback-first'; reason?: string } | null> {
+    const pinnedId = stage.agentId;
+    if (pinnedId && resolveAgent(state, pinnedId)) {
+        return { agentId: pinnedId, source: 'pinned' };
+    }
+
+    const category = stage.roleCategory as AgentRoleCategory | undefined;
+    if (!category) {
+        return null;
+    }
+    const pool = listActiveAgentsByCategory(state)[category] ?? [];
+    switch (pool.length) {
+        case 0:
+            return null;
+        case 1:
+            return { agentId: pool[0].id, source: 'sole-candidate' };
+        default:
+            break;
+    }
+
+    // Several candidates: ask the CEO once. Short system prompt, JSON-only reply.
+    const categoryLabel = ROLE_CATEGORY_LABELS[category] ?? category;
+    const roster = pool
+        .map((member) => `- id: ${member.id} | 이름: ${member.name} ${member.emoji}`)
+        .join('\n');
+    const systemPrompt = [
+        `당신은 1인 기업의 CEO입니다. 다음 task에 가장 적합한 *${categoryLabel}* 직군 구성원 한 명을 골라주세요.`,
+        '',
+        '반드시 아래 JSON 한 줄만 출력. 다른 텍스트(설명, 펜스, 머리말) 일체 금지.',
+        '{"agentId":"<선택한 id>","reason":"한 줄(40자 이내)"}',
+    ].join('\n');
+    const userPrompt = [
+        `[현재 stage] ${stage.label || stage.id}`,
+        '[task]',
+        taskText.slice(0, 600),
+        '',
+        '[후보]',
+        roster,
+        '',
+        '위 후보 중 task에 가장 적합한 한 명을 id로 골라 JSON 응답:',
+    ].join('\n');
+
+    try {
+        const reply = await deps.ai.chat({
+            system: systemPrompt,
+            user: userPrompt,
+            model: modelForAgent(state, 'ceo', deps.defaultModel),
+        });
+        const raw = (reply.content || '').trim();
+
+        // Lenient parse: unwrap a code fence if present, try the whole payload,
+        // then fall back to the outermost {...} span.
+        const fenceMatch = /```(?:json)?\s*([\s\S]*?)\s*```/i.exec(raw);
+        const payload = (fenceMatch ? fenceMatch[1] : raw).trim();
+        let choice: { agentId?: unknown; reason?: unknown } | null = null;
+        try {
+            choice = JSON.parse(payload);
+        } catch {
+            const braces = /\{[\s\S]*\}/.exec(payload);
+            if (braces) {
+                try {
+                    choice = JSON.parse(braces[0]);
+                } catch {
+                    /* fall through */
+                }
+            }
+        }
+
+        const chosenId = typeof choice?.agentId === 'string' ? choice.agentId.trim() : '';
+        const isKnownCandidate = chosenId !== '' && pool.some((member) => member.id === chosenId);
+        if (isKnownCandidate) {
+            const why = typeof choice?.reason === 'string' ? choice.reason.trim() : '';
+            return { agentId: chosenId, source: 'ceo-selected', reason: why };
+        }
+        // The reply did not name a valid candidate → fall back to the first one.
+        logInfo('dispatcher: CEO selection invalid; falling back to first candidate.', {
+            stageId: stage.id, rawHead: raw.slice(0, 80),
+        });
+    } catch (e: any) {
+        logError('dispatcher: CEO selection call failed; falling back.', {
+            stageId: stage.id, error: e?.message ?? String(e),
+        });
+    }
+    return { agentId: pool[0].id, source: 'fallback-first' };
+}
+
+/**
+ * Resolves the single reviewer named by a stage's `reviewWith` value.
+ *
+ *   - `'inspector'`  → first active agent in the `inspector` category
+ *   - `'role:<cat>'` → first active agent in category `<cat>`
+ *   - `'agent:<id>'` → that agent, provided `resolveAgent` finds it
+ *
+ * Any other value, or an empty candidate list, yields `null`, which the
+ * caller treats as "skip the review cycle".
+ *
+ * @param reviewWith Raw `stage.reviewWith` string.
+ * @param state      Company state used for the lookups.
+ * @returns `{ agentId }` of the reviewer, or `null` when none can be resolved.
+ */
+function _resolveInspector(
+    reviewWith: string,
+    state: CompanyState,
+): { agentId: string } | null {
+    const isInspector = reviewWith === 'inspector';
+    if (isInspector || reviewWith.startsWith('role:')) {
+        const byCategory = listActiveAgentsByCategory(state);
+        const pool = (isInspector
+            ? byCategory['inspector']
+            : byCategory[reviewWith.slice('role:'.length) as AgentRoleCategory]) ?? [];
+        const first = pool[0];
+        return first ? { agentId: first.id } : null;
+    }
+    if (reviewWith.startsWith('agent:')) {
+        const pinnedId = reviewWith.slice('agent:'.length);
+        if (!resolveAgent(state, pinnedId)) {
+            return null;
+        }
+        return { agentId: pinnedId };
+    }
+    return null;
+}
+
+/**
+ * Extracts the reviewer's verdict from its reply.
+ *
+ * The verdict is read from the first non-empty line, after stripping leading
+ * markdown decoration (`**`, `#`, `>`, `-`, backticks …). Reviewers are asked
+ * to answer in markdown, so a bolded or quoted label such as
+ * `**❌ 보완 필요: …**` must still count as a first-line verdict; otherwise a
+ * later "✅ 통과" in the body would override it. Matching is keyword-based
+ * and deliberately lenient, because small models drift from the exact label.
+ *
+ * If the first line carries no label, the whole text is scanned for an
+ * unambiguous marker. When nothing matches the result is `'unclear'`, and the
+ * caller is expected to fall back to the safe side (normally `'revise'`).
+ *
+ * @param text Raw reviewer reply.
+ * @returns `'pass'`, `'revise'`, or `'unclear'`.
+ */
+function _parseInspectorVerdict(text: string): 'pass' | 'revise' | 'unclear' {
+    const body = text || '';
+    const firstLine = body.split(/\r?\n/).find((line) => line.trim() !== '') ?? '';
+    // Drop leading whitespace and markdown emphasis / heading / quote / list markers.
+    const head = firstLine.replace(/^[\s>*_#`~-]+/, '');
+    if (/^(?:✅|통과|승인|pass|approve|ok)/i.test(head)) return 'pass';
+    if (/^(?:❌|보완|재작업|revise|reject|fail)/i.test(head)) return 'revise';
+    // No first-line label: accept a clear signal anywhere in the body.
+    if (/✅\s*통과|모든 케이스 통과/.test(body)) return 'pass';
+    if (/❌|보완 필요|재작업/.test(body)) return 'revise';
+    return 'unclear';
+}
+
+/**
+ * Extracts the CEO's meta-verdict from its reply.
+ *
+ * The verdict is read from the first non-empty line, after stripping leading
+ * markdown decoration (`**`, `#`, `>`, `-`, backticks …). This keeps a label
+ * such as `**🛑 중단**` from being overridden by a "✅ 통과" that appears
+ * later in the explanation. Matching is keyword-based and lenient.
+ *
+ * If the first line carries no label, the whole text is scanned in the order
+ * pass → abort → revise. When nothing matches the result is `'unclear'`.
+ *
+ * @param text Raw CEO reply.
+ * @returns `'pass'`, `'revise'`, `'abort'`, or `'unclear'`.
+ */
+function _parseCeoVerdict(text: string): 'pass' | 'revise' | 'abort' | 'unclear' {
+    const body = text || '';
+    const firstLine = body.split(/\r?\n/).find((line) => line.trim() !== '') ?? '';
+    // Drop leading whitespace and markdown emphasis / heading / quote / list markers.
+    const head = firstLine.replace(/^[\s>*_#`~-]+/, '');
+    if (/^(?:✅|통과|approve|pass|최종\s*ok|진행)/i.test(head)) return 'pass';
+    if (/^(?:🔁|보완|한 번 더|revise|다시)/i.test(head)) return 'revise';
+    if (/^(?:🛑|중단|stop|abort|그만)/i.test(head)) return 'abort';
+    // No first-line label: accept a clear signal anywhere in the body.
+    if (/✅\s*통과/.test(body)) return 'pass';
+    if (/🛑|중단/.test(body)) return 'abort';
+    if (/🔁|보완|한 번 더/.test(body)) return 'revise';
+    return 'unclear';
+}
+
+/**
+ * Three-way consensus review cycle for one pipeline stage.
+ *
+ * Starting from the worker's output (`latestOutput`), each round:
+ *   1. asks the reviewer (resolved from `stage.reviewWith`) for a ✅ / ❌ comment;
+ *   2. asks the CEO for a ✅ / 🔁 / 🛑 meta-verdict over output + reviewer comment;
+ *   3. reviewer ✅ and CEO ✅ → `'pass'`; CEO 🛑 → `'abort'` immediately;
+ *      anything else (including `'unclear'`) counts as a revise;
+ *   4. on revise, if rounds remain, the worker is re-dispatched *inside this
+ *      function* with the combined feedback prepended to the original task,
+ *      and the next round reviews the new output. On the last round the
+ *      cycle ends as `'maxed-out'` with the feedback in `revisionNote`.
+ *
+ * Other outcomes:
+ *   - empty `stage.reviewWith`, or no resolvable reviewer → `'pass'`, 0 rounds
+ *     (the latter is only logged, not surfaced to the user);
+ *   - `isAborted()` true before a round or between the two calls → `'aborted'`;
+ *   - reviewer call throws → a synthetic "❌ 보완 필요" comment is used;
+ *   - CEO call throws → the CEO text mirrors the reviewer verdict.
+ *
+ * `finalOutput` is always the most recent worker output. After one or more
+ * internal re-dispatches it differs from `latestOutput`, so callers that store
+ * or forward the stage result should take it from here.
+ *
+ * NOTE(review): `'revise'` is part of the declared verdict union but no code
+ * path in this function returns it. Internal re-dispatches call `_dispatchOne`
+ * with an empty peer-output list and emit `agent-start` / `agent-done` with
+ * `index: -1`; a re-dispatched turn that carries `error` is still reviewed on
+ * the next round. Confirm these are intended.
+ *
+ * @returns The verdict, number of completed rounds, the last feedback note
+ *          (for `'maxed-out'`), and the latest worker output.
+ */
+async function _runReviewCycle(args: {
+    stage: PipelineStage;
+    stageTaskText: string;
+    latestOutput: AgentTurnOutput;
+    state: CompanyState;
+    deps: DispatcherDeps;
+    emit: CompanyTurnEmitter;
+    isAborted: () => boolean;
+}): Promise<{
+    verdict: 'pass' | 'revise' | 'abort' | 'maxed-out' | 'aborted';
+    revisionNote?: string;
+    rounds: number;
+    finalOutput: AgentTurnOutput;
+}> {
+    const { stage, stageTaskText, latestOutput, state, deps, emit, isAborted } = args;
+    const reviewWith = stage.reviewWith || '';
+    if (!reviewWith) return { verdict: 'pass', rounds: 0, finalOutput: latestOutput };
+    const inspector = _resolveInspector(reviewWith, state);
+    if (!inspector) {
+        // No reviewer available: skip the cycle and treat the stage as passed.
+        // Only logged here, not surfaced to the user.
+        logInfo('reviewCycle: no inspector resolvable; skipping.', { stageId: stage.id, reviewWith });
+        return { verdict: 'pass', rounds: 0, finalOutput: latestOutput };
+    }
+    // Clamp the configured round count to 1..10 (default 3).
+    const maxRounds = Math.max(1, Math.min(10, stage.reviewMaxRounds ?? 3));
+    emit({
+        phase: 'review-start',
+        stageId: stage.id,
+        stageLabel: stage.label || stage.id,
+        maxRounds,
+        inspectorAgentId: inspector.agentId,
+    });
+
+    // Loop-invariant prompt pieces. When a requirement contract exists it is
+    // prepended to both system prompts so reviewer and CEO judge against the
+    // same ground truth.
+    const contractPrefix = deps.requirementContract
+        ? formatContractForPrompt(deps.requirementContract) + '\n\n'
+        : '';
+    const inspectorSystem = contractPrefix + '당신은 산출물 *감리*입니다. 작업자의 결과물을 객관적으로 검토하고 한국어 마크다운으로 응답하세요.\n\n반드시 첫 줄을 다음 둘 중 하나로 시작:\n  - ✅ 통과 — 산출물이 task 요구 + 위 contract의 criteria를 모두 충족하면.\n  - ❌ 보완 필요: <구체 항목 한 줄> — contract 기준 누락·오류·약점이 있으면.\n\n그 다음 줄들에 *구체적인* 피드백 또는 칭찬 1~3줄. 모호한 일반론 금지.';
+    const ceoSystem = contractPrefix + '당신은 회사 CEO입니다. 작업자 산출물 + 검수자 의견을 보고 *세 명이 모두 만족하는지* 메타-판단을 내립니다. 위 contract 기준에 부합하는지가 핵심.\n\n반드시 첫 줄을 다음 셋 중 하나로 시작:\n  - ✅ 통과 — 산출물·검수가 contract criteria를 모두 충족.\n  - 🔁 보완 — contract 기준 한 가지 이상 미흡. 작업자에게 줄 구체 지시 1~3줄.\n  - 🛑 중단 — 라운드 더 돌아도 의미 없음. 사장님께 현 상태로 보고.';
+
+    let currentOutput = latestOutput;
+    let lastInspectorText = '';
+    let lastInspectorVerdict: 'pass' | 'revise' | 'unclear' = 'unclear';
+    let lastCeoText = '';
+    let lastCeoVerdict: 'pass' | 'revise' | 'abort' | 'unclear' = 'unclear';
+    for (let round = 1; round <= maxRounds; round++) {
+        if (isAborted()) {
+            emit({ phase: 'review-end', stageId: stage.id, final: 'aborted', rounds: round - 1 });
+            return { verdict: 'aborted', rounds: round - 1, finalOutput: currentOutput };
+        }
+        const startedAt = Date.now();
+
+        // ── 1) Reviewer LLM call ──
+        const inspectorUser = `[현재 stage] ${stage.label || stage.id}\n[task]\n${stageTaskText.slice(0, 1500)}\n\n[작업자 산출물]\n${(currentOutput.response || '').slice(0, 3000)}`;
+        let inspectorText = '';
+        try {
+            const res = await deps.ai.chat({
+                system: inspectorSystem,
+                user: inspectorUser,
+                model: modelForAgent(state, inspector.agentId, deps.defaultModel),
+            });
+            inspectorText = (res.content || '').trim();
+        } catch (e: any) {
+            logError('reviewCycle: inspector call failed.', { stageId: stage.id, round, err: e?.message ?? String(e) });
+            // A failed reviewer call is treated as "needs revision" so the cycle fails closed.
+            inspectorText = `❌ 보완 필요: 검수자 호출 실패 (${e?.message ?? '알 수 없음'}) — 안전을 위해 한 번 더 시도`;
+        }
+        lastInspectorText = inspectorText;
+        lastInspectorVerdict = _parseInspectorVerdict(inspectorText);
+
+        if (isAborted()) {
+            emit({ phase: 'review-end', stageId: stage.id, final: 'aborted', rounds: round });
+            return { verdict: 'aborted', rounds: round, finalOutput: currentOutput };
+        }
+
+        // ── 2) CEO meta-judgement ──
+        const ceoUser = `[stage] ${stage.label || stage.id}\n[task]\n${stageTaskText.slice(0, 1000)}\n\n[작업자 산출물]\n${(currentOutput.response || '').slice(0, 2000)}\n\n[검수자 의견]\n${inspectorText.slice(0, 1500)}\n\n[지금 라운드: ${round}/${maxRounds}]`;
+        let ceoText = '';
+        try {
+            const res = await deps.ai.chat({
+                system: ceoSystem,
+                user: ceoUser,
+                model: modelForAgent(state, 'ceo', deps.defaultModel),
+            });
+            ceoText = (res.content || '').trim();
+        } catch (e: any) {
+            logError('reviewCycle: CEO meta call failed.', { stageId: stage.id, round, err: e?.message ?? String(e) });
+            // Without a CEO answer, mirror the reviewer's verdict.
+            ceoText = lastInspectorVerdict === 'pass' ? '✅ 통과' : '🔁 보완';
+        }
+        lastCeoText = ceoText;
+        lastCeoVerdict = _parseCeoVerdict(ceoText);
+
+        emit({
+            phase: 'review-round',
+            stageId: stage.id,
+            round,
+            inspectorAgentId: inspector.agentId,
+            inspectorText,
+            inspectorVerdict: lastInspectorVerdict,
+            ceoText,
+            ceoVerdict: lastCeoVerdict,
+            durationMs: Date.now() - startedAt,
+        });
+
+        // ── 3) Consensus ──
+        // Reviewer ✅ + CEO ✅ → pass. CEO 🛑 → stop immediately. Everything
+        // else, including 'unclear', falls back to the safe side (revise).
+        if (lastInspectorVerdict === 'pass' && lastCeoVerdict === 'pass') {
+            emit({ phase: 'review-end', stageId: stage.id, final: 'pass', rounds: round });
+            return { verdict: 'pass', rounds: round, finalOutput: currentOutput };
+        }
+        if (lastCeoVerdict === 'abort') {
+            emit({ phase: 'review-end', stageId: stage.id, final: 'aborted', rounds: round });
+            return { verdict: 'abort', rounds: round, finalOutput: currentOutput };
+        }
+        // Revise: build the feedback note handed to the worker.
+        const note = [
+            `[검수자 ${inspector.agentId}] ${inspectorText.slice(0, 600)}`,
+            `[CEO 메타] ${ceoText.slice(0, 400)}`,
+        ].join('\n\n');
+        // Last round: no further worker call, end as maxed-out.
+        if (round >= maxRounds) {
+            emit({ phase: 'review-end', stageId: stage.id, final: 'maxed-out', rounds: round });
+            return { verdict: 'maxed-out', revisionNote: note, rounds: round, finalOutput: currentOutput };
+        }
+        // Re-run the worker here (not in the caller) so that one cycle stays
+        // self-contained; the next round reviews the refreshed output.
+        const reDispatchTask = `[검수 피드백 — ${round}라운드]\n${note}\n\n위 피드백을 반드시 반영하세요.\n\n[원래 지시]\n${stageTaskText}`;
+        emit({ phase: 'agent-start', agentId: currentOutput.agentId, task: reDispatchTask, index: -1, total: maxRounds });
+        const reTurn = await _dispatchOne(currentOutput.agentId, reDispatchTask, [], state, deps, stage.modelOverride);
+        emit({ phase: 'agent-done', agentId: currentOutput.agentId, output: reTurn, index: -1, total: maxRounds });
+        currentOutput = reTurn;
+    }
+    // Safety net: the loop normally exits through one of the returns above.
+    emit({ phase: 'review-end', stageId: stage.id, final: 'maxed-out', rounds: maxRounds });
+    return {
+        verdict: 'maxed-out',
+        revisionNote: `[검수자 ${inspector.agentId}] ${lastInspectorText.slice(0, 600)}\n\n[CEO 메타] ${lastCeoText.slice(0, 400)}`,
+        rounds: maxRounds,
+        finalOutput: currentOutput,
+    };
+}
+
 /** _runPipeline이 매 stage 직후 호출하는 commit 콜백의 payload. */
 export interface PipelineCommit {
    outputs: AgentTurnOutput[];
@@ -740,20 +1228,77 @@ async function _runPipeline(
        const task = note
            ? `[사용자 수정 요청]\n${note}\n\n위 피드백을 반드시 반영하세요.\n\n[원래 지시]\n${baseTask}`
            : baseTask;
-        emit({ phase: 'agent-start', agentId: stage.agentId, task, index: stepIndex, total });
-        const turn = await _dispatchOne(stage.agentId, task, outputs, state, deps, stage.modelOverride);
+        // 동적 담당자 해결. stage.agentId가 박혀 있으면 그걸 쓰고, 비어 있으면
+        // CEO가 직군 후보 중에서 1회 LLM 콜로 적임자 선택. 모든 후보가 비활성/없음
+        // 이면 null — 그 경우 stage를 에러로 마킹하고 건너뛴다(파이프라인 hang 방지).
+        const picked = await _resolveStageAgent(stage, task, state, deps);
+        if (!picked) {
+            const errOutput: AgentTurnOutput = {
+                agentId: stage.agentId || `<${stage.roleCategory ?? 'unknown'}>`,
+                task,
+                response: `⚠️ 이 단계에 배정할 활성 에이전트가 없습니다 (직군: ${stage.roleCategory ?? '미지정'}). 관리 패널에서 해당 직군의 에이전트를 활성화하거나, stage에 직접 담당자를 지정하세요.`,
+                durationMs: 0,
+                error: 'no-active-agent-in-role',
+            };
+            outputs.push(errOutput);
+            latestByStage[stage.id] = errOutput;
+            writeAgentOutput(sessionDir, errOutput);
+            emit({ phase: 'agent-done', agentId: errOutput.agentId, output: errOutput, index: stepIndex, total });
+            stepIndex++;
+            i++;
+            continue;
+        }
+        const resolvedAgentId = picked.agentId;
+        // CEO 선택 시 사용자에게 *왜 이 사람*인지 한 줄로 보여주기 위해 task 앞에
+        // 짧은 메타 한 줄을 prepend — 에이전트 시스템 프롬프트엔 영향 없고 chat
+        // 카드 표시에만 쓰인다.
+        let taskForChat = task;
+        if (picked.source === 'ceo-selected' && picked.reason) {
+            taskForChat = `[🧭 CEO 선임: ${picked.reason}]\n\n${task}`;
+        }
+        emit({ phase: 'agent-start', agentId: resolvedAgentId, task: taskForChat, index: stepIndex, total });
+        const turn = await _dispatchOne(resolvedAgentId, task, outputs, state, deps, stage.modelOverride);
        outputs.push(turn);
        latestByStage[stage.id] = turn;
        writeAgentOutput(sessionDir, turn);
        appendAgentMemory(
-            deps.context, stage.agentId,
+            deps.context, resolvedAgentId,
            `[${timestamp}][${pipeline.id}/${stage.id}] ${task.slice(0, 120)} — ${turn.error ? `❌ ${turn.error}` : '✅'}`,
        );
-        emit({ phase: 'agent-done', agentId: stage.agentId, output: turn, index: stepIndex, total });
+        emit({ phase: 'agent-done', agentId: resolvedAgentId, output: turn, index: stepIndex, total });
        stepIndex++;
        // Successful run consumed the revision note (if any) — clear it.
        if (!turn.error) delete revisionNotes[stage.id];

+        // ── 3-way 검수 사이클 ──
+        // 작업자가 에러 없이 응답을 냈고, stage에 reviewWith가 설정돼 있으면
+        // 검수자 + CEO 메타-판단 사이클로 합의를 도출. 합의 실패 시:
+        //   - revise/maxed-out: 검수 코멘트를 revisionNote로 받아 stage 재실행
+        //     (loop-back과 동일한 메커니즘 재활용)
+        //   - abort: 사용자에게 알리고 라운드 종료
+        if (stage.reviewWith && !turn.error) {
+            const reviewResult = await _runReviewCycle({
+                stage,
+                stageTaskText: task,
+                latestOutput: turn,
+                state, deps, emit, isAborted,
+            });
+            if (reviewResult.verdict === 'aborted') {
+                return abortReturn('aborted-during-review');
+            }
+            if (reviewResult.verdict === 'abort') {
+                return abortReturn('aborted-by-ceo-review');
+            }
+            // revise / maxed-out — 모두 작업자에게 다시 보내 한 번 더 (loop-back).
+            // 단, maxed-out은 사용자에게 "한계 도달, 마지막 결과로 진행"을 알려야
+            // 더 자연스러우므로 다음 stage로 그대로 진행 (revisionNote 무시).
+            if (reviewResult.verdict === 'revise' && reviewResult.revisionNote) {
+                revisionNotes[stage.id] = reviewResult.revisionNote;
+                continue; // 같은 stage 재실행 — while(i)는 그대로
+            }
+            // pass / maxed-out → 다음 단계로 진행 (revisionNotes 클리어는 위에서 이미)
+        }
+
        // ── Manual approval gate ──
        // After agent-done emits, before loop-back / next stage advance,
        // give the user a chance to inspect and approve. We only fire the