feat: Stabilize Company Suite & Self-Reflection logic, integrate new ADRs and bug records

2026-05-14 16:05:28 +09:00
parent f521c3f557
commit 618b8d5b34
33 changed files with 2203 additions and 655 deletions
@@ -39,8 +39,9 @@ import {
    mapWeightToBrainFileLimit,
    buildKnowledgeMixPolicy,
 } from '../../retrieval/knowledgeMix';
-import { getCompanyAgent } from './agents';
-import { modelForAgent, readCompanyState, resolveCompanyKnowledgeMix } from './companyConfig';
+import {
+    modelForAgent, readCompanyState, resolveActivePipeline, resolveAgent, resolveCompanyKnowledgeMix,
+} from './companyConfig';
 import { runCeoPlanner } from './ceoPlanner';
 import { runCeoReporter } from './ceoReporter';
 import { buildSpecialistPrompt } from './promptBuilder';
@@ -57,7 +58,7 @@ import {
    writeSessionJson,
 } from './sessionStore';
 import { buildTelegramReporter, formatCompanyTelegramReport } from './telegramReport';
-import { AgentTurnOutput, CompanyState, CompanyTaskPlan, SessionResult } from './types';
+import { AgentTurnOutput, CompanyState, CompanyTaskPlan, PipelineDef, PipelineStage, SessionResult } from './types';

 /** Trim length applied when an agent's output is fed into the next agent. */
 const PEER_OUTPUT_BUDGET = 1500;
@@ -73,6 +74,13 @@ export type CompanyTurnEvent =
    | { phase: 'plan-ready'; plan: CompanyTaskPlan; parsed: boolean; raw: string }
    | { phase: 'agent-start'; agentId: string; task: string; index: number; total: number }
    | { phase: 'agent-done'; agentId: string; output: AgentTurnOutput; index: number; total: number }
+    /**
+     * Pipeline-mode only: emitted when a stage's output matches the
+     * configured `loopBackPattern` and the dispatcher jumps back to a
+     * previous stage. The webview uses this to render "🔁 stage X → Y
+     * (재시도 N차)" in the chat.
+     */
+    | { phase: 'stage-loop'; from: string; to: string; iteration: number }
    | { phase: 'report-start' }
    | { phase: 'report-done'; report: string; ok: boolean }
    /**
@@ -147,38 +155,59 @@ export async function runCompanyTurn(
    };
    if (isAborted()) return fail('signal-aborted');

-    // ── Phase 1: planner ──
+    // ── Phase 1: plan (pipeline or legacy planner) ──
    emit({ phase: 'plan-start' });
-    const ceoModel = modelForAgent(state, 'ceo', deps.defaultModel);
-    const plannerResult = await runCeoPlanner(deps.ai, userPrompt, state, { model: ceoModel });
+    const pipeline = resolveActivePipeline(state);
+    let plan: CompanyTaskPlan;
+    let plannerRaw = '';
+    let plannerParsed = false;
+    if (pipeline) {
+        // Pipeline mode: the user has authored a fixed sequence of stages.
+        // We still surface a `plan` for the report writer and the session
+        // summary — derived directly from the pipeline definition.
+        plan = {
+            brief: `[Pipeline: ${pipeline.name}] ${userPrompt.slice(0, 200)}`,
+            tasks: pipeline.stages.map((s) => ({ agent: s.agentId, task: s.label })),
+        };
+        plannerParsed = true;
+    } else {
+        const ceoModel = modelForAgent(state, 'ceo', deps.defaultModel);
+        const plannerResult = await runCeoPlanner(deps.ai, userPrompt, state, { model: ceoModel });
+        plan = plannerResult.plan;
+        plannerRaw = plannerResult.raw;
+        plannerParsed = plannerResult.parsed;
+    }
    if (isAborted()) return fail('aborted-after-plan');
    emit({
        phase: 'plan-ready',
-        plan: plannerResult.plan,
-        parsed: plannerResult.parsed,
-        raw: plannerResult.raw,
+        plan,
+        parsed: plannerParsed,
+        raw: plannerRaw,
    });
-    writeBrief(sessionDir, userPrompt, plannerResult.plan);
+    writeBrief(sessionDir, userPrompt, plan);

    // ── Phase 2: sequential dispatch ──
    const outputs: AgentTurnOutput[] = [];
-    const total = plannerResult.plan.tasks.length;
-    for (let i = 0; i < total; i++) {
-        if (isAborted()) return fail('aborted-mid-dispatch');
-        const task = plannerResult.plan.tasks[i];
-        emit({ phase: 'agent-start', agentId: task.agent, task: task.task, index: i, total });
-        const turn = await _dispatchOne(task.agent, task.task, outputs, state, deps);
-        outputs.push(turn);
-        writeAgentOutput(sessionDir, turn);
-        // Best-effort: append a one-line memory entry so the agent "remembers"
-        // having done this task. Verbose successes are summarized in the CEO
-        // report — memory is just the breadcrumb trail.
-        appendAgentMemory(
-            deps.context,
-            task.agent,
-            `[${timestamp}] ${task.task} — ${turn.error ? `❌ ${turn.error}` : '✅'}`,
-        );
-        emit({ phase: 'agent-done', agentId: task.agent, output: turn, index: i, total });
+    if (pipeline) {
+        const runResult = await _runPipeline(pipeline, userPrompt, plan.brief, sessionDir, timestamp, state, deps, isAborted, emit);
+        if (runResult.aborted) return fail(runResult.aborted);
+        outputs.push(...runResult.outputs);
+    } else {
+        const total = plan.tasks.length;
+        for (let i = 0; i < total; i++) {
+            if (isAborted()) return fail('aborted-mid-dispatch');
+            const task = plan.tasks[i];
+            emit({ phase: 'agent-start', agentId: task.agent, task: task.task, index: i, total });
+            const turn = await _dispatchOne(task.agent, task.task, outputs, state, deps);
+            outputs.push(turn);
+            writeAgentOutput(sessionDir, turn);
+            appendAgentMemory(
+                deps.context,
+                task.agent,
+                `[${timestamp}] ${task.task} — ${turn.error ? `❌ ${turn.error}` : '✅'}`,
+            );
+            emit({ phase: 'agent-done', agentId: task.agent, output: turn, index: i, total });
+        }
    }

    // ── Phase 3: synthesis ──
@@ -187,7 +216,7 @@ export async function runCompanyTurn(
    const reportModel = modelForAgent(state, 'ceo', deps.defaultModel);
    const reportResult = await runCeoReporter(
        deps.ai,
-        plannerResult.plan,
+        plan,
        outputs,
        state,
        { model: reportModel },
@@ -209,7 +238,7 @@ export async function runCompanyTurn(
            const tgText = formatCompanyTelegramReport({
                state,
                userPrompt,
-                plan: plannerResult.plan,
+                plan,
                outputs,
                report: reportResult.report,
                sessionTimestamp: timestamp,
@@ -229,7 +258,7 @@ export async function runCompanyTurn(
    const result: SessionResult = {
        timestamp, sessionDir,
        userPrompt,
-        plan: plannerResult.plan,
+        plan,
        agentOutputs: outputs,
        report: reportResult.report,
        totalDurationMs: Date.now() - startedAt,
@@ -260,7 +289,7 @@ async function _dispatchOne(
    deps: DispatcherDeps,
 ): Promise<AgentTurnOutput> {
    const startedAt = Date.now();
-    const def = getCompanyAgent(agentId);
+    const def = resolveAgent(state, agentId);
    if (!def) {
        return {
            agentId, task, response: '', durationMs: 0,
@@ -272,7 +301,7 @@ async function _dispatchOne(
    const peerOutputs = earlierOutputs
        .filter((o) => !o.error) // skip failed peers — they'd just confuse the next agent
        .map((o) => {
-            const peerDef = getCompanyAgent(o.agentId);
+            const peerDef = resolveAgent(state, o.agentId);
            const body = o.response.length > PEER_OUTPUT_BUDGET
                ? o.response.slice(0, PEER_OUTPUT_BUDGET) + '\n…(truncated)'
                : o.response;
@@ -399,6 +428,94 @@ async function _dispatchOne(
    }
 }

+/**
+ * Run an authored pipeline: each stage dispatches its agent with a templated
+ * instruction. Stages can declare a `loopBackPattern` regex — when it
+ * matches the stage's output, the dispatcher jumps back to `loopBackTo` (a
+ * stage that must precede the current one). Iteration count is bounded by
+ * `maxIterations` (default 3) to keep run-away loops from hanging the user.
+ *
+ * Returns `{ outputs, aborted }`: `aborted` is set only when the abort
+ * signal flipped mid-run; the outer dispatcher then short-circuits.
+ */
+async function _runPipeline(
+    pipeline: PipelineDef,
+    userPrompt: string,
+    brief: string,
+    sessionDir: string,
+    timestamp: string,
+    state: CompanyState,
+    deps: DispatcherDeps,
+    isAborted: () => boolean,
+    emit: CompanyTurnEmitter,
+): Promise<{ outputs: AgentTurnOutput[]; aborted?: string }> {
+    const outputs: AgentTurnOutput[] = [];
+    // Keep the latest output per stage id so `{{stage.<id>}}` template
+    // tokens always resolve to the most recent value across loop-backs.
+    const latestByStage: Record<string, AgentTurnOutput> = {};
+    const iterations: Record<string, number> = {};
+    const total = pipeline.stages.length;
+    let i = 0;
+    let stepIndex = 0;
+    while (i < pipeline.stages.length) {
+        if (isAborted()) return { outputs, aborted: 'aborted-mid-pipeline' };
+        const stage = pipeline.stages[i];
+        const task = _renderStageInstruction(stage, userPrompt, brief, latestByStage);
+        emit({ phase: 'agent-start', agentId: stage.agentId, task, index: stepIndex, total });
+        const turn = await _dispatchOne(stage.agentId, task, outputs, state, deps);
+        outputs.push(turn);
+        latestByStage[stage.id] = turn;
+        writeAgentOutput(sessionDir, turn);
+        appendAgentMemory(
+            deps.context, stage.agentId,
+            `[${timestamp}][${pipeline.id}/${stage.id}] ${task.slice(0, 120)} — ${turn.error ? `❌ ${turn.error}` : '✅'}`,
+        );
+        emit({ phase: 'agent-done', agentId: stage.agentId, output: turn, index: stepIndex, total });
+        stepIndex++;
+        // Loop-back evaluation. We only loop on *successful* responses with
+        // non-empty body — an error or empty response would loop forever.
+        if (stage.loopBackTo && stage.loopBackPattern && !turn.error && turn.response.trim()) {
+            const limit = stage.maxIterations ?? 3;
+            const count = (iterations[stage.id] ?? 0) + 1;
+            iterations[stage.id] = count;
+            let re: RegExp | null = null;
+            try { re = new RegExp(stage.loopBackPattern, 'i'); } catch { re = null; }
+            if (re && re.test(turn.response) && count <= limit) {
+                const targetIdx = pipeline.stages.findIndex((s) => s.id === stage.loopBackTo);
+                if (targetIdx !== -1 && targetIdx < i) {
+                    emit({ phase: 'stage-loop', from: stage.id, to: stage.loopBackTo, iteration: count });
+                    i = targetIdx;
+                    continue;
+                }
+            }
+        }
+        i++;
+    }
+    return { outputs };
+}
+
+/**
+ * Substitute template tokens in a stage's instruction. Falls back to the
+ * raw user prompt when the template is empty so the user doesn't have to
+ * fill every stage with a long template just to forward the original ask.
+ */
+function _renderStageInstruction(
+    stage: PipelineStage,
+    userPrompt: string,
+    brief: string,
+    latestByStage: Record<string, AgentTurnOutput>,
+): string {
+    const tpl = (stage.instructionTemplate || '').trim();
+    if (!tpl) return userPrompt;
+    return tpl
+        .replace(/\{\{\s*userPrompt\s*\}\}/g, userPrompt)
+        .replace(/\{\{\s*brief\s*\}\}/g, brief)
+        .replace(/\{\{\s*stage\.([a-zA-Z0-9_-]+)\s*\}\}/g, (_m, sid) => {
+            const o = latestByStage[sid];
+            return o?.response ?? `[stage:${sid} 아직 실행되지 않음]`;
+        });
+}
+
 /**
 * Cheap pre-check so we don't fire up the action-tag executor for every
 * specialist response — only the ones that actually contain a recognised