// connectai/src/agent.ts

import * as vscode from 'vscode';
import * as path from 'path';
import * as fs from 'fs';
// axios removed
import {
    findBrainFiles,
    getSystemPrompt,
    shouldAutoPushBrain,
    buildApiUrl,
    getActiveBrainProfile,
    logError,
    logInfo,
    resolveEngine,
    summarizeText
} from './utils';
import { BrainProfile, getConfig, EXCLUDED_DIRS } from './config';
import { validatePath, sanitizeCommand } from './security';
import { TransactionManager } from './core/transaction';
import { SessionManager } from './core/session';
import { AgentWorkflowManager } from './agents/AgentWorkflowManager';
import { buildAstraModeArchitectureContext } from './lib/contextBuilders/astraModeArchitecture';
import { isScheduleRequest, buildScheduleContext } from './lib/contextBuilders/scheduleContext';
import { isSelfAssessRequest, isAboutSelf, buildSelfAssessContext } from './lib/contextBuilders/selfAssessContext';
import { ensureFeatureInventory } from './extension/featureInventory';
import { buildUrlContext } from './lib/contextBuilders/urlContext';
import { extractUrls } from './features/web/webFetch';
import { looksLikeCorrection, captureCorrection } from './intelligence/correctionLoop';
import { shouldUseMultiAgentWorkflow } from './lib/contextBuilders/multiAgentRouting';
import { buildThinkingPartnerResponseContract } from './lib/contextBuilders/thinkingPartnerContract';
import { buildDroppedHistorySummary } from './lib/contextBuilders/droppedHistorySummary';
import { buildRequestHistory, capChatHistory } from './lib/contextBuilders/historyTransform';
import { buildLastTopicLine } from './lib/contextBuilders/lastTopicLine';
import { buildModelCandidates } from './lib/contextBuilders/modelCandidates';
import {
    isThinkingPartnerRequest,
    isCasualConversationPrompt,
    isExplicitSecondBrainRequest,
    isSecondBrainInventoryRequest,
    isNoBrainDataRefusal,
    isAnalysisRequest,
} from './lib/contextBuilders/promptDetection';
import { stripAstraFormattingForAgentMode, computeModeSignature } from './lib/contextBuilders/systemPromptShaping';
import { sanitizeAssistantContent, isRestartedAnswer, parseRationale } from './lib/contextBuilders/outputSanitization';
import { buildEngineMessageVariants } from './lib/contextBuilders/engineMessages';
import { buildMemoryContext as buildMemoryContextFn } from './lib/contextBuilders/memoryContext';
import { extractEvidenceFilesFromProjectKnowledge, extractPriorityPreviewFiles } from './lib/contextBuilders/projectEvidence';
import { buildJarvisProjectBriefContext } from './lib/contextBuilders/jarvisProjectBrief';
import { buildSecondBrainInventoryContext, buildSecondBrainInventoryFallbackAnswer } from './lib/contextBuilders/secondBrainInventory';
import {
    LocalProjectIntent,
    POSIX_ABS_PATH_SRC,
    WIN_ABS_PATH_SRC,
    containsLocalFilePath,
    shouldPreflightLocalProjectPath,
    classifyLocalProjectIntent,
    isProjectKnowledgeCreationRequest,
    isProjectReviewEvaluationRequest,
    buildLocalProjectIntentGuidance,
    buildAstraStanceContext,
} from './lib/contextBuilders/localProjectIntent';
import {
    getProjectDisplayName,
    buildProjectKnowledgeMarkdown,
    buildProjectKnowledgeFallbackAnswer,
    writeProjectKnowledgeRecord,
} from './lib/contextBuilders/projectKnowledge';
import {
    extractLocalProjectPaths,
    listProjectTree,
    findPriorityProjectFiles,
    inspectLocalProjectPath,
    buildLocalProjectPathContext,
    enforceLocalPathReviewAnswer,
} from './lib/contextBuilders/localProjectPath';
import {
    isProjectKnowledgeFollowupRequest,
    buildRecentProjectKnowledgeContext,
    findRecentProjectKnowledgeRecord,
    extractRecentProjectKnowledgeRecordPath,
    ensureRecentProjectKnowledgeEvidence,
    ensureLocalProjectPathEvidence,
    isBlockingProjectKnowledgeAnswer,
} from './lib/contextBuilders/recentProjectKnowledge';
import { ErrorTranslator } from './core/errorHandler';
import { agentEvents, AgentEventTypes } from './core/events';
import {
    AgentExecutionError,
    FileSystemError,
    APICommunicationError
} from './core/errors';
import { StatusBarManager, AgentStatus } from './core/statusBar';
import { lockManager } from './core/lock';
import { actionQueue } from './core/queue';
import { ConflictResolver } from './core/conflict';
import { recordTelemetry } from './core/telemetry';
import {
    buildSecondBrainTrace,
    enforceProjectClaimPolicyInAnswer,
    renderSecondBrainTraceContext,
    renderSecondBrainTraceMarkdown,
    SecondBrainTrace
} from './features/secondBrainTrace';
import { MemoryManager } from './memory';
import { RetrievalOrchestrator } from './retrieval';
import { isQaRegressionFeedback, findUnaddressedChecklistItems } from './retrieval/lessonHelpers';
import { buildKnowledgeMixPolicy, ResolvedKnowledgeMix } from './retrieval/knowledgeMix';
import {
    extractVisibleFinal,
    stripMarkdownFormatting,
    shouldFinalOnlyRetry,
    shouldAutoContinue,
    looksCutOff,
    mergeContinuationParts,
    buildContinuationUserPrompt,
    FINAL_ONLY_DIRECTIVE,
    CONTINUATION_SYSTEM_PROMPT,
} from './core/responseRecovery';
import {
    estimateTokens,
    estimateMessagesTokens,
    computeOutputBudget,
    trimHistoryToBudget,
    truncateSystemPromptContext,
    classifyStopReason,
    truncationNotice,
    shouldShowTruncationNotice,
    estimateModelParamsB,
    estimateActiveParamsB,
    type ContextLimits,
} from './lib/contextManager';
import { samplingToRestBody, type ChatStreamStats } from './lmstudio/streamer';
import { lmStudioSamplingFromConfig, lmStudioRespondExtrasFromConfig } from './lib/contextBuilders/lmStudioSampling';
// The three action-tag attribute parsers now live in `src/agent/attrParsers.ts`.
// tests/{taskStore,sheetsApi,calendarApi}.test.ts import them via `from '../src/agent'`,
// so they are imported and re-exported here in one step — the local bindings stay visible
// to the call sites inside executeActions, and the existing path (`from 'agent'`) keeps working externally.
import { _parseTaskAttrs, _parseSheetAttrs, _parseCalEventAttrs } from './agent/attrParsers';
export { _parseTaskAttrs, _parseSheetAttrs, _parseCalEventAttrs };

// 8 method bodies extracted to dedicated modules. The AgentExecutor methods of the same
// name are now thin wrappers — each bundles a deps object and delegates to the free function.
import { callNonStreaming as callNonStreamingFn } from './agent/llm/callNonStreaming';
import { runMapReduce, shouldMapReduce } from './agent/handlePrompt/largeInputMapReduce';
import { createStreamingRequest as createStreamingRequestFn } from './agent/llm/createStreamingRequest';
import { streamChatOnce as streamChatOnceFn } from './agent/llm/streamChatOnce';
import { maybeEmitDevilRebuttal as maybeEmitDevilRebuttalFn } from './agent/llm/devilRebuttal';
import { compressSessionSummary as compressSessionSummaryFn } from './agent/sessions/compressSummary';
import { callRoleAgent as callRoleAgentFn } from './agent/multiAgent/callRoleAgent';
import { executeMultiAgentWorkflow as executeMultiAgentWorkflowFn } from './agent/multiAgent/workflow';
import {
    restoreLastSession as restoreLastSessionFn,
    executeActionTagsOnText as executeActionTagsOnTextFn,
    syncBrain as syncBrainFn,
} from './agent/misc';
// 9 action handler groups — split out of the executeActions body. Each one uses its own regex
// to pick only its own tags out of `ctx.aiMessage`. Shared state flows through the `ctx` object.
import type { HandlerContext } from './agent/actions/types';
import { applyFileCreateEditActions } from './agent/actions/fileCreateEdit';
import { applyFileDeleteReadActions } from './agent/actions/fileDeleteRead';
import { applyRunCommandActions } from './agent/actions/runCommand';
import { applyListFilesActions } from './agent/actions/listFiles';
import { applyWebFetchActions } from './agent/actions/webFetch';
import { applyBrainOpsActions } from './agent/actions/brainOps';
import { applyCalendarActions } from './agent/actions/calendar';
import { applySheetsActions } from './agent/actions/sheets';
import { applyTasksActions } from './agent/actions/tasks';

// handlePrompt phases — the 1100-line monolith in agent.ts was split into 7 phase modules.
// Each module is pure (or uses a deps-callback pattern), so it can be unit tested.
import { buildModeBridgeContext } from './agent/handlePrompt/buildModeBridgeContext';
import { buildPriorTurnConclusionContext } from './lib/contextBuilders/priorTurnConclusion';
import { buildTurnContextBlocks } from './agent/handlePrompt/buildTurnContextBlocks';
import { buildAgentModeSystemPrompt } from './agent/handlePrompt/buildAgentModeSystemPrompt';
import { buildAstraModeSystemPrompt } from './agent/handlePrompt/buildAstraModeSystemPrompt';
import { computeBudgetedRequest } from './agent/handlePrompt/computeBudgetedRequest';
import { processFinalAnswer } from './agent/handlePrompt/processFinalAnswer';
import { runPostAnswerHooks } from './agent/postAnswerHooks';
import { applyAutoContinuation } from './agent/handlePrompt/applyAutoContinuation';

/**
 * A single entry in the agent's chat history.
 */
export interface ChatMessage {
    /** Speaker of the message. */
    role: 'user' | 'assistant' | 'system';
    /** Message text. */
    content: string;
    /**
     * Marks the message as internal. `AgentExecutor.getHistory()` hides internal
     * messages unless their role is `'assistant'`.
     */
    internal?: boolean;
    /** Optional structured rationale attached to the message. */
    rationale?: {
        problem: string;
        goal: string;
        reasoning: string;
    };
}

/** Callback that receives a chat-history array; it may be synchronous or return a promise. */
type HistoryChangeListener = (history: ChatMessage[]) => void | Promise<void>;

/**
 * Optional collaborators accepted by the `AgentExecutor` constructor.
 * Every field may be omitted; the constructor defaults the whole object to `{}`.
 */
export interface AgentExecutorOptions {
    /** Hooks fired around any LLM streaming run so external systems (LM Studio idle eject) can pause/resume. */
    onStreamLifecycle?: {
        start: () => void;
        end: () => void;
    };
    /**
     * Optional native LM Studio chat streamer. When provided AND the active engine is LM Studio,
     * chat completions are streamed via @lmstudio/sdk's WebSocket transport instead of the
     * OpenAI-compatible REST endpoint. Falls back to REST when omitted or when the streamer
     * itself fails (e.g. SDK reachability error).
     */
    lmStudioStreamer?: import('./lmstudio/streamer').IChatStreamer;
    /**
     * Optional pending-approval queue. When provided, dry-run transactions are also published
     * into a queue that drives the Approval Panel webview + status bar badge. The existing
     * inline `requiresApproval` chat message is preserved for backwards compatibility.
     */
    approvalQueue?: import('./features/approval/approvalQueue').ApprovalQueue;
}

// --- Agent Roles & Workflows ---
/** The three roles that have a dedicated system prompt in `AGENT_PROMPTS`. */
export type AgentRole = 'planner' | 'researcher' | 'writer';
// The LocalProjectIntent type moved to `src/lib/contextBuilders/localProjectIntent.ts` — it is used via import.

/**
 * System prompt text for each agent role.
 *
 * Each prompt is written as a list of lines joined with a single newline, so the
 * resulting strings carry no leading indentation and no trailing newline.
 */
export const AGENT_PROMPTS: Record<AgentRole, string> = {
    planner: [
        "You are the [Planner Agent]. Your goal is to analyze the user's request and create a detailed execution plan.",
        '1. Breakdown the request into logical steps.',
        '2. Identify key search keywords for the knowledge base.',
        '3. Output your plan in a structured format using <plan> tags.',
    ].join('\n'),
    researcher: [
        "You are the [Researcher Agent]. Your goal is to gather and analyze data based on the Planner's strategy.",
        '1. Search the local knowledge base using the provided keywords.',
        '2. Evaluate data reliability and extract relevant facts.',
        '3. Output your findings using <research_results> tags.',
    ].join('\n'),
    writer: [
        'You are the [Writer Agent]. Your goal is to synthesize all gathered information into a high-quality final report.',
        '1. Use the data from the Researcher.',
        "2. Follow the project's visual and tone-of-voice guidelines.",
        '3. Deliver a logical, consistent, and polished response.',
    ].join('\n'),
};

// compactRecentSessions moved into `src/lib/contextBuilders/memoryContext.ts` (it is only used there).

// The POSIX / Windows absolute-path regexes moved to `src/lib/contextBuilders/localProjectIntent.ts`
// as ABS_PATH_RE / WIN_ABS_PATH_RE. External code imports them directly from there.

export class AgentExecutor {

    /**
     * Hard cap on retained in-memory chat messages. Older messages beyond this
     * are dropped (the system/first message is always preserved). Generous so a
     * normal session is untouched — this only fights unbounded growth in very
     * long-running sessions. The per-request context budgeter
     * (`trimHistoryToBudget`) still does the real fitting; this just stops the
     * array itself from leaking memory across hundreds of turns.
     */
    private static readonly MAX_RETAINED_MESSAGES = 40;
    /**
     * Older internal tool-result messages (read_file / list_files / list_brain /
     * read_brain dumps) are the bulkiest part of history and add little once the
     * conversation has moved on. Anything older than the most recent
     * `RECENT_FULL_MESSAGES` gets its bulky tool-result content shrunk to this
     * many characters. Recent messages are kept full for conversation continuity.
     */
    private static readonly RECENT_FULL_MESSAGES = 16;
    /** Character cap described above; passed to `capChatHistory` as `oldToolResultCap`. */
    private static readonly OLD_TOOL_RESULT_CAP = 600;

    /** Full in-memory conversation, including messages flagged `internal`. */
    private chatHistory: ChatMessage[] = [];
    /** Abort handle for the in-flight run; `stop()` aborts it and resets it to `null`. */
    private abortController: AbortController | null = null;
    /** Target for `postMessage` updates; unset until `setWebview()` is called. */
    private webview: vscode.Webview | undefined;
    /** Listener registered through `setHistoryChangeListener()`. */
    private historyChangeListener: HistoryChangeListener | undefined;
    /** Counter incremented to mint a new run id. */
    private runSerial = 0;
    /** Id of the run currently treated as active; `stop()` replaces it with a freshly minted id. */
    private activeRunId = 0;
    // v2.2.69 — used to detect mode switches. On entering handlePrompt the current mode
    // signature is computed; if it differs from the previous value, a one-line
    // "mode switched since the earlier conversation" note is inserted into the system prompt.
    // The mode signature is a hash of (agent skill, multiAgent, company mode, active brain).
    private _lastModeSignature: string | null = null;
    /** Transaction whose changes are committed by `approveTransaction()` or rolled back by `rejectTransaction()`. */
    private transactionManager: TransactionManager;
    /** Session store, constructed from the extension context. */
    private sessionManager: SessionManager;
    /** Status bar indicator updated as the agent changes state. */
    private statusBarManager: StatusBarManager;
    /** Memory system rooted at the active brain's `localBrainPath`. */
    private memoryManager: MemoryManager;
    /** Retrieval (RAG) pipeline orchestrator. */
    private retrievalOrchestrator: RetrievalOrchestrator;
    /** Id of the current task; replaced with `task_<timestamp>` at the start of each top-level turn. */
    private currentTaskId: string = 'default_session';

    /**
     * Per-turn context — bundles what used to be three separate state slots. The old
     * code kept `_lastRetrievalInfo`, `_lastLessonContents`, and `_lastKnowledgeMix`
     * as separate fields, so knowing *which* of them had to be reset on a turn abort
     * was scattered. They are merged into one object and cleared together by the
     * single `resetTurnContext()` method.
     */
    private _turnCtx: {
        /** Filled by buildMemoryContext — sent to the webview "scope used" footer. */
        retrieval: {
            agentName: string | null;
            scoped: boolean;
            source: string;
            configuredFolders: string[];
            usedBrainFiles: string[];
            usedMemoryLayers: string[];
            lessonFiles: string[];
            totalChunks: number;
            selectedChunks: number;
        } | null;
        /** Lesson card *bodies* — used to check for unmet Prevention Checklist items. */
        lessons: string[];
        /** Knowledge Mix decided for this turn — shown in the scope footer. */
        knowledgeMix: ResolvedKnowledgeMix | null;
        /**
         * Registry of dynamic system-prompt blocks — filled by memoryContext on each turn
         * and iterated by buildAstraModeSystemPrompt to inject them into the prompt.
         *
         * Old structure: 5 named fields (conflictWarnings/coveChecklist/intentClarification/
         * citationTrace/terminology) + 5 resets + 5 named params + 5 ternary gates
         * (25 edit sites in total).
         * New structure: one Map. Adding a new block = one `set` call.
         *
         * The key is an id for debugging/overriding (e.g. 'conflict-warnings'). The value is
         * the already-built block body — an empty string means nothing is injected.
         * Casual-mode gating is handled by the caller.
         */
        dynamicBlocks: Map<string, string>;
        /** For the self-check — (title, content) summaries of the selected chunks. Filled by memoryContext. */
        selfCheckSources: Array<{ title: string; excerpt: string }>;
        /** Confidence Engine retrieval signals (Phase 2) — filled by memoryContext. */
        confidenceSignals: import('./intelligence/confidenceEngine').RetrievalConfidenceSignals | null;
    } = {
        retrieval: null,
        lessons: [],
        knowledgeMix: null,
        dynamicBlocks: new Map(),
        selfCheckSources: [],
        confidenceSignals: null,
    };

    /**
     * Resets every per-turn slot of `_turnCtx` in one place.
     *
     * `handlePrompt` calls this when a new top-level turn begins and again just
     * before the memory context is rebuilt. The `_turnCtx` object itself and its
     * `dynamicBlocks` Map are kept and emptied in place; the two arrays are
     * replaced with fresh empty arrays.
     */
    private resetTurnContext(): void {
        const turn = this._turnCtx;

        // Emptied in place so the same Map instance stays attached to the context.
        turn.dynamicBlocks.clear();

        turn.retrieval = null;
        turn.knowledgeMix = null;
        turn.confidenceSignals = null;

        turn.lessons = [];
        turn.selfCheckSources = [];
    }

    /** Options supplied at construction time (stream lifecycle hooks, optional LM Studio streamer, optional approval queue). */
    private readonly options: AgentExecutorOptions;

    /**
     * Builds the executor's collaborators and starts restoring the last session.
     *
     * @param context VS Code extension context; kept on the instance and passed to `SessionManager`.
     * @param options Optional hooks and collaborators; defaults to an empty object.
     */
    constructor(
        private context: vscode.ExtensionContext,
        options: AgentExecutorOptions = {}
    ) {
        this.options = options;
        this.transactionManager = new TransactionManager();
        this.sessionManager = new SessionManager(this.context);
        this.statusBarManager = new StatusBarManager();

        // Initialize 5-Layer Cognitive Memory System
        const activeBrain = getActiveBrainProfile();
        const initConfig = getConfig();
        this.memoryManager = new MemoryManager(activeBrain.localBrainPath, {
            enabled: initConfig.memoryEnabled,
            shortTermLimit: initConfig.memoryShortTermMessages,
        });

        // Initialize RAG Pipeline Orchestrator
        this.retrievalOrchestrator = new RetrievalOrchestrator();

        // A constructor cannot await, so restoration is fire-and-forget. The rejection
        // handler keeps a failed restore from surfacing as an unhandled promise rejection.
        void this.restoreLastSession().catch((err: unknown) => {
            logError('Failed to restore last session.', {
                error: err instanceof Error ? err.message : String(err),
            });
        });
    }

    /**
     * Thin wrapper around the extracted `restoreLastSessionFn`.
     *
     * Passes the session manager plus two setters, so the helper can write a
     * restored history and task id back onto this instance. Called from the
     * constructor.
     */
    private async restoreLastSession() {
        return restoreLastSessionFn({
            sessionManager: this.sessionManager,
            setChatHistory: (h) => { this.chatHistory = h; },
            setCurrentTaskId: (t) => { this.currentTaskId = t; },
        });
    }

    /**
     * Attaches the webview that receives this executor's `postMessage` updates.
     * `handlePrompt` returns early while no webview has been set.
     */
    public setWebview(webview: vscode.Webview) {
        this.webview = webview;
    }

    /**
     * Registers the history-change listener. Only one listener is kept; a later
     * call replaces the previous one.
     */
    public setHistoryChangeListener(listener: HistoryChangeListener) {
        this.historyChangeListener = listener;
    }

    /**
     * Returns the externally visible part of the chat history: every message that
     * is not flagged `internal`, plus assistant messages even when they are.
     * The result is a new array; the stored history is not modified.
     */
    public getHistory() {
        return this.chatHistory.filter(({ internal, role }) => role === 'assistant' || !internal);
    }

    /**
     * Replaces the in-memory chat history, then calls `emitHistoryChanged()`.
     * The given array is stored by reference — no copy is made here.
     */
    public setHistory(history: ChatMessage[]) {
        this.chatHistory = history;
        this.emitHistoryChanged();
    }

    /**
     * Discards the chat history and the remembered mode signature, then calls
     * `emitHistoryChanged()`.
     *
     * When more than two messages are retained, `onSessionEnd()` runs first so
     * memories can be extracted while the history is still in place.
     */
    public clearHistory() {
        const hasEnoughToExtract = this.chatHistory.length > 2;
        if (hasEnoughToExtract) {
            // Extract memories before the history is thrown away.
            this.onSessionEnd();
        }

        // v2.2.69 — a new session has no "previous mode". Without resetting the mode
        // signature, the first message would be compared against the prior session's
        // mode and a wrong bridge would be inserted (a regression).
        this._lastModeSignature = null;
        this.chatHistory = [];

        this.emitHistoryChanged();
    }

    /**
     * Stops the current run: mints a new run id and makes it the active one, then
     * aborts the in-flight request (if any) and drops its controller.
     */
    public stop() {
        this.runSerial += 1;
        this.activeRunId = this.runSerial;

        const controller = this.abortController;
        if (!controller) {
            return;
        }
        controller.abort();
        this.abortController = null;
    }

    /**
     * Stops any in-flight run and then clears the conversation.
     *
     * After `stop()`, the remaining steps are exactly those of `clearHistory()`:
     * call `onSessionEnd()` when more than two messages are retained, empty the
     * history, forget the mode signature, and call `emitHistoryChanged()`.
     */
    public resetConversation() {
        this.stop();
        this.clearHistory();
    }

    /**
     * Commits the active transaction. Does nothing when no transaction is active.
     *
     * After committing it emits `TRANSACTION_COMMITTED`, sets the status bar to
     * `Success`, and posts a confirmation chunk to the webview when one is attached.
     */
    public async approveTransaction() {
        const tx = this.transactionManager;
        if (tx.isActive()) {
            tx.commit();
            agentEvents.emit(AgentEventTypes.TRANSACTION_COMMITTED);
            this.statusBarManager.updateStatus(AgentStatus.Success, 'Changes committed.');

            const approvedNotice = { type: 'streamChunk', value: '\n✅ **작업이 승인되어 반영되었습니다.**' };
            this.webview?.postMessage(approvedNotice);
        }
    }

    /**
     * Rolls back the active transaction. Does nothing when no transaction is active.
     *
     * After rolling back it emits `TRANSACTION_ROLLED_BACK`, sets the status bar to
     * `Idle`, and — when a webview is attached — posts a rejection chunk followed
     * by a `lessonCandidate` message with trigger `'rejected'`.
     */
    public async rejectTransaction() {
        const tx = this.transactionManager;
        if (tx.isActive()) {
            tx.rollback();
            agentEvents.emit(AgentEventTypes.TRANSACTION_ROLLED_BACK);
            this.statusBarManager.updateStatus(AgentStatus.Idle, 'Changes rolled back.');

            const rejectedNotice = { type: 'streamChunk', value: '\n❌ **작업이 거부되어 모든 변경사항이 취소되었습니다.**' };
            this.webview?.postMessage(rejectedNotice);

            // The user judged this change wrong — a good moment to capture why, so it doesn't recur.
            const lessonPrompt = { type: 'lessonCandidate', value: { trigger: 'rejected' } };
            this.webview?.postMessage(lessonPrompt);
        }
    }

    public async handlePrompt(
        prompt: string | null,
        modelName: string,
        options: {
            brainEnabled?: boolean,
            loopDepth?: number,
            visionContent?: any[],
            temperature?: number,
            systemPrompt?: string,
            runId?: number,
            agentSkillContext?: string,
            agentSkillFile?: string,
            negativePrompt?: string,
            designerContext?: string,
            /**
             * Pre-formatted architecture-context block (`[ACTIVE PROJECT ARCHITECTURE CONTEXT]…`)
             * built by sidebarProvider from the active project's architecture doc.
             * Empty/undefined when project mode is off or auto-attach is disabled.
             */
            projectArchitectureContext?: string,
            secondBrainTraceEnabled?: boolean,
            secondBrainTraceDebug?: boolean,
            brainProfileId?: string
        }
    ) {
        const {
            brainEnabled = false,
            loopDepth = 0,
            visionContent,
            temperature = getConfig().chatTemperature,
            systemPrompt = getSystemPrompt()
        } = options;
        const { ollamaUrl, defaultModel: configDefaultModel, timeout, multiAgentEnabled } = getConfig();
        const runId = options.runId ?? (loopDepth === 0 ? ++this.runSerial : this.activeRunId);

        // Decide whether to use Multi-Agent Workflow as an internal execution strategy.
        // [Critical Fix] 사용자가 에이전트를 명시적으로 선택한 경우, 해당 에이전트의 system prompt를
        // 최우선으로 적용해야 하므로 멀티에이전트 워크플로우 분기를 우회합니다.
        const hasExplicitAgentSelection = !!options.agentSkillContext;
        if (loopDepth === 0 && !hasExplicitAgentSelection && shouldUseMultiAgentWorkflow(prompt || '', multiAgentEnabled)) {
            return this.executeMultiAgentWorkflow(prompt!, modelName, options);
        }

        const hasVisionContent = Array.isArray(visionContent) ? visionContent.length > 0 : !!visionContent;
        const isCasualConversation = prompt ? isCasualConversationPrompt(prompt) : false;
        let requestTimeoutHandle: ReturnType<typeof setTimeout> | undefined;

        if (!this.webview) return;

        // Telemetry: wall-clock start of the user-visible turn. Only meaningful
        // at loopDepth===0 (action-loop recursions roll up into the same turn).
        const turnStartMs = loopDepth === 0 ? Date.now() : 0;

        try {
            // 0. Safety Check: Rollback any dangling transaction from previous runs
            if (this.transactionManager.isActive()) {
                logInfo('Cleaning up dangling transaction from previous session.');
                this.transactionManager.rollback();
            }

            this.statusBarManager.updateStatus(AgentStatus.Thinking);
            if (loopDepth === 0) {
                if (this.abortController) {
                    this.abortController.abort();
                    this.abortController = null;
                }
                this.activeRunId = runId;
                this.currentTaskId = `task_${Date.now()}`;
                await this.context.workspaceState.update('lastActionStr', undefined);
                // Clear last-turn retrieval telemetry up front: when a casual turn (or anything else) skips
                // buildMemoryContext, the previous turn's value would otherwise leak into this turn's
                // "참조 범위" footer (the exact "안녕 → 🔎 참조: 에피소드기억" bug).
                this.resetTurnContext();
            }

            // 1. Prepare Context
            const workspaceFolders = vscode.workspace.workspaceFolders;
            const rootPath = workspaceFolders ? workspaceFolders[0].uri.fsPath : '';

            const config = getConfig();
            const activeBrain = options.brainProfileId
                ? (config.brainProfiles.find((profile) => profile.id === options.brainProfileId) || getActiveBrainProfile())
                : getActiveBrainProfile();
            // Per-turn context blocks → src/agent/handlePrompt/buildTurnContextBlocks.ts
            const {
                contextBlock: baseContextBlock,
                brainContext,
                brainInventoryCtx,
                brainFiles,
                brainPreview,
                localPathContext,
                secondBrainTrace,
            } = buildTurnContextBlocks({
                prompt,
                options,
                isCasualConversation,
                loopDepth,
                config,
                activeBrain,
                chatHistory: this.chatHistory,
                rootPath,
            });
            void brainPreview;
            // [일정/할일 실데이터] "오늘 업무 목록" 류 질의는 RAG(두뇌)가 아니라
            // Google Calendar/Tasks 가 진실의 원천 — 감지 시 실데이터 블록을 주입.
            // 미주입 시 모델이 모른다고 하거나 지어내는 문제의 수정.
            let contextBlock = baseContextBlock;
            if (prompt && loopDepth === 0 && !isCasualConversation && isScheduleRequest(prompt)) {
                try {
                    contextBlock += `\n\n${await buildScheduleContext(this.context, prompt)}`;
                } catch (e: any) {
                    logError('Schedule context 주입 실패 (계속 진행).', { error: e?.message ?? String(e) });
                }
            }

            // [자기 평가 정본 주입] 기능 개선/자기 평가 질의는 RAG 경쟁에 맡기지 않고
            // 현행 기능 인벤토리를 결정론적으로 주입 — 모델이 검색 없이 기억으로 답해
            // 이미 있는 기능을 신규 제안하던 구식화 버그(3회 재발)의 마지막 구멍 봉쇄.
            if (prompt && loopDepth === 0 && !isCasualConversation && activeBrain?.localBrainPath
                && (isSelfAssessRequest(prompt) || (isAnalysisRequest(prompt) && isAboutSelf(prompt)))) {
                try {
                    // 인벤토리 lazy 재생성 — 활성화 시 1회 생성은 brain 볼륨이 늦게
                    // 마운트되면 조용히 건너뛰어 파일이 영영 없는 상태가 됐다 (그 결과
                    // "파일 없음" 안내만 주입돼 모델이 구현 여부를 알 수 없었음).
                    // 질의 시점에 한 번 더 보장. idempotent — 있으면 즉시 return.
                    await ensureFeatureInventory(this.context);
                    const selfAssessBlock = buildSelfAssessContext(activeBrain.localBrainPath);
                    contextBlock += `\n\n${selfAssessBlock}`;
                    // 성공 로그 필수 — "주입이 됐는데 모델이 무시" vs "주입 자체가 안 됨"을
                    // 구분 못 해 같은 버그를 3번 쫓았다. 실패 모드는 관측 가능해야 한다.
                    logInfo('자기 평가 인벤토리 주입.', { chars: selfAssessBlock.length, promptPreview: prompt.slice(0, 60) });
                } catch (e: any) {
                    logError('자기 평가 컨텍스트 주입 실패 (계속 진행).', { error: e?.message ?? String(e) });
                }
            }

            // [근거 기반 분석 강제] 분석/검토/의견형 요청인데 모델이 코드를 읽지 않고
            // "~로 보입니다" 추측으로 답하는 실패 모드 차단. 워크스페이스가 열려 있으면
            // "주장 전에 read_file 로 실제 확인하라"는 지시를 주입 — 강제 주입 패턴의
            // 5번째 적용 (일정→캘린더, 자기평가→인벤토리, 정정→캡처, URL→실데이터와 동일).
            if (prompt && loopDepth === 0 && !isCasualConversation && isAnalysisRequest(prompt)
                && vscode.workspace.workspaceFolders?.length) {
                contextBlock += `\n\n[근거 기반 분석 규칙 — 이 요청은 분석/검토형]
- 이 워크스페이스의 코드·문서·기능에 대한 주장은 *이 대화에서 실제로 읽은 파일*에만 근거하라.
- 확인하지 않은 구현을 "~로 보입니다", "~일 것입니다"라고 추측 서술하는 것은 금지. 먼저 <list_files path="..."/> 와 <read_file path="..."/> 태그로 관련 파일을 직접 열어 확인한 뒤 답하라. 태그를 emit 하면 시스템이 파일 내용을 주입하고 자동으로 이어서 답변하게 된다.
- ⚠️ "소스 코드 확인이 필요합니다"라고 말만 하고 끝내는 것은 금지다. 확인이 필요하다고 판단했다면 *바로 이 답변 안에서* <list_files>/<read_file> 태그를 emit 하라 — 그것이 확인하는 방법이다. 태그로 접근 불가능한 대상(외부 시스템·미설치 도구 등)에 한해서만 "확인하지 못함"으로 명시하라.
- "X 기능을 추가하라"고 제안하기 전에 그 기능이 이미 구현돼 있는지 해당 모듈을 찾아 읽어라. 이미 있는 기능을 새로 만들라고 제안하는 것은 잘못된 분석이다.
- 일반론·추측으로 빈칸을 채우지 마라.`;
            }

            // [URL 실데이터] 채팅 프롬프트에 URL 이 있으면 본문을 추출해 주입.
            // /wikify 만 URL 접근이 가능하고 일반 채팅은 "접근 불가"라고 답하던 공백 수정.
            // v2: Bridge 추출 → 직접 fetch 폴백 (urlContext 내부) + 최대 2개 URL + config 게이트.
            if (prompt && loopDepth === 0 && !isCasualConversation && getConfig().webAutoFetchEnabled !== false) {
                const urls = extractUrls(prompt, 2);
                for (const url of urls) {
                    try {
                        contextBlock += `\n\n${await buildUrlContext(url)}`;
                        logInfo('URL 컨텍스트 주입 시도.', { url });
                    } catch (e: any) {
                        logError('URL 컨텍스트 주입 실패 (계속 진행).', { error: e?.message ?? String(e) });
                    }
                }
            }

            // [Correction Loop ①] 이 발화가 직전 답변에 대한 *정정*이면 fire-and-forget
            // 캡처 — 오류 분류 → 태깅 레슨 + 회귀 케이스(.astra/eval/corrections.jsonl).
            // 정정 자체가 Ground Truth 가 되어 주간 회귀 테스트·약점 프로필의 원료가 된다.
            // 턴 응답을 막지 않는다 (await 없음).
            if (prompt && loopDepth === 0 && activeBrain?.localBrainPath && looksLikeCorrection(prompt)) {
                const visible = this.chatHistory.filter(m => !m.internal);
                const lastAssistant = [...visible].reverse().find(m => m.role === 'assistant');
                const lastUserIdx = lastAssistant ? visible.lastIndexOf(lastAssistant) - 1 : -1;
                const priorQuestion = lastUserIdx >= 0 && visible[lastUserIdx]?.role === 'user' ? visible[lastUserIdx].content : '';
                if (lastAssistant && priorQuestion) {
                    void captureCorrection({
                        brainPath: activeBrain.localBrainPath,
                        question: priorQuestion,
                        wrongAnswer: lastAssistant.content,
                        correction: prompt,
                        llm: { baseUrl: config.ollamaUrl, model: configDefaultModel },
                    }).then(file => {
                        if (file) logInfo('Correction Loop: 정정 캡처 완료.', { lesson: file });
                    }).catch((e: any) => logError('Correction Loop 캡처 실패 (무시).', { error: e?.message ?? String(e) }));
                }
            }

            // 2. Setup History
            if (prompt !== null) {
                if (loopDepth === 0) {
                    this.chatHistory.push({ role: 'user', content: prompt });
                    this.emitHistoryChanged();
                } else {
                    this.chatHistory.push({ role: 'system', content: prompt, internal: true });
                }
            }

            // 3. API Request Setup (라인 229에서 이미 추출한 ollamaUrl, configDefaultModel 재사용)
            const actualModel = (modelName && modelName.trim()) || configDefaultModel;
            // Bound the in-memory history before building the request — shrinks bulky
            // older tool-result bodies and drops the oldest messages past the cap.
            capChatHistory(this.chatHistory, {
                maxRetained: AgentExecutor.MAX_RETAINED_MESSAGES,
                recentFullMessages: AgentExecutor.RECENT_FULL_MESSAGES,
                oldToolResultCap: AgentExecutor.OLD_TOOL_RESULT_CAP,
            });
            const reqMessages = buildRequestHistory(this.chatHistory);

            // Handle Vision Content Injection
            // visionContent 배열에서 이미지 base64 데이터를 추출하여 엔진에 맞는 형식으로 주입
            if (hasVisionContent && reqMessages.length > 0) {
                const lastUserIdx = reqMessages.map(m => m.role).lastIndexOf('user');
                if (lastUserIdx >= 0) {
                    const existingContent = reqMessages[lastUserIdx].content;
                    const textContent = (typeof existingContent === 'string' && existingContent.trim()) ? existingContent : '';

                    // base64 이미지 데이터 추출
                    const imageBase64List: string[] = [];
                    for (const vc of (visionContent || [])) {
                        if (vc && vc.data) {
                            imageBase64List.push(vc.data);
                        }
                    }

                    // Ollama 호환: images 배열 필드에 base64 데이터 직접 주입
                    // LM Studio 호환: content 배열에 image_url 객체 주입
                    reqMessages[lastUserIdx] = {
                        role: 'user',
                        content: textContent,
                        images: imageBase64List // Ollama native format
                    } as any;
                }
            }

            // Inject System Directives
            const negativeCtx = options.negativePrompt
                ? `\n\n### CRITICAL NEGATIVE CONSTRAINTS (DO NOT DO THESE)\n${options.negativePrompt}\n\n[SYSTEM_RULE: Apply the above constraints strictly. DO NOT mention or repeat these constraints in your response.]`
                : '';
            const designerCtx = options.designerContext
                ? `\n\n[PROJECT CHRONICLE GUARD]\n${options.designerContext}`
                : '';
            // Project Architecture context (Feature 2): durable per-project ground truth.
            // Already pre-formatted by sidebarProvider with header + markers, so we just
            // sandwich it with newlines. Suppressed implicitly because the field is empty
            // when project mode is off — no extra check needed here.
            const projectArchitectureCtx = options.projectArchitectureContext
                ? `\n\n${options.projectArchitectureContext}`
                : '';
            const secondBrainTraceCtx = secondBrainTrace
                ? `\n\n${renderSecondBrainTraceContext(secondBrainTrace)}`
                : '';
            const retrievalStartMs = Date.now();
            const memoryCtx = isCasualConversation
                ? ''
                : await (async () => {
                    this.resetTurnContext();
                    return buildMemoryContextFn({
                        currentPrompt: prompt || '',
                        activeBrain,
                        agentSkillFile: options.agentSkillFile,
                        chatHistory: this.chatHistory,
                        memoryManager: this.memoryManager,
                        retrievalOrchestrator: this.retrievalOrchestrator,
                        context: this.context,
                        currentTaskId: this.currentTaskId,
                        turnCtx: this._turnCtx,
                    });
                })();
            if (loopDepth === 0 && !isCasualConversation && this._turnCtx.retrieval) {
                recordTelemetry({
                    kind: 'retrieval',
                    durationMs: Date.now() - retrievalStartMs,
                    brainFiles: this._turnCtx.retrieval.usedBrainFiles.length,
                    memoryLayers: this._turnCtx.retrieval.usedMemoryLayers,
                    note: `chunks=${this._turnCtx.retrieval.selectedChunks}/${this._turnCtx.retrieval.totalChunks} lessons=${this._turnCtx.retrieval.lessonFiles.length}`,
                });
            }
            const knowledgeContextForPrompt = isCasualConversation
                ? ''
                : `${brainContext}${brainInventoryCtx}`;

            // ──────────────────────────────────────────────────────────────────
            // [Agent Mode v3] When an agent is selected, Astra's default format/persona
            // sections are removed and the agent prompt is placed last so that it takes
            // absolute precedence (done by the prompt builders called below; their
            // internals are not visible here).
            // ──────────────────────────────────────────────────────────────────
            // Agent mode is on whenever an agent skill context was supplied.
            const isAgentMode = !!options.agentSkillContext;

            // Mode-switch bridge → src/agent/handlePrompt/buildModeBridgeContext.ts
            const _bridge = buildModeBridgeContext({
                options,
                lastModeSignature: this._lastModeSignature,
                chatHistory: this.chatHistory,
            });
            const modeBridgeCtx = _bridge.modeBridgeCtx;
            // Only overwrite the remembered signature when the builder reports a new one.
            if (_bridge.newSignature !== null) {
                this._lastModeSignature = _bridge.newSignature;
            }
            // [PRIOR TURN CONCLUSION] — inject the first sentence of the previous assistant
            // answer as an anchor, so that on a follow-up correction/addition the model uses
            // that conclusion as the starting point for *re-evaluation*.
            // Built only for the top-level turn (loopDepth === 0); empty otherwise.
            const priorConclusionCtx = loopDepth === 0
                ? buildPriorTurnConclusionContext(this.chatHistory)
                : '';
            // System prompt build (agent vs astra mode) → src/agent/handlePrompt/{buildAgentModeSystemPrompt,buildAstraModeSystemPrompt}.ts
            const fullSystemPrompt: string = isAgentMode
                ? buildAgentModeSystemPrompt({
                    systemPrompt,
                    agentSkillContext: options.agentSkillContext || '',
                    modeBridgeCtx,
                    priorConclusionCtx,
                    designerCtx,
                    secondBrainTraceCtx,
                    memoryCtx,
                    knowledgeContextForPrompt,
                    contextBlock,
                    negativeCtx,
                    actualModel,
                    contextLength: config.contextLength,
                    dynamicBlocks: this._turnCtx.dynamicBlocks,
                })
                : buildAstraModeSystemPrompt({
                    prompt,
                    systemPrompt,
                    modeBridgeCtx,
                    priorConclusionCtx,
                    designerCtx,
                    projectArchitectureCtx,
                    secondBrainTraceCtx,
                    memoryCtx,
                    knowledgeContextForPrompt,
                    contextBlock,
                    negativeCtx,
                    isCasualConversation,
                    localPathContext,
                    knowledgeMix: this._turnCtx.knowledgeMix,
                    dynamicBlocks: this._turnCtx.dynamicBlocks,
                });
            // Context budget computation → src/agent/handlePrompt/computeBudgetedRequest.ts
            const imageCount = (reqMessages as any[])
                .reduce((n, m) => n + (Array.isArray(m?.images) ? m.images.length : 0), 0);
            // Budget against the model's REAL loaded window, not just the user's
            // contextLength setting. Best-effort + cached; only for the LM Studio
            // SDK path (REST/Ollama/cloud expose no such query → undefined → prior behavior).
            let actualContextLength: number | undefined;
            try {
                const _isCloud = (() => {
                    try {
                        const { parseModelPrefix } = require('./features/providers') as typeof import('./features/providers');
                        return !!parseModelPrefix(actualModel);
                    } catch { return false; }
                })();
                if (!_isCloud
                    && resolveEngine(ollamaUrl) === 'lmstudio'
                    && this.options.lmStudioStreamer?.getModelContextLength) {
                    actualContextLength = await this.options.lmStudioStreamer.getModelContextLength(actualModel);
                }
            } catch { /* best-effort — fall back to configured window */ }

            // ── Large-input Map-Reduce ────────────────────────────────────────
            // When a SINGLE user message is too big to fit the (real) window,
            // history-trimming can't help — you can't drop the current question.
            // Chunk it, extract only the request-relevant facts per chunk, and
            // integrate, then let the normal streaming path answer from the
            // condensed context. Only the user-visible turn; casual chat skipped.
            //
            // NOTE(review): the abort signal handed to the map-reduce calls below is read from
            // `this.abortController`, which is re-created later in this method (step "4. Call AI
            // Engine"). Whatever controller exists at this point may therefore belong to an
            // earlier run — confirm that it is fresh (and not already aborted) when this runs.
            if (loopDepth === 0 && !isCasualConversation && config.largeInputMapReduce) {
                try {
                    // Effective window: the smaller of the configured and the actually loaded
                    // context length, when the latter is known and positive.
                    const effWindow = (typeof actualContextLength === 'number' && actualContextLength > 0)
                        ? Math.min(config.contextLength, actualContextLength)
                        : config.contextLength;
                    const lastUserIdx = reqMessages.map((m) => m.role).lastIndexOf('user');
                    const lastUser = lastUserIdx >= 0 ? reqMessages[lastUserIdx] : undefined;
                    // Only string content is considered; anything else is treated as empty.
                    const content = typeof lastUser?.content === 'string' ? lastUser.content : '';
                    // System prompt estimate plus a small fixed allowance of 4 tokens.
                    const sysTokens = estimateTokens(fullSystemPrompt) + 4;
                    const mrCfg = {
                        enabled: true,
                        triggerRatio: config.mapReduceTriggerRatio,
                        concurrency: config.mapReduceConcurrency,
                        maxDepth: config.mapReduceMaxDepth,
                        showProvenance: config.mapReduceShowProvenance,
                    };
                    if (lastUser && shouldMapReduce(estimateTokens(content), effWindow, mrCfg)) {
                        // Intent: the whole message when short; otherwise its first 800 and
                        // last 400 characters joined by an ellipsis line.
                        const intent = content.length > 1400
                            ? `${content.slice(0, 800)}\n…\n${content.slice(-400)}`
                            : content;
                        const mrEngine = resolveEngine(ollamaUrl);
                        this.webview?.postMessage({ type: 'mapReduceStatus', value: { phase: 'start' } });
                        const mr = await runMapReduce(
                            {
                                // Each map/reduce step is a low-temperature non-streaming call
                                // against the same model and endpoint as the main request.
                                callLLM: async (messages, maxTokens) => {
                                    const r = await this.callNonStreaming({
                                        baseUrl: ollamaUrl,
                                        modelName: actualModel,
                                        engine: mrEngine,
                                        messages,
                                        temperature: 0.1,
                                        maxTokens,
                                        contextLength: effWindow,
                                        signal: this.abortController?.signal,
                                    });
                                    return r.text;
                                },
                                estimateTokens,
                                log: (msg, meta) => logInfo(msg, meta),
                                signal: this.abortController?.signal,
                            },
                            { intent, largeContent: content, windowTokens: effWindow, systemTokens: sysTokens, safetyMargin: config.contextSafetyMargin, cfg: mrCfg },
                        );
                        // allIrrelevant → keep original (budgeter truncates) rather than forcing an empty context.
                        if (!mr.allIrrelevant && mr.condensedContext.trim()) {
                            // Replace the oversized user message with intent + condensed extract;
                            // other fields of the message (spread from `lastUser`) are kept.
                            reqMessages[lastUserIdx] = {
                                ...lastUser,
                                content: `${intent}\n\n──────── 추출된 관련 자료 (원본 ${mr.chunkCount}조각 중 ${mr.relevantCount}조각, 통합 ${mr.reduceDepth}단계) ────────\n${mr.condensedContext}`,
                            } as any;
                            logInfo('Large input condensed via map-reduce.', {
                                model: actualModel, chunkCount: mr.chunkCount, relevantCount: mr.relevantCount, reduceDepth: mr.reduceDepth,
                            });
                        }
                        // The 'done' status is posted whether or not the message was replaced.
                        this.webview?.postMessage({
                            type: 'mapReduceStatus',
                            value: { phase: 'done', chunkCount: mr.chunkCount, relevantCount: mr.relevantCount, allIrrelevant: mr.allIrrelevant },
                        });
                    }
                } catch (e: any) {
                    // Any failure → fall through to the normal (single-shot) path. Worst case the
                    // budgeter truncates the oversized input, which is the prior behavior.
                    logError('Large-input map-reduce failed — falling back to single-shot path.', { error: e?.message ?? String(e) });
                    this.webview?.postMessage({ type: 'mapReduceStatus', value: { phase: 'error' } });
                }
            }

            // Compute the budgeted request, then unpack the fields the rest of this
            // method works with. `_budget` itself stays in scope for later reads.
            const _budget = computeBudgetedRequest({
                fullSystemPrompt,
                reqMessages,
                actualModel,
                config,
                imageCount,
                actualContextLength,
            });
            const {
                messagesForRequest,
                ctxLimits,
                inputTokens,
                maxOutputTokens,
                systemTokens,
                systemTruncated,
                modelParamB,
                cappedForSmallModel,
                outputBudget,
            } = _budget;
            // Only the count of kept history entries is available; wrap it so callers
            // can keep reading `.length`.
            const budgetedHistory = { length: _budget.budgetedHistoryLength };
            // Stop reason reported by whichever transport ends up producing the answer.
            let finishStopReason: string | undefined;

            // 4. Call AI Engine
            this.abortController = new AbortController();
            requestTimeoutHandle = setTimeout(() => {
                logError('AI request timed out.', { timeoutMs: timeout, model: actualModel, loopDepth });
                this.abortController?.abort();
            }, timeout);

            // Cloud-provider routing — when the prefix of `actualModel` denotes a cloud provider,
            // both the LM Studio SDK path and the local REST path are bypassed (the routing itself
            // is presumably done inside the request helpers; not visible here).
            // From the SSE parser's point of view the same OpenAI-compatible stream arrives, so the
            // consumer needs no change.
            const _cloudHit = (() => {
                try {
                    const { parseModelPrefix } = require('./features/providers') as typeof import('./features/providers');
                    return parseModelPrefix(actualModel);
                } catch { return null; }
            })();
            // A cloud hit is parsed with the 'lmstudio' (OpenAI-style) chunk format below.
            const engine = _cloudHit ? 'lmstudio' : resolveEngine(ollamaUrl);
            // The SDK is used only for non-cloud LM Studio targets when a streamer was provided.
            const useLmStudioSdk = !_cloudHit && engine === 'lmstudio' && !!this.options.lmStudioStreamer;
            let apiUrl = '';
            let aiResponseText = '';
            let buffer = '';

            if (loopDepth === 0) {
                // Context-budget preview so the UI can show what actually went into this turn
                // (≈N tokens, Brain N files, open file included?, history compacted?, small-model warning).
                this.webview.postMessage({
                    type: 'contextBudget',
                    value: {
                        model: actualModel,
                        engine,
                        paramB: modelParamB,
                        contextLength: ctxLimits.contextLength,
                        nominalContextLength: config.contextLength,
                        actualContextLength,
                        windowMismatch: _budget.windowMismatch,
                        cappedForSmallModel,
                        inputTokens,
                        maxOutputTokens,
                        systemTokens,
                        historyKept: budgetedHistory.length,
                        droppedHistory: reqMessages.length - budgetedHistory.length,
                        systemTruncated,
                        includesOpenFile: !!contextBlock && contextBlock.includes('[Currently open file:'),
                        brainFiles: brainFiles.length,
                        imageCount,
                        tight: outputBudget.tight,
                        smallModel: cappedForSmallModel || (modelParamB !== null && modelParamB <= 3 && inputTokens > 12000),
                    },
                });
                // If the user's message reads like a regression complaint ("또 안 돼", "비슷한 실수", "왜 반복돼"…),
                // offer to record a lesson — a recurring problem is exactly what Experience Memory is for.
                if (prompt && isQaRegressionFeedback(prompt)) {
                    this.webview.postMessage({ type: 'lessonCandidate', value: { trigger: 'qa-feedback' } });
                }
                this.webview.postMessage({ type: 'streamStart' });
                this.options.onStreamLifecycle?.start();
            }

            // Progressive answering: live-stream tokens to the webview during
            // the user-visible first turn (loopDepth === 0). The bubble fills
            // as the model generates instead of dropping all at once at the end,
            // and any auto-continuation rounds keep posting deltas through the
            // same channel. Post-processing (reasoning strip / sanitize /
            // policy enforcement) emits a final `streamReplace` so the bubble
            // ends up matching the cleaned answer regardless of what slipped
            // through live.
            // [Clean Stream] When g1nation.liveStreamTokens is false (the default), tokens are
            // only accumulated internally and just the final, sanitized answer is shown in one
            // go → this prevents Harmony/think markers from briefly leaking onto the screen.
            // Setting it to true enables the legacy live streaming.
            const postLiveDeltas = loopDepth === 0 && getConfig().liveStreamTokens === true;

            let lmStudioStats: ChatStreamStats | undefined;
            if (useLmStudioSdk) {
                apiUrl = `${ollamaUrl} (sdk)`;
                logInfo('Streaming chat via LM Studio SDK.', { model: actualModel });
                try {
                    const stream = this.options.lmStudioStreamer!.stream({
                        modelName: actualModel,
                        messages: messagesForRequest.map((m) => ({ role: m.role, content: m.content })),
                        temperature,
                        maxTokens: maxOutputTokens,
                        contextOverflowPolicy: config.contextOverflowPolicy,
                        ...lmStudioSamplingFromConfig(),
                        ...lmStudioRespondExtrasFromConfig(),
                        signal: this.abortController.signal,
                    });
                    for await (const { token, stopReason, stats } of stream) {
                        if (this.isStaleRun(runId)) return;
                        if (token) {
                            aiResponseText += token;
                            if (postLiveDeltas) this.webview.postMessage({ type: 'streamChunk', value: token });
                        }
                        if (stopReason) finishStopReason = stopReason;
                        if (stats) lmStudioStats = stats;
                    }
                    if (lmStudioStats && getConfig().lmStudioShowStatsInBudget && loopDepth === 0) {
                        this.webview.postMessage({
                            type: 'lmStudioStats',
                            value: {
                                model: actualModel,
                                tokensPerSecond: lmStudioStats.tokensPerSecond,
                                timeToFirstTokenSec: lmStudioStats.timeToFirstTokenSec,
                                predictedTokensCount: lmStudioStats.predictedTokensCount,
                                promptTokensCount: lmStudioStats.promptTokensCount,
                                totalTimeSec: lmStudioStats.totalTimeSec,
                                draftModelKey: lmStudioStats.draftModelKey,
                                draftTokensCount: lmStudioStats.draftTokensCount,
                                acceptedDraftTokensCount: lmStudioStats.acceptedDraftTokensCount,
                                stopReason: finishStopReason,
                            },
                        });
                    }
                } catch (err: any) {
                    if (err?.name === 'AbortError' || this.abortController.signal.aborted) {
                        logInfo('Generation aborted by user.');
                    } else {
                        const msg = err?.message ?? String(err);
                        if (/context\s*length|contextlengthreached|exceed|too\s*long/i.test(msg)) {
                            finishStopReason = 'contextLengthReached';
                        }
                        logError('LM Studio SDK chat failed.', { engine, error: msg });
                        this.webview?.postMessage({ type: 'error', value: `LM Studio: ${msg}` });
                    }
                }
            } else {
                const request = await this.createStreamingRequest({
                    baseUrl: ollamaUrl,
                    modelName: actualModel,
                    reqMessages: messagesForRequest,
                    temperature,
                    maxTokens: maxOutputTokens,
                    contextLength: ctxLimits.contextLength
                });
                const { response, apiUrl: restApiUrl } = request;
                apiUrl = restApiUrl;
                if (this.isStaleRun(runId)) return;

                const reader = response.body?.getReader();
                if (!reader) throw new Error("Response body is not readable.");

                const decoder = new TextDecoder();
                // Drain the response body chunk by chunk. Decoded text is split on newlines;
                // the trailing partial line is carried over in `buffer` (and parsed after the
                // loop by the "final buffer" step). Each complete line is parsed as JSON, with an
                // optional `data: ` SSE prefix stripped, and its token / finish reason is
                // extracted according to `engine`.
                //
                // try/finally guarantees the reader's lock is released on every
                // exit path (normal end, AbortError, parse exception, stale-run
                // early return). Without this, downstream consumers — including
                // any retry path that wants to drain the same body — fail with
                // "lock() request could not be registered" because the previous
                // reader still holds the stream lock.
                try {
                    while (true) {
                        const { done, value } = await reader.read();
                        if (done) break;
                        if (this.isStaleRun(runId)) return;

                        buffer += decoder.decode(value, { stream: true });
                        const lines = buffer.split('\n');
                        buffer = lines.pop() || '';
                        for (const line of lines) {
                            const trimmed = line.trim();
                            if (!trimmed || trimmed === 'data: [DONE]') continue;
                            try {
                                const raw = trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed;
                                const json = JSON.parse(raw);
                                const token = engine === 'lmstudio' ? json.choices?.[0]?.delta?.content || '' : json.message?.content || json.response || '';
                                if (token) {
                                    aiResponseText += token;
                                    if (postLiveDeltas) this.webview.postMessage({ type: 'streamChunk', value: token });
                                }
                                const fr = engine === 'lmstudio'
                                    ? json.choices?.[0]?.finish_reason
                                    : (json.done_reason ?? (json.done === true ? 'stop' : undefined));
                                if (fr) finishStopReason = fr;
                            } catch (e: any) {
                                // A malformed line is logged and skipped; the stream keeps going.
                                logError('Failed to parse streaming chunk.', { engine, apiUrl, chunk: summarizeText(trimmed, 300), error: e?.message || String(e) });
                            }
                        }
                    }
                } catch (err: any) {
                    // Treat the failure as a cancellation when either the error says so or our own
                    // signal has fired — same rule as the SDK path. An abort can surface as a
                    // differently-named transport error, which must not be reported to the user
                    // as a lost connection.
                    const aborted = err?.name === 'AbortError' || !!this.abortController?.signal.aborted;
                    if (aborted) {
                        logInfo('Generation aborted by user.');
                    } else {
                        // Null-safe: a thrown non-Error value must not crash the handler or
                        // render as "undefined" in the user-facing message.
                        const msg = err?.message || String(err);
                        logError('Stream reading error.', { engine, apiUrl, error: msg });
                        this.webview?.postMessage({ type: 'error', value: `Connection lost: ${msg}` });
                    }
                } finally {
                    try { reader.releaseLock(); } catch { /* reader may already be released on AbortError */ }
                }
            }

            // Final buffer processing (REST SSE only — SDK has no trailing buffer)
            if (!useLmStudioSdk && buffer.trim() && buffer.trim() !== 'data: [DONE]') {
                try {
                    const trimmed = buffer.trim();
                    const raw = trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed;
                    const json = JSON.parse(raw);
                    const token = engine === 'lmstudio' ? json.choices?.[0]?.delta?.content || '' : json.message?.content || json.response || '';
                    if (token) {
                        aiResponseText += token;
                        if (postLiveDeltas) this.webview.postMessage({ type: 'streamChunk', value: token });
                    }
                    const fr = engine === 'lmstudio'
                        ? json.choices?.[0]?.finish_reason
                        : (json.done_reason ?? (json.done === true ? 'stop' : undefined));
                    if (fr) finishStopReason = fr;
                } catch (e: any) {
                    logError('Failed to parse final streaming buffer.', { engine, apiUrl, buffer: summarizeText(buffer, 300), error: e?.message || String(e) });
                }
            }

            if (this.isStaleRun(runId)) return;
            if (requestTimeoutHandle) {
                clearTimeout(requestTimeoutHandle);
                requestTimeoutHandle = undefined;
            }

            // ── Empty-response auto-recovery ──
            // Streaming can end with no text and no thrown error (network blip, model
            // cold-start, context overflow, etc.). Before surfacing that to the user we
            // make ONE recovery attempt: a single non-streaming request with the same
            // messages, temperature and token limits. Many LM Studio failures are
            // streaming-only (the SSE channel drops mid-token while one POST returns
            // the whole answer fine).
            //
            // Conditions: the accumulated text is blank, the run was not aborted, and
            // this is the top-level turn (loopDepth === 0) — we don't want to ping-pong
            // inside the autonomous action loop.
            //
            // History: an SDK handle-reset retry used to live here as a first step.
            // Per the original note it now happens inside `LMStudioStreamer.stream()`
            // (not visible from this block), which re-creates the SDK on a second attempt
            // for both dead-handle errors and clean-but-empty streams. So when we arrive
            // here with `useLmStudioSdk` and no text, the SDK path has already retried;
            // we go straight to the non-streaming fallback.
            if (!aiResponseText.trim() && !this.abortController?.signal.aborted && loopDepth === 0) {
                try {
                    logInfo('Empty stream — trying non-streaming fallback.', { engine, model: actualModel, apiUrl });
                    const fallback = await this.callNonStreaming({
                        baseUrl: ollamaUrl,
                        modelName: actualModel,
                        engine,
                        messages: messagesForRequest,
                        temperature,
                        maxTokens: maxOutputTokens,
                        contextLength: ctxLimits.contextLength,
                        signal: this.abortController?.signal,
                    });
                    // The fallback's stop reason is adopted even when it returned no usable text.
                    if (fallback.stopReason) finishStopReason = fallback.stopReason;
                    if (fallback.text && fallback.text.trim()) {
                        aiResponseText = fallback.text;
                        logInfo('Non-streaming fallback recovered the answer.', { engine, model: actualModel, length: fallback.text.length });
                    }
                } catch (recoverErr: any) {
                    // Best-effort: a failed fallback is logged and the (empty) answer flows on.
                    logError('Non-streaming fallback also failed.', {
                        engine, model: actualModel, error: recoverErr?.message ?? String(recoverErr),
                    });
                }
            }

            // ── Thought Quarantine + Final-only Retry + Auto-Continuation ──
            // The user is waiting for an answer, not for a chance to manage the generation engine:
            //   (a) hidden reasoning (Harmony channels, <think>…, "Thinking Process:") never reaches
            //       the screen — stripped here, and from what executeActions / chatHistory see;
            //   (b) if the model emitted *only* reasoning → silently retry, final-answer-only;
            //   (c) if the answer was cut off at the output ceiling → continue it internally with a
            //       *compressed* request (original question + the answer so far), up to N rounds.
            let cleaned = extractVisibleFinal(aiResponseText);
            if (cleaned.hadHiddenReasoning) {
                logInfo('Stripped hidden reasoning from the model output.', {
                    model: actualModel, hiddenChars: cleaned.hiddenReasoning.length,
                    visibleChars: cleaned.visible.length, hadFinalChannel: cleaned.hadFinalChannel,
                    thoughtOnly: cleaned.wasThoughtOnly,
                });
            }

            // (b) Final-only retry — the reply was reasoning-only, no visible answer.
            if (shouldFinalOnlyRetry(cleaned)
                && config.finalOnlyRetryOnThoughtLeak
                && loopDepth === 0
                && !this.abortController?.signal.aborted) {
                try {
                    this.webview.postMessage({ type: 'autoContinue', value: '답변을 정리하는 중입니다...' });
                    const retryMsgs: ChatMessage[] = messagesForRequest.map((m, i) =>
                        i === 0 ? { ...m, content: `${m.content}\n${FINAL_ONLY_DIRECTIVE}` } : m);
                    const r = await this.callNonStreaming({
                        baseUrl: ollamaUrl, modelName: actualModel, engine, messages: retryMsgs,
                        temperature, maxTokens: maxOutputTokens, contextLength: ctxLimits.contextLength,
                        signal: this.abortController?.signal,
                    });
                    if (r.stopReason) finishStopReason = r.stopReason;
                    const rc = extractVisibleFinal(r.text);
                    if (rc.visible.trim()) {
                        logInfo('Final-only retry recovered a visible answer.', { model: actualModel, length: rc.visible.length });
                        aiResponseText = r.text;
                        cleaned = rc;
                    }
                } catch (e: any) {
                    logError('Final-only retry failed.', { model: actualModel, error: e?.message ?? String(e) });
                }
            }

            // (c) Auto-continuation → src/agent/handlePrompt/applyAutoContinuation.ts
            let continuationCount = 0;
            if (config.autoContinueOnOutputLimit && config.maxAutoContinuations > 0 && loopDepth === 0) {
                const _cont = await applyAutoContinuation({
                    streamChatOnce: (p) => this.streamChatOnce(p),
                    isStaleRun: (id) => this.isStaleRun(id),
                    getAbortSignal: () => this.abortController?.signal,
                    getWebview: () => this.webview,
                }, {
                    cleaned,
                    finishStopReason,
                    prompt,
                    chatHistory: this.chatHistory,
                    maxOutputTokens,
                    ctxLimits,
                    config,
                    runId,
                    useLmStudioSdk,
                    engine,
                    ollamaUrl,
                    actualModel,
                    temperature,
                    postLiveDeltas,
                });
                cleaned = _cont.cleaned;
                finishStopReason = _cont.finishStopReason;
                continuationCount = _cont.continuationCount;
                if (this.isStaleRun(runId)) return;
            }
            // (c2) Broken Korean/English token repair — detect token collapse such as
            // "덩어리"→"덩ey" and restore it with a single repair pass. If the repair does
            // not pass verification, the original text is kept (never make it worse).
            // Runs only on the top-level turn, when there is visible text and no abort.
            if (loopDepth === 0 && cleaned.visible && !this.abortController?.signal.aborted) {
                try {
                    // Loaded lazily via dynamic import.
                    const { findBrokenHangulTokens, repairBrokenHangul } = await import('./agent/hangulHygiene');
                    const broken = findBrokenHangulTokens(cleaned.visible);
                    if (broken.length > 0) {
                        this.webview.postMessage({ type: 'autoContinue', value: '표기 오류 교정 중…' });
                        // The repair helper is given a callback that performs one low-temperature
                        // non-streaming call against the same model/endpoint.
                        const repaired = await repairBrokenHangul(cleaned.visible, broken, async (system, user, maxTokens) => {
                            const r = await this.callNonStreaming({
                                baseUrl: ollamaUrl, modelName: actualModel, engine,
                                messages: [{ role: 'system', content: system }, { role: 'user', content: user }],
                                temperature: 0.1, maxTokens, contextLength: ctxLimits.contextLength,
                                signal: this.abortController?.signal,
                            });
                            return r.text;
                        });
                        // A falsy result means the repair was rejected; keep the original text.
                        if (repaired) {
                            logInfo('한·영 깨진 토큰 수리 완료.', { broken: broken.slice(0, 5), before: cleaned.visible.length, after: repaired.length });
                            cleaned = { ...cleaned, visible: repaired };
                        } else {
                            logInfo('한·영 깨진 토큰 감지 — 수리 검증 미통과, 원문 유지.', { broken: broken.slice(0, 5) });
                        }
                    }
                } catch (e: any) {
                    // Best-effort: any failure leaves `cleaned` untouched.
                    logError('한글 위생 수리 실패 (원문 유지).', { error: e?.message ?? String(e) });
                }
            }

            // 답변 sanitize / policy enforcement → src/agent/handlePrompt/processFinalAnswer.ts
            const _finalProc = processFinalAnswer({
                visibleAnswer: cleaned.visible,
                prompt,
                secondBrainTrace,
                localPathContext,
                activeBrain,
                brainFiles,
                finishStopReason,
                maxOutputTokens,
                actualModel,
                engine,
                inputTokens,
            });
            const cleanedVisible = _finalProc.cleanedVisible;
            const assistantContent = _finalProc.assistantContent;
            const finalAssistantContent = _finalProc.finalAssistantContent;
            const rationale = _finalProc.rationale;
            const outputTokens = _finalProc.outputTokens;
            const _stopKind = _finalProc.stopKind; void _stopKind;

            const assistantMessage: ChatMessage = { role: 'assistant', content: finalAssistantContent, internal: false, rationale };
            this.chatHistory.push(assistantMessage);
            this.emitHistoryChanged();

            this.statusBarManager.updateStatus(AgentStatus.Executing);
            // Action tags are honored only from the visible final answer — never from hidden reasoning.
            // Snapshot history length so we can tell whether the actions injected any content for the
            // model to interpret: read_file / list_files / read_brain / read_sheet push system messages,
            // while run_command (no stdout captured) and file writes inject nothing. Only the former
            // warrant a follow-up LLM call.
            const historyLenBeforeActions = this.chatHistory.length;
            const report = await this.executeActions(cleanedVisible, rootPath, activeBrain);
            const actionsInjectedContext = this.chatHistory.length > historyLenBeforeActions;
            // Self-Reflector Phase C — 일반 채팅 경로에서도 코드 파일 생성 직후
            // syntax 체크 실행. 옵션 OFF면 통째로 skip.
            try {
                const cfgC = getConfig();
                if (cfgC.selfReflectorExecutionEnabled && report.length > 0) {
                    const { verifyCreatedFiles } = await import('./features/selfReflector/selfReflectorExecution');
                    const extra = await verifyCreatedFiles(report, rootPath);
                    if (extra.length > 0) report.push(...extra);
                }
            } catch (e: any) {
                logError('selfReflector.C (chat): hook failed; continuing.', { error: e?.message ?? String(e) });
            }
            // Hollow code 검사 — selfReflectorEnabled가 켜져 있으면 syntax 통과
            // 한 파일도 빈 깡통은 잡는다. 일반 채팅 경로에선 자동 retry 없이
            // 경고만 — 사용자가 직접 보고 다시 요청할 수 있으니 충분.
            try {
                const cfgH = getConfig();
                if (cfgH.selfReflectorEnabled && report.length > 0) {
                    const { verifyHollow } = await import('./features/selfReflector/selfReflectorHollow');
                    const hollowRes = verifyHollow(report, rootPath);
                    if (hollowRes.hasHollow) report.push(...hollowRes.extraLines);
                }
            } catch (e: any) {
                logError('selfReflector.hollow (chat): hook failed; continuing.', { error: e?.message ?? String(e) });
            }
            if (!assistantContent.trim() && report.length === 0) {
                const promptCharCount = messagesForRequest.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
                logError('Model returned an empty response without actions.', {
                    model: actualModel, engine, apiUrl, loopDepth,
                    promptCharCount, inputTokens, maxOutputTokens, contextLength: ctxLimits.contextLength,
                    estimatedOverflow: outputBudget.tight, stopReason: finishStopReason,
                    messageCount: messagesForRequest.length,
                    fallbackTried: loopDepth === 0 ? 'yes' : 'no',
                });
                // 모델 식별자에서 "활성(active) 파라미터" 규모를 추정한다. MoE 모델은
                // 총 파라미터(예: 26b)가 커도 활성 파라미터(예: a4b=4)가 작아 긴 프롬프트에서
                // 첫 토큰부터 EOS 를 뱉는다(빈 응답). 총 파라미터만 보면 "26b → 큰 모델"로
                // 오판하므로 활성 파라미터로 판정한다.
                const activeB = estimateActiveParamsB(actualModel);
                const totalB = estimateModelParamsB(actualModel);
                const isMoE = activeB !== null && totalB !== null && activeB < totalB;
                const capacityHint = isMoE
                    ? `이 모델은 MoE 로 추정됩니다 (총 ~${totalB}B, **활성 ~${activeB}B**). 활성 파라미터가 작아 긴 입력(현재 ~${inputTokens.toLocaleString()} tokens)에서 첫 토큰부터 EOS 를 뱉어 빈 응답이 되기 쉽습니다. 코드 리뷰처럼 입력이 큰 작업은 **활성 7B+ 또는 한국어 특화 모델(EXAONE/Qwen 등)** 을 권장합니다.`
                    : '입력이 큰 작업에서 모델이 첫 토큰부터 EOS 를 뱉으면 보통 모델 용량 부족 또는 컨텍스트 초과입니다. 더 큰 모델(7B+)로 교체하거나 입력을 줄여 보세요.';
                const ctxMismatchHint =
                    '**LM Studio 에 로드된 실제 context length 가 Astra 설정(`g1nation.contextLength`)보다 작은지** 확인하세요. 예: 설정은 32768 인데 모델은 8192/16384 로 로드돼 있으면, Astra 가 그 한도를 넘겨 보내 서버가 잘라내거나 EOS 를 뱉습니다. (LM Studio 모델 로드 옵션의 Context Length 와 설정값을 일치)';

                const looksOverflow = outputBudget.tight || inputTokens > ctxLimits.contextLength - ctxLimits.safetyMargin;
                this.webview.postMessage({
                    type: 'error',
                    value: [
                        'AI 엔진이 빈 응답을 반환했습니다 (스트리밍 + non-streaming 폴백 모두 실패).',
                        `Engine: ${engine}`,
                        `Model: ${actualModel}${isMoE ? ` (MoE: 총 ~${totalB}B / 활성 ~${activeB}B)` : ''}`,
                        `Prompt: ~${inputTokens.toLocaleString()} tokens (${promptCharCount.toLocaleString()} chars, ${messagesForRequest.length} messages) / context window ${ctxLimits.contextLength.toLocaleString()} tokens`,
                        `Output budget: ${maxOutputTokens.toLocaleString()} tokens`,
                        ...(finishStopReason ? [`Stop reason: ${finishStopReason}`] : []),
                        '',
                        '⚠️ 빈 응답은 *답변이 길어서*가 아니라 *입력이 모델 용량에 비해 커서* 발생하는 경우가 대부분입니다 (출력은 어차피 위 budget 으로 제한됨).',
                        '',
                        '다음을 시도해보세요:',
                        '  • ' + ctxMismatchHint,
                        '  • ' + capacityHint,
                        '  • `/newChat` 으로 대화를 새로 시작하거나, Settings 에서 memoryLongTermFiles / Brain·Skill 컨텍스트를 줄여 입력을 축소',
                        '  • LM Studio 에서 모델이 실제로 로드돼 있는지 / 서버 재시작',
                        ...(looksOverflow ? ['  • 입력이 context window 에 매우 가깝습니다 — 위 컨텍스트 일치 확인이 특히 중요합니다.'] : []),
                    ].join('\n')
                });
                return;
            }

            if (report.length > 0) {
                logInfo('Agent actions executed.', { loopDepth: loopDepth + 1, report });

                // A follow-up LLM call ("continuation") is only worth making when an action injected
                // content the model must interpret (read_file / list_files / read_brain / read_sheet).
                // Output-less actions — run_command (no stdout captured), file create/edit/delete —
                // give the continuation nothing to do, yet it would re-send the whole, often near-full,
                // context; on a weak/long-context local model that second call collapses to an empty
                // response. For those, confirm deterministically and stop. No second LLM call.
                if (actionsInjectedContext && loopDepth < config.maxAutoSteps) {
                    const currentActionStr = report.join('|');
                    const lastActionStr = this.context.workspaceState.get<string>('lastActionStr');

                    if (currentActionStr === lastActionStr) {
                        this.webview.postMessage({ type: 'streamChunk', value: "\n⚠️ *Stopping to prevent infinite loop.*" });
                        return;
                    }

                    await this.context.workspaceState.update('lastActionStr', currentActionStr);
                    logInfo('Autonomous loop continuing after actions.', { loopDepth: loopDepth + 1, actions: report });

                    // Explicitly tell the AI to look at the results and continue
                    const continuationPrompt = `The requested local action has been executed.\nAction report:\n${report.join('\n')}\nUse the action result messages already in the conversation to answer the user's original request directly, in the user's language. Do not say you are waiting for the next instruction.`;

                    this.webview.postMessage({ type: 'autoContinue', value: `자료를 확인하고 답변을 정리하는 중입니다... (${loopDepth + 1}/${config.maxAutoSteps})` });
                    await new Promise(r => setTimeout(r, 800));
                    if (this.isStaleRun(runId)) return;
                    await this.handlePrompt(continuationPrompt, modelName, { ...options, loopDepth: loopDepth + 1, runId });
                } else if (!actionsInjectedContext) {
                    // Output-less actions — confirm what actually ran (deterministic), no follow-up LLM call.
                    logInfo('Actions produced no interpretable output — skipping continuation call.', { loopDepth, report });
                    this.webview.postMessage({
                        type: 'streamChunk',
                        value: '\n\n---\n실행한 작업:\n' + report.map(r => `- ${r}`).join('\n'),
                    });
                }
                return;
            }

            this.statusBarManager.updateStatus(AgentStatus.Success);
            if (this._turnCtx.retrieval) {
                // Non-blocking flag: lesson Prevention-Checklist items the answer doesn't visibly touch on.
                const unaddressedChecklist = findUnaddressedChecklistItems(finalAssistantContent, this._turnCtx.lessons);
                this.webview.postMessage({
                    type: 'usedScope',
                    value: {
                        ...this._turnCtx.retrieval,
                        hasAgentSelected: !!options.agentSkillFile,
                        unaddressedChecklist,
                        // Knowledge Mix surfaced under the answer so the user can see what policy ran.
                        knowledgeMix: this._turnCtx.knowledgeMix
                            ? {
                                weight: this._turnCtx.knowledgeMix.weight,
                                source: this._turnCtx.knowledgeMix.source,
                                agent: this._turnCtx.knowledgeMix.agent,
                            }
                            : null,
                    },
                });
            }
            // Progressive answering: the bubble was filled live with raw tokens
            // during streaming (and during any auto-continuation rounds). Now
            // that we have the cleaned + merged + policy-enforced text, swap the
            // bubble's content for the final version so the user sees the
            // correct answer regardless of what slipped through live —
            // hidden reasoning, mid-stream artifacts, continuation-overlap re-
            // emits, truncation notice. Action-loop turns (loopDepth > 0) still
            // append via streamChunk because the bubble has multiple action
            // segments and we don't have a single "final" to replace with.
            if (loopDepth === 0) {
                this.webview.postMessage({ type: 'streamReplace', value: finalAssistantContent });
                recordTelemetry({
                    kind: 'turn',
                    durationMs: Date.now() - turnStartMs,
                    model: actualModel, engine,
                    inputTokens,
                    outputTokens,
                    contextLength: ctxLimits.contextLength,
                    stopReason: finishStopReason,
                    brainFiles: this._turnCtx.retrieval?.usedBrainFiles.length ?? 0,
                    memoryLayers: this._turnCtx.retrieval?.usedMemoryLayers ?? [],
                    note: `continuations=${continuationCount} historyDropped=${reqMessages.length - budgetedHistory.length}`,
                });
                // ── Post-answer hooks (v2.2.197) — Devil + SelfCheck + TermValidator 통합 레지스트리. ──
                // 새 hook 추가 = `src/agent/postAnswerHooks/index.ts` 에 한 객체 push.
                // 안전 fallback 내장 — 한 hook 실패가 다른 hook / main turn 영향 없음.
                runPostAnswerHooks({
                    userPrompt: prompt || '',
                    assistantAnswer: finalAssistantContent,
                    baseUrl: ollamaUrl,
                    modelName: actualModel,
                    contextLength: ctxLimits.contextLength,
                    engine,
                    selfCheckSources: this._turnCtx.selfCheckSources,
                    confidenceSignals: this._turnCtx.confidenceSignals,
                    callNonStreaming: (p) => this.callNonStreaming(p),
                    getAbortSignal: () => this.abortController?.signal,
                    getWebview: () => this.webview,
                    getBrainPath: () => {
                        try { return getActiveBrainProfile()?.localBrainPath; } catch { return undefined; }
                    },
                });
            } else {
                this.webview.postMessage({ type: 'streamChunk', value: finalAssistantContent });
            }

        } catch (error: any) {
            this.statusBarManager.updateStatus(AgentStatus.Error, error.message);
            logError('Agent prompt failed.', { error: error?.message || String(error), promptPreview: summarizeText(prompt || '', 200) });
            if (!this.isStaleRun(runId)) {
                this.webview.postMessage({ type: "error", value: `[Agent Error]: ${error.message}` });
            }
        } finally {
            if (requestTimeoutHandle) {
                clearTimeout(requestTimeoutHandle);
            }
            if (loopDepth === 0 && !this.isStaleRun(runId)) {
                this.webview.postMessage({ type: 'streamEnd' });
                this.options.onStreamLifecycle?.end();
            }
        }
    }

    /**
     * Entry point for the multi-agent workflow.
     *
     * Calls `stop()`, installs a fresh `AbortController`, then delegates to
     * `executeMultiAgentWorkflowFn` with accessors onto this instance's state.
     *
     * @param prompt    Prompt forwarded unchanged to the workflow.
     * @param modelName Model identifier forwarded unchanged to the workflow.
     * @param options   Per-call options forwarded unchanged to the workflow.
     * @returns Whatever `executeMultiAgentWorkflowFn` returns.
     */
    public async executeMultiAgentWorkflow(
        prompt: string,
        modelName: string,
        options: any
    ) {
        this.stop();
        this.abortController = new AbortController();

        // Accessors are lazy on purpose: the workflow reads the webview / abort signal
        // at call time rather than capturing the values present right now.
        const workflowDeps = {
            emitHistoryChanged: () => this.emitHistoryChanged(),
            chatHistory: this.chatHistory,
            options: this.options,
            statusBarManager: this.statusBarManager,
            getWebview: () => this.webview,
            getAbortSignal: () => this.abortController?.signal,
        };
        return executeMultiAgentWorkflowFn(workflowDeps, prompt, modelName, options);
    }

    /**
     * Delegates a single role-agent call to `callRoleAgentFn`, supplying this
     * instance's abort signal accessor, streaming-request factory and options.
     *
     * @returns The value produced by `callRoleAgentFn` for the given role.
     */
    private async callAgent(role: AgentRole, prompt: string, modelName: string, options: any): Promise<string> {
        const reply = callRoleAgentFn(
            {
                getAbortSignal: () => this.abortController?.signal,
                createStreamingRequest: (request) => this.createStreamingRequest(request),
                options: this.options,
            },
            role,
            prompt,
            modelName,
            options,
        );
        return reply;
    }

    /** Reports whether `runId` differs from the run id currently stored in `activeRunId`. */
    private isStaleRun(runId: number): boolean {
        const isActiveRun = runId === this.activeRunId;
        return !isActiveRun;
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Context builders / prompt detectors / history transforms 등 stateless
    // 헬퍼는 `src/lib/contextBuilders/*` 로 모두 이관. 각 모듈은 자기 책임을
    // 도큐먼트화한 한 파일이며, agent.ts 는 호출자 역할만 유지.
    // ─────────────────────────────────────────────────────────────────────────

    // buildMemoryContext → `src/lib/contextBuilders/memoryContext.ts` (130줄, RAG orchestration deps struct 패턴)

    /**
     * Persists the current session and notifies the registered history-change
     * listener with the value of `getHistory()`.
     *
     * Listener failures never propagate to the caller: both a rejected promise
     * and a synchronous throw (from the listener or from `getHistory()`) are
     * logged via `logError`.
     *
     * NOTE(review): when no listener is registered the method returns before the
     * session save, so persistence currently depends on a listener being
     * attached — confirm that this is intended.
     */
    private emitHistoryChanged() {
        if (!this.historyChangeListener) return;

        // Save session whenever history changes
        this.sessionManager.saveSession(
            this.currentTaskId,
            this.chatHistory,
            this.context.workspaceState.get<string>('lastActionStr')
        );

        const reportListenerFailure = (error: any) => {
            logError('History change listener failed.', { error: error?.message || String(error) });
        };

        try {
            // Promise.resolve() only adopts rejections; a listener that throws before
            // returning would otherwise escape into whichever code path mutated history.
            Promise.resolve(this.historyChangeListener(this.getHistory())).catch(reportListenerFailure);
        } catch (error: any) {
            reportListenerFailure(error);
        }
    }

    /**
     * Runs end-of-session memory extraction into the 5-layer memory.
     * Called when a new chat starts or when the extension is deactivated.
     *
     * Hands the non-internal messages to `memoryManager.onSessionEnd` (with
     * distillation settings only when a local brain path is configured),
     * records a `session-end` telemetry event, and kicks off a background
     * summary compression. Any synchronous failure is logged, never thrown.
     */
    public onSessionEnd(): void {
        try {
            // `workspaceFolders` may be undefined, and an index into it is not guaranteed
            // to exist (a workspace can hold zero folders), so guard both hops instead of
            // letting a TypeError abort the whole extraction via the catch below.
            const workspacePath = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath;

            const cfgNow = getConfig();
            // Filter once; the same visible-message view feeds extraction and telemetry.
            const visibleMessages = this.chatHistory.filter((m) => !m.internal);
            const visibleCount = visibleMessages.length;

            this.memoryManager.onSessionEnd(
                this.currentTaskId,
                visibleMessages,
                workspacePath,
                cfgNow.localBrainPath ? {
                    enabled: cfgNow.distillationEnabled !== false,
                    ageThresholdDays: cfgNow.distillationAgeThresholdDays ?? 30,
                    intervalDays: cfgNow.distillationIntervalDays ?? 7,
                    archiveMode: (cfgNow.distillationArchiveMode || 'mark-promoted') as any,
                    brainPath: cfgNow.localBrainPath,
                } : undefined,
            );
            logInfo('Memory extraction completed for session end.', { taskId: this.currentTaskId });
            recordTelemetry({
                kind: 'session-end',
                note: `taskId=${this.currentTaskId} messages=${visibleCount}`,
            });
            // Fire-and-forget LLM compression: turns the raw transcript into a
            // 2–3 sentence summary that medium-term retrieval can use instead
            // of just "first user msg + last assistant 200 chars". Cheap call
            // (~256 output tokens), runs in the background so it never blocks
            // the next chat turn. A copy of the history is passed so later
            // mutations of `chatHistory` do not affect the background job.
            void this.compressSessionSummary(this.currentTaskId, this.chatHistory.slice());
        } catch (error: any) {
            logError('Memory extraction failed on session end.', { error: error?.message || String(error) });
        }
    }

    /**
     * Compress a finished session into a short summary and persist it to the
     * session record. The summary is later read by `compactRecentSessions` so
     * the medium-term memory layer carries a real recap instead of a fragment.
     *
     * Skips sessions with fewer than 3 visible messages — they're typically
     * single-question pings where the raw first message is already a good
     * summary. Failures are logged and swallowed: a missing summary just
     * falls back to the legacy "first user msg" representation.
     */
    private async compressSessionSummary(taskId: string, history: ChatMessage[]): Promise<void> {
        // Pure delegation: the extracted implementation only needs the extension
        // context and a way to issue a non-streaming completion.
        const pending = compressSessionSummaryFn(
            {
                context: this.context,
                callNonStreaming: (request) => this.callNonStreaming(request),
            },
            taskId,
            history,
        );
        return pending;
    }

    /**
     * Opens a streaming chat request by delegating to `createStreamingRequestFn`
     * with this instance's extension context and abort-signal accessor.
     *
     * @returns The raw `Response` together with the engine and API URL reported
     *          by `createStreamingRequestFn`.
     */
    private async createStreamingRequest(params: {
        baseUrl: string;
        modelName: string;
        reqMessages: ChatMessage[];
        temperature: number;
        /** Dynamic output-token cap computed from the remaining context budget. */
        maxTokens?: number;
        /** Model context window in tokens (used for Ollama's num_ctx). */
        contextLength?: number;
    }): Promise<{ response: Response; engine: 'lmstudio' | 'ollama'; apiUrl: string }> {
        const requestDeps = {
            context: this.context,
            // Resolved lazily so the request observes the controller active at call time.
            getAbortSignal: () => this.abortController?.signal,
        };
        return createStreamingRequestFn(requestDeps, params);
    }

    /**
     * Non-streaming chat completion. Used as a recovery path when the
     * streaming endpoint returns an empty response — common with LM Studio
     * when a model is mid-load or the SSE channel drops.
     *
     * The body is consumed via `await response.text()` (single read), so
     * there's no ReadableStream lock to release and no chance of the
     * "lock() request could not be registered" error this method is helping
     * to avoid.
     */

    private async callNonStreaming(params: {
        baseUrl: string;
        modelName: string;
        engine: 'lmstudio' | 'ollama';
        messages: ChatMessage[];
        temperature: number;
        maxTokens?: number;
        contextLength?: number;
        signal?: AbortSignal;
    }): Promise<{ text: string; stopReason?: string }> {
        // The extracted implementation needs only the extension context; the
        // abort signal, if any, travels inside `params`.
        const { context } = this;
        return callNonStreamingFn({ context }, params);
    }

    /**
     * Single streaming call used by progressive answering (live-delta main
     * stream + auto-continuation rounds). Mirrors the main streaming block in
     * handlePrompt but without the empty-stream recovery / non-streaming
     * fallback machinery — those only matter for the very first generation.
     *
     * When `postLiveDeltas` is true, every token is also forwarded to the
     * webview as a `streamChunk`, giving the user a real-time view of the
     * answer (and of continuation rounds) instead of one big drop at the end.
     *
     * Returns the accumulated text and the final stop reason. Aborts and
     * stale runs surface as `aborted: true` and an empty/partial text — the
     * caller decides what to do with that.
     */
    private async streamChatOnce(params: {
        runId: number;
        useLmStudioSdk: boolean;
        engine: 'lmstudio' | 'ollama';
        ollamaUrl: string;
        modelName: string;
        messages: ChatMessage[];
        temperature: number;
        maxTokens: number;
        contextLength: number;
        contextOverflowPolicy: 'stopAtLimit' | 'truncateMiddle' | 'rollingWindow';
        signal: AbortSignal;
        postLiveDeltas: boolean;
    }): Promise<{ text: string; stopReason?: string; aborted: boolean }> {
        // Delegates to the extracted implementation, exposing only the pieces of
        // instance state it needs through accessors/callbacks.
        const outcome = streamChatOnceFn(
            {
                options: this.options,
                getWebview: () => this.webview,
                isStaleRun: (candidateRunId) => this.isStaleRun(candidateRunId),
                createStreamingRequest: (request) => this.createStreamingRequest(request),
            },
            params,
        );
        return outcome;
    }

    // lmStudioSamplingFromConfig / lmStudioRespondExtrasFromConfig
    //   → `src/lib/contextBuilders/lmStudioSampling.ts`

    /**
     * Public entry point for callers that need to apply ConnectAI's action
     * tags (`<create_file>`, `<run_command>`, `<edit_file>`, …) to arbitrary
     * text without going through the full `handlePrompt` pipeline.
     *
     * The 1인 기업 dispatcher uses this so specialist outputs that contain
     * action tags actually take effect on disk — without it, agents would
     * "claim" to create files but nothing would be written, which is the
     * exact symptom the user reported.
     *
     * Returns the action report (`["✅ Created: …", "📂 Listed: …", …]`) so
     * the caller can surface it back to the user. Errors inside individual
     * actions are converted into report entries rather than thrown, matching
     * the behaviour of the internal call site.
     */
    public async executeActionTagsOnText(aiMessage: string): Promise<string[]> {
        // The extracted helper decides which root path / brain profile to use and
        // calls back into the private `executeActions` with them.
        const actionReport = executeActionTagsOnTextFn(
            {
                executeActions: (message, workspaceRoot, brainProfile) =>
                    this.executeActions(message, workspaceRoot, brainProfile),
            },
            aiMessage,
        );
        return actionReport;
    }

    /**
     * Applies every action tag found in `aiMessage` inside one transaction and
     * returns a human-readable report of what happened.
     *
     * Flow: begin transaction → run each action-handler group in a fixed order
     * against a shared context → open the first created file in editor column
     * two → optionally trigger a brain sync → either commit, or (dry-run mode)
     * leave the transaction open and request approval.
     *
     * Failures do not throw: the transaction is rolled back, a failure line is
     * appended to the report, and a `lessonCandidate` message is posted.
     *
     * @param aiMessage   Text to scan for action tags.
     * @param rootPath    Workspace root handed to the handlers.
     * @param activeBrain Brain profile; its `localBrainPath` and `secondBrainRepo` are read here.
     * @returns The accumulated report lines, including any failure line.
     */
    private async executeActions(aiMessage: string, rootPath: string, activeBrain: BrainProfile): Promise<string[]> {
        const report: string[] = [];
        let brainModified = false;
        const activeBrainDir = activeBrain.localBrainPath;
        let firstCreatedFile: string | undefined;

        try {
            this.transactionManager.begin();

            // Every handler shares this one ctx object — report.push / chatHistory.push /
            // brainModified / firstCreatedFile accumulate through callbacks and shared arrays.
            const ctx: HandlerContext = {
                aiMessage,
                rootPath,
                activeBrainDir,
                report,
                chatHistory: this.chatHistory,
                markBrainModified: () => { brainModified = true; },
                setFirstCreated: (absPath) => { if (!firstCreatedFile) firstCreatedFile = absPath; },
                transactionManager: this.transactionManager,
                context: this.context,
            };

            // The 15+ action tags are split into the handler groups invoked below (nine calls).
            // Order is the same as the original implementation — file operations first (where a
            // transaction record is meaningful), then read-only / external-API actions.
            await applyFileCreateEditActions(ctx);
            await applyFileDeleteReadActions(ctx);
            await applyRunCommandActions(ctx);
            await applyListFilesActions(ctx);
            await applyWebFetchActions(ctx);
            await applyBrainOpsActions(ctx);
            await applyCalendarActions(ctx);
            await applySheetsActions(ctx);
            await applyTasksActions(ctx);

            if (firstCreatedFile) {
                // Always open file results in the editor group (column 2) — the ConnectAI
                // sidebar lives in column 3 and we don't want freshly-written files to
                // hijack the chat panel.
                // Opening is best-effort and intentionally not awaited, but the rejection
                // must still be observed: otherwise a file the editor cannot open surfaces
                // as an unhandled rejection instead of a log line.
                vscode.window.showTextDocument(vscode.Uri.file(firstCreatedFile), {
                    preview: false,
                    viewColumn: vscode.ViewColumn.Two,
                }).then(undefined, (openError: any) => {
                    logError('Failed to open created file in editor.', {
                        file: firstCreatedFile,
                        error: openError?.message ?? String(openError),
                    });
                });
            }

            // Brain Sync Logic
            // NOTE(review): this runs before the dry-run branch below, i.e. also while the
            // transaction is still awaiting approval — confirm that is intended.
            if (brainModified && shouldAutoPushBrain() && activeBrain.secondBrainRepo) {
                this.syncBrain(activeBrainDir);
            }

            const config = getConfig();
            if (config.dryRun) {
                report.push(`\n⚠️ **Dry Run Mode Active**: 위 변경 사항을 확인하고 [승인] 또는 [롤백]을 선택해주세요.`);
                this.webview?.postMessage({ type: 'requiresApproval' });
                // Mirror the inline-chat approval into the queue feeding the dedicated panel + status bar.
                const queue = this.options.approvalQueue;
                if (queue) {
                    const recorded = this.transactionManager.getRecordedFiles();
                    queue.enqueue(
                        {
                            id: `txn-${Date.now()}`,
                            kind: 'transaction',
                            title: 'Pending file changes',
                            summary: `${recorded.length}개 파일 변경 대기 중`,
                            files: recorded.map(r => r.path),
                            createdAt: Date.now(),
                        },
                        {
                            approve: () => this.approveTransaction(),
                            reject: () => this.rejectTransaction(),
                        }
                    );
                }
                // Do NOT commit yet
            } else {
                this.transactionManager.commit();
            }
        } catch (error: any) {
            this.transactionManager.rollback();
            // Anything can be thrown (string, null, plain object). Reading `.message`
            // unguarded would either throw again from inside this handler or yield the
            // literal text "undefined" in the report, so fall back to String(error).
            const g1Error = error instanceof AgentExecutionError
                ? error
                : new AgentExecutionError(error?.message ?? String(error), error);
            report.push(`🛑 Transaction Failed: ${g1Error.message}. All file changes rolled back.`);
            logError('Action execution failed, rolled back.', g1Error);
            // A failed-and-rolled-back action is a strong "something went wrong" signal — offer to record a lesson.
            this.webview?.postMessage({ type: 'lessonCandidate', value: { trigger: 'rollback', reason: g1Error.message } });
            // We return the report with the failure message instead of throwing
            // so the agent can see the failure and decide what to do next
        }
        return report;
    }

    /** Forwards `brainDir` to `syncBrainFn` and returns its result unchanged. */
    private syncBrain(brainDir: string) {
        const syncResult = syncBrainFn(brainDir);
        return syncResult;
    }
}