// connectai/src/agent.ts

import * as vscode from 'vscode';
import * as path from 'path';
import * as fs from 'fs';
// axios removed
import {
    findBrainFiles,
    getSystemPrompt,
    shouldAutoPushBrain,
    buildApiUrl,
    getActiveBrainProfile,
    logError,
    logInfo,
    resolveEngine,
    summarizeText
} from './utils';
import { BrainProfile, getConfig, EXCLUDED_DIRS } from './config';
import { validatePath, sanitizeCommand } from './security';
import { TransactionManager } from './core/transaction';
import { SessionManager } from './core/session';
import { PlannerAgent, ResearcherAgent, WriterAgent } from './agents/factory';
import { AgentWorkflowManager } from './agents/AgentWorkflowManager';
import { ErrorTranslator } from './core/errorHandler';
import { agentEvents, AgentEventTypes } from './core/events';
import {
    AgentExecutionError,
    FileSystemError,
    APICommunicationError
} from './core/errors';
import { StatusBarManager, AgentStatus } from './core/statusBar';
import { lockManager } from './core/lock';
import { actionQueue } from './core/queue';
import { ConflictResolver } from './core/conflict';
import { recordTelemetry } from './core/telemetry';
import {
    buildSecondBrainTrace,
    enforceProjectClaimPolicyInAnswer,
    renderSecondBrainTraceContext,
    renderSecondBrainTraceMarkdown,
    SecondBrainTrace
} from './features/secondBrainTrace';
import { MemoryManager } from './memory';
import { RetrievalOrchestrator } from './retrieval';
import { buildLessonChecklistBlock, isQaRegressionFeedback, findUnaddressedChecklistItems } from './retrieval/lessonHelpers';
import { embedQuery, embedTexts } from './retrieval/embeddings';
import { backfillBrainEmbeddings } from './retrieval/brainIndex';
import { resolveScopeForAgent } from './skills/agentKnowledgeMap';
import {
    resolveKnowledgeMix,
    mapWeightToBrainFileLimit,
    mapWeightToRetrievalRatio,
    buildKnowledgeMixPolicy,
    ResolvedKnowledgeMix,
} from './retrieval/knowledgeMix';
import {
    extractVisibleFinal,
    shouldFinalOnlyRetry,
    shouldAutoContinue,
    looksCutOff,
    mergeContinuationParts,
    buildContinuationUserPrompt,
    FINAL_ONLY_DIRECTIVE,
    CONTINUATION_SYSTEM_PROMPT,
} from './core/responseRecovery';
import {
    estimateTokens,
    estimateMessagesTokens,
    computeOutputBudget,
    trimHistoryToBudget,
    truncateSystemPromptContext,
    classifyStopReason,
    truncationNotice,
    shouldShowTruncationNotice,
    estimateModelParamsB,
    type ContextLimits,
} from './lib/contextManager';

/**
 * One entry in the agent's conversation history.
 */
export interface ChatMessage {
    /** Who authored the entry. */
    role: 'user' | 'assistant' | 'system';
    /** Message text. */
    content: string;
    /**
     * Marks an entry that is part of the agent's own bookkeeping (e.g. the system-role
     * prompts pushed for action-loop iterations). `AgentExecutor.getHistory()` hides
     * internal entries unless their role is `assistant`.
     */
    internal?: boolean;
    /**
     * Optional structured rationale. Same shape as the value returned by
     * `AgentExecutor.parseRationale` — NOTE(review): where it is attached to a message
     * is not visible in this part of the file.
     */
    rationale?: {
        problem: string;
        goal: string;
        reasoning: string;
    };
}

/**
 * Callback registered through `AgentExecutor.setHistoryChangeListener`.
 * Receives a chat history array and may be synchronous or return a promise.
 */
type HistoryChangeListener = (history: ChatMessage[]) => void | Promise<void>;

/**
 * Optional integrations accepted by the `AgentExecutor` constructor.
 * Every member is optional; the constructor defaults the whole object to `{}`.
 */
export interface AgentExecutorOptions {
    /** Hooks fired around any LLM streaming run so external systems (LM Studio idle eject) can pause/resume. */
    onStreamLifecycle?: {
        start: () => void;
        end: () => void;
    };
    /**
     * Optional native LM Studio chat streamer. When provided AND the active engine is LM Studio,
     * chat completions are streamed via @lmstudio/sdk's WebSocket transport instead of the
     * OpenAI-compatible REST endpoint. Falls back to REST when omitted or when the streamer
     * itself fails (e.g. SDK reachability error).
     */
    lmStudioStreamer?: import('./lmstudio/streamer').IChatStreamer;
    /**
     * Optional pending-approval queue. When provided, dry-run transactions are also published
     * into a queue that drives the Approval Panel webview + status bar badge. The existing
     * inline `requiresApproval` chat message is preserved for backwards compatibility.
     */
    approvalQueue?: import('./features/approval/approvalQueue').ApprovalQueue;
}

// --- Agent Roles & Workflows ---
/** Role names of the built-in workflow agents; these are the keys of `AGENT_PROMPTS`. */
export type AgentRole = 'planner' | 'researcher' | 'writer';
/**
 * Intent labels for prompts that concern a local project.
 * NOTE(review): the code that assigns these labels is not visible in this part of the file.
 */
type LocalProjectIntent = 'review-evaluation' | 'knowledge-creation' | 'implementation' | 'documentation' | 'thinking' | 'general';

/**
 * System-prompt text for each workflow role.
 *
 * The prompts ask each role to wrap its output in a role-specific tag
 * (`<plan>` for the planner, `<research_results>` for the researcher); the writer
 * produces the final response without a wrapper tag.
 */
const AGENT_PROMPTS: Record<AgentRole, string> = {
    planner: `You are the [Planner Agent]. Your goal is to analyze the user's request and create a detailed execution plan.
1. Breakdown the request into logical steps.
2. Identify key search keywords for the knowledge base.
3. Output your plan in a structured format using <plan> tags.`,
    researcher: `You are the [Researcher Agent]. Your goal is to gather and analyze data based on the Planner's strategy.
1. Search the local knowledge base using the provided keywords.
2. Evaluate data reliability and extract relevant facts.
3. Output your findings using <research_results> tags.`,
    writer: `You are the [Writer Agent]. Your goal is to synthesize all gathered information into a high-quality final report.
1. Use the data from the Researcher.
2. Follow the project's visual and tone-of-voice guidelines.
3. Deliver a logical, consistent, and polished response.`
};

/**
 * Builds small summaries of recent chat sessions for medium-term memory retrieval.
 *
 * Walks `rawSessions` in the order given and keeps at most `limit + 5` usable
 * entries (never more than the input length). The extra five give a downstream
 * relevance scorer room to rerank, since the incoming order is by recency rather
 * than topical fit.
 *
 * An entry is skipped when it is not an object, has no id, is the active session,
 * has an empty history, or yields no first user message, no assistant excerpt and
 * no summary.
 *
 * @param rawSessions Persisted session records (loosely typed; validated per entry).
 * @param activeSessionId Id of the session to leave out, or `null`.
 * @param limit Number of sessions the caller ultimately wants; `<= 0` yields `[]`.
 * @returns Compact records: whitespace-collapsed first user message (max 200 chars),
 *          the last 200 chars of the final assistant message, an optional summary
 *          (max 600 chars), a title (falling back to the first 50 chars of the user
 *          message) and a numeric timestamp (0 when missing).
 */
function compactRecentSessions(
    rawSessions: any[],
    activeSessionId: string | null,
    limit: number,
): Array<{ id: string; title: string; firstUserMsg: string; lastAssistantExcerpt: string; summary?: string; timestamp: number }> {
    const collected: ReturnType<typeof compactRecentSessions> = [];
    if (!Array.isArray(rawSessions) || rawSessions.length === 0 || limit <= 0) {
        return collected;
    }

    // Collapse every whitespace run to a single space and trim the ends.
    const squash = (value: unknown): string => String(value ?? '').replace(/\s+/g, ' ').trim();

    const widened = limit + 5;
    const maxEntries = widened < rawSessions.length ? widened : rawSessions.length;

    for (const session of rawSessions) {
        if (collected.length >= maxEntries) {
            break;
        }
        if (!session || typeof session !== 'object') {
            continue;
        }

        const sessionId = String(session.id ?? '');
        if (!sessionId || sessionId === activeSessionId) {
            continue;
        }

        const messages: any[] = Array.isArray(session.history) ? session.history : [];
        if (messages.length === 0) {
            continue;
        }

        const openingUser = messages.find((entry) => entry?.role === 'user');

        // Scan backwards for the most recent assistant message.
        let closingAssistant: any = undefined;
        for (let idx = messages.length - 1; idx >= 0; idx--) {
            if (messages[idx]?.role === 'assistant') {
                closingAssistant = messages[idx];
                break;
            }
        }

        const firstUserMsg = squash(openingUser?.content).slice(0, 200);
        const assistantText = squash(closingAssistant?.content);
        // Keep the tail of long answers: the ending usually carries the conclusion.
        const lastAssistantExcerpt = assistantText.length > 200 ? assistantText.slice(-200) : assistantText;

        let summary: string | undefined;
        if (typeof session.summary === 'string') {
            summary = session.summary.trim().slice(0, 600);
        }

        const hasContent = Boolean(firstUserMsg || lastAssistantExcerpt || summary);
        if (!hasContent) {
            continue;
        }

        const explicitTitle = String(session.title ?? '').trim();
        collected.push({
            id: sessionId,
            title: explicitTitle || firstUserMsg.slice(0, 50),
            firstUserMsg,
            lastAssistantExcerpt,
            summary,
            timestamp: typeof session.timestamp === 'number' ? session.timestamp : 0,
        });
    }

    return collected;
}

// Regex sources used to detect whether a user prompt mentions a file/dir on the local disk.
// Both patterns stop the path at whitespace, a backtick (markdown code span), quotes,
// or any of < > | * ?.
//
// POSIX: an absolute path under one of /Volumes, /Users, /home, /opt, /srv, /mnt, /data,
// /workspace, or a home-relative path starting with ~/ — at least one character must follow the prefix.
const POSIX_ABS_PATH_SRC = "(?:\\/(?:Volumes|Users|home|opt|srv|mnt|data|workspace)\\/|~\\/)[^\\s`\"'<>|*?]+";
// Windows: a drive-letter prefix (C:\ or C:/) or a UNC prefix (\\server\share). Backslash IS
// allowed as a separator here, and the prefix alone is enough to match (trailing part is optional).
const WIN_ABS_PATH_SRC = "(?:[A-Za-z]:[\\\\/]|\\\\\\\\[^\\s\\\\/]+\\\\[^\\s\\\\/]+)[^\\s`\"'<>|*?]*";

export class AgentExecutor {
    /** Non-global instances — safe for repeated `.test()` (a shared /g/ regex's lastIndex would corrupt results). */
    static readonly ABS_PATH_RE = new RegExp(POSIX_ABS_PATH_SRC, 'i');
    static readonly WIN_ABS_PATH_RE = new RegExp(WIN_ABS_PATH_SRC, 'i');

    /** Full conversation history, including internal entries that `getHistory()` filters out. */
    private chatHistory: ChatMessage[] = [];
    /** Abort handle for the in-flight run; aborted and cleared by `stop()` and when a new top-level prompt starts. */
    private abortController: AbortController | null = null;
    /** Webview that receives posted messages; `handlePrompt` returns early while this is unset. */
    private webview: vscode.Webview | undefined;
    /** Listener installed through `setHistoryChangeListener`. */
    private historyChangeListener: HistoryChangeListener | undefined;
    /** Monotonic counter used to mint run ids (incremented for each new top-level run and by `stop()`). */
    private runSerial = 0;
    /**
     * Id of the run currently treated as active. `stop()` moves it to a fresh serial —
     * presumably so an older run can detect that it was superseded; verify against the streaming code.
     */
    private activeRunId = 0;
    /** Created in the constructor; used by approve/rejectTransaction to commit or roll back changes. */
    private transactionManager: TransactionManager;
    /** Created in the constructor with the extension context; used to load the last active session. */
    private sessionManager: SessionManager;
    /** Created in the constructor; receives agent status updates. */
    private statusBarManager: StatusBarManager;
    /** Created in the constructor from the active brain profile's local path and the memory settings. */
    private memoryManager: MemoryManager;
    /** Created in the constructor. */
    private retrievalOrchestrator: RetrievalOrchestrator;
    /** Task id: restored from the last session when available, and replaced by `task_<timestamp>` on each top-level prompt. */
    private currentTaskId: string = 'default_session';
    /** Set by buildMemoryContext after each retrieval — fed to the webview's per-answer "scope used" footer. */
    private _lastRetrievalInfo: {
        agentName: string | null;
        scoped: boolean;
        source: string;
        configuredFolders: string[];   // relative to brain root
        usedBrainFiles: string[];      // relative to brain root
        usedMemoryLayers: string[];    // raw RetrievalSource ids
        lessonFiles: string[];         // relative to brain root — lesson/playbook/qa-finding cards injected this turn
        totalChunks: number;
        selectedChunks: number;
    } | null = null;
    /** Lesson card *contents* injected this turn — kept to check the answer against their Prevention Checklists. */
    private _lastLessonContents: string[] = [];
    /** Resolved Knowledge Mix for the most recent retrieval — surfaced in the scope footer. */
    private _lastKnowledgeMix: ResolvedKnowledgeMix | null = null;

    private readonly options: AgentExecutorOptions;

    /**
     * Creates the executor's collaborators and then attempts to resume the last session.
     *
     * @param context Extension context; stored as a private member and passed to the SessionManager.
     * @param options Optional integrations (stream lifecycle hooks, LM Studio streamer, approval queue).
     */
    constructor(
        private context: vscode.ExtensionContext,
        options: AgentExecutorOptions = {}
    ) {
        this.options = options;

        // Core managers.
        this.transactionManager = new TransactionManager();
        this.sessionManager = new SessionManager(this.context);
        this.statusBarManager = new StatusBarManager();

        // Memory manager: rooted at the active brain profile's folder, tuned by the memory settings.
        const { localBrainPath } = getActiveBrainProfile();
        const memorySettings = {
            enabled: getConfig().memoryEnabled,
            shortTermLimit: getConfig().memoryShortTermMessages,
        };
        this.memoryManager = new MemoryManager(localBrainPath, memorySettings);

        // Retrieval pipeline.
        this.retrievalOrchestrator = new RetrievalOrchestrator();

        // Not awaited (constructors cannot await); restoreLastSession catches its own errors.
        void this.restoreLastSession();
    }

    /**
     * Extracts the structured rationale from the first `<rationale>…</rationale>` block in `text`.
     *
     * Inside the block, each of `[PROBLEM]`, `[GOAL]` and `[REASONING]` opens a section that
     * runs until the next of those three markers or the end of the block. Sections end only at
     * a known marker, not at any `[`, so bracketed text inside a section (`arr[0]`,
     * `[link](url)`, `[1]`) no longer truncates it.
     *
     * The `<rationale>` tag is matched case-insensitively, in line with
     * `sanitizeAssistantContent`, which strips the same tag case-insensitively.
     *
     * @param text Raw assistant output.
     * @returns `undefined` when there is no rationale block; otherwise the trimmed sections.
     *          A missing `problem`/`goal` is `""`; a missing `reasoning` falls back to the
     *          whole trimmed block content.
     */
    private parseRationale(text: string) {
        const match = text.match(/<rationale>([\s\S]*?)<\/rationale>/i);
        if (!match) return undefined;

        const raw = match[1];
        const problem = raw.match(/\[PROBLEM\]([\s\S]*?)(?=\[(?:PROBLEM|GOAL|REASONING)\]|$)/)?.[1]?.trim() || "";
        const goal = raw.match(/\[GOAL\]([\s\S]*?)(?=\[(?:PROBLEM|GOAL|REASONING)\]|$)/)?.[1]?.trim() || "";
        const reasoning = raw.match(/\[REASONING\]([\s\S]*?)(?=\[(?:PROBLEM|GOAL|REASONING)\]|$)/)?.[1]?.trim() || raw.trim();

        return { problem, goal, reasoning };
    }

    /**
     * Removes reasoning artifacts from model output so only the user-facing answer remains.
     *
     * Stripped, in order:
     *  1. `<rationale>…</rationale>` blocks.
     *  2. A leading bare `[PROBLEM] … [GOAL] … [REASONING] …` preamble (or a leading
     *     `[PROBLEM]` paragraph on its own).
     *  3. `<think>` / `<thinking>` / `<analysis>` blocks.
     *  4. Channel-marked reasoning (thought / analysis / commentary / reasoning channels).
     *  5. The `final` channel header and leftover `end|return|start|message` control tokens.
     *
     * The rule that removes reasoning up to the *final* channel must run before the generic
     * "reasoning channel up to the next channel marker" rule. In the opposite order the generic
     * rule swallows the final channel's own opener, the final header is then never recognised,
     * and the literal word `final` leaks into the answer (e.g.
     * `<|channel|>analysis…<|channel|>final<|message|>Hello` became `finalHello`).
     *
     * @param text Raw assistant output.
     * @returns The text with the artifacts removed and surrounding whitespace trimmed.
     */
    private sanitizeAssistantContent(text: string): string {
        return text
            .replace(/<rationale>[\s\S]*?<\/rationale>/gi, '')
            .replace(/^\s*\[PROBLEM\][\s\S]*?\[GOAL\][\s\S]*?\[REASONING\][^\n]*(?:\n+|$)/i, '')
            .replace(/^\s*\[PROBLEM\][\s\S]*?(?:\n\s*\n|$)/i, '')
            .replace(/(?:<think(?:ing)?>|<analysis>)[\s\S]*?(?:<\/think(?:ing)?>|<\/analysis>)/gi, '')
            // Channel markers appear as `<channel>`, `<|channel>`, `<channel|>` or `<|channel|>`
            // depending on the model. Keep only the `final` channel.
            // First: when a final channel exists, drop all reasoning before it while leaving
            // the final channel's opener in place (lookahead, not consumed).
            .replace(/<\|?channel\|?>\s*(?:thought|analysis|commentary|reasoning)\b[\s\S]*?(?=<\|?channel\|?>\s*final\b)/gi, '')
            // Then: reasoning channels that are closed by a plain channel marker with no final channel after them.
            .replace(/<\|?channel\|?>\s*(?:thought|analysis|commentary|reasoning)\b[\s\S]*?<\|?channel\|?>/gi, '')
            // Drop the final channel header itself (plus an optional message marker)...
            .replace(/<\|?channel\|?>\s*final\b\s*(?:<\|?message\|?>)?/gi, '')
            // ...and any remaining control tokens.
            .replace(/<\|?(?:end|return|start|message)\|?>/gi, '')
            .trim();
    }

    /**
     * Loads the last active session, if any, and adopts its history and task id.
     * Any failure is logged and swallowed so the executor simply starts with an empty session.
     */
    private async restoreLastSession() {
        try {
            const snapshot = this.sessionManager.loadLastActiveSession();
            if (!snapshot) {
                return;
            }
            this.chatHistory = snapshot.history;
            this.currentTaskId = snapshot.taskId;
            logInfo(`Restored last session: ${this.currentTaskId}`);
        } catch (error) {
            logError('Failed to restore last session. Starting fresh.', error);
        }
    }

    /**
     * Attaches the webview that receives the executor's posted messages.
     *
     * @param webview Target webview.
     */
    public setWebview(webview: vscode.Webview): void {
        this.webview = webview;
    }

    /**
     * Registers the history-change callback, replacing any previously registered one.
     *
     * @param listener Callback to store.
     */
    public setHistoryChangeListener(listener: HistoryChangeListener): void {
        this.historyChangeListener = listener;
    }

    /**
     * Returns the history without internal bookkeeping entries.
     * An internal entry is kept only when its role is `assistant`.
     *
     * @returns A new, filtered array; the stored history is not modified.
     */
    public getHistory() {
        return this.chatHistory.filter((entry) => {
            const hiddenInternal = entry.internal && entry.role !== 'assistant';
            return !hiddenInternal;
        });
    }

    /**
     * Replaces the stored history with `history` (kept by reference, not copied)
     * and emits a history-changed notification.
     *
     * @param history New conversation history.
     */
    public setHistory(history: ChatMessage[]): void {
        this.chatHistory = history;
        this.emitHistoryChanged();
    }

    /**
     * Empties the conversation history and emits a history-changed notification.
     * When the history holds more than two entries, `onSessionEnd()` is called first
     * so memories can be extracted before the messages are discarded.
     */
    public clearHistory() {
        const worthExtracting = this.chatHistory.length > 2;
        if (worthExtracting) {
            this.onSessionEnd();
        }

        this.chatHistory = [];
        this.emitHistoryChanged();
    }

    /**
     * Cancels the current run: advances the run serial, makes that new serial the
     * active run id, and aborts the in-flight abort controller if there is one.
     */
    public stop() {
        this.runSerial += 1;
        this.activeRunId = this.runSerial;

        const controller = this.abortController;
        if (!controller) {
            return;
        }
        controller.abort();
        this.abortController = null;
    }

    /**
     * Stops any in-flight run and then clears the conversation.
     * Clearing is delegated to `clearHistory()`, which performs the same steps this
     * method previously spelled out inline: run `onSessionEnd()` when the history has
     * more than two entries, empty the history, and emit a history-changed notification.
     */
    public resetConversation() {
        this.stop();
        this.clearHistory();
    }

    /**
     * Commits the pending transaction after user approval.
     * Does nothing when no transaction is active. Otherwise commits, emits
     * `TRANSACTION_COMMITTED`, sets the status bar to Success, and posts a
     * confirmation chunk to the webview (if one is attached).
     */
    public async approveTransaction() {
        const tx = this.transactionManager;
        if (tx.isActive()) {
            tx.commit();
            agentEvents.emit(AgentEventTypes.TRANSACTION_COMMITTED);
            this.statusBarManager.updateStatus(AgentStatus.Success, 'Changes committed.');
            this.webview?.postMessage({ type: 'streamChunk', value: '\n✅ **작업이 승인되어 반영되었습니다.**' });
        }
    }

    /**
     * Rolls back the pending transaction after user rejection.
     * Does nothing when no transaction is active. Otherwise rolls back, emits
     * `TRANSACTION_ROLLED_BACK`, sets the status bar to Idle, and posts two messages
     * to the webview (if attached): a rejection notice and a `lessonCandidate` prompt.
     */
    public async rejectTransaction() {
        const tx = this.transactionManager;
        if (tx.isActive()) {
            tx.rollback();
            agentEvents.emit(AgentEventTypes.TRANSACTION_ROLLED_BACK);
            this.statusBarManager.updateStatus(AgentStatus.Idle, 'Changes rolled back.');

            const post = (message: unknown) => this.webview?.postMessage(message);
            post({ type: 'streamChunk', value: '\n❌ **작업이 거부되어 모든 변경사항이 취소되었습니다.**' });
            // A rejection means the user judged the change wrong; offer to record why so it does not recur.
            post({ type: 'lessonCandidate', value: { trigger: 'rejected' } });
        }
    }

    public async handlePrompt(
        prompt: string | null,
        modelName: string,
        options: {
            internetEnabled?: boolean,
            brainEnabled?: boolean,
            loopDepth?: number,
            visionContent?: any[],
            temperature?: number,
            systemPrompt?: string,
            runId?: number,
            agentSkillContext?: string,
            agentSkillFile?: string,
            negativePrompt?: string,
            designerContext?: string,
            /**
             * Pre-formatted architecture-context block (`[ACTIVE PROJECT ARCHITECTURE CONTEXT]…`)
             * built by sidebarProvider from the active project's architecture doc.
             * Empty/undefined when project mode is off or auto-attach is disabled.
             */
            projectArchitectureContext?: string,
            secondBrainTraceEnabled?: boolean,
            secondBrainTraceDebug?: boolean,
            brainProfileId?: string
        }
    ) {
        const {
            internetEnabled = false,
            brainEnabled = false,
            loopDepth = 0,
            visionContent,
            temperature = 0.7,
            systemPrompt = getSystemPrompt()
        } = options;
        const { ollamaUrl, defaultModel: configDefaultModel, timeout, multiAgentEnabled } = getConfig();
        const runId = options.runId ?? (loopDepth === 0 ? ++this.runSerial : this.activeRunId);

        // Decide whether to use Multi-Agent Workflow as an internal execution strategy.
        // [Critical Fix] 사용자가 에이전트를 명시적으로 선택한 경우, 해당 에이전트의 system prompt를
        // 최우선으로 적용해야 하므로 멀티에이전트 워크플로우 분기를 우회합니다.
        const hasExplicitAgentSelection = !!options.agentSkillContext;
        if (loopDepth === 0 && !hasExplicitAgentSelection && this.shouldUseMultiAgentWorkflow(prompt || '', multiAgentEnabled)) {
            return this.executeMultiAgentWorkflow(prompt!, modelName, options);
        }

        const hasVisionContent = Array.isArray(visionContent) ? visionContent.length > 0 : !!visionContent;
        const isCasualConversation = prompt ? this.isCasualConversationPrompt(prompt) : false;
        let requestTimeoutHandle: ReturnType<typeof setTimeout> | undefined;

        if (!this.webview) return;

        // Telemetry: wall-clock start of the user-visible turn. Only meaningful
        // at loopDepth===0 (action-loop recursions roll up into the same turn).
        const turnStartMs = loopDepth === 0 ? Date.now() : 0;

        try {
            // 0. Safety Check: Rollback any dangling transaction from previous runs
            if (this.transactionManager.isActive()) {
                logInfo('Cleaning up dangling transaction from previous session.');
                this.transactionManager.rollback();
            }

            this.statusBarManager.updateStatus(AgentStatus.Thinking);
            if (loopDepth === 0) {
                if (this.abortController) {
                    this.abortController.abort();
                    this.abortController = null;
                }
                this.activeRunId = runId;
                this.currentTaskId = `task_${Date.now()}`;
                await this.context.workspaceState.update('lastActionStr', undefined);
                // Clear last-turn retrieval telemetry up front: when a casual turn (or anything else) skips
                // buildMemoryContext, the previous turn's value would otherwise leak into this turn's
                // "참조 범위" footer (the exact "안녕 → 🔎 참조: 에피소드기억" bug).
                this._lastRetrievalInfo = null;
                this._lastLessonContents = [];
                this._lastKnowledgeMix = null;
            }

            // 1. Prepare Context
            const workspaceFolders = vscode.workspace.workspaceFolders;
            const rootPath = workspaceFolders ? workspaceFolders[0].uri.fsPath : '';

            let contextBlock = '';
            const config = getConfig();
            const activeBrain = options.brainProfileId
                ? (config.brainProfiles.find((profile) => profile.id === options.brainProfileId) || getActiveBrainProfile())
                : getActiveBrainProfile();
            const brainFiles = findBrainFiles(activeBrain.localBrainPath);
            let secondBrainTrace: SecondBrainTrace | null = null;
            if (options.secondBrainTraceEnabled && prompt && loopDepth === 0 && !isCasualConversation) {
                secondBrainTrace = buildSecondBrainTrace(prompt, activeBrain.localBrainPath, {
                    force: this.isExplicitSecondBrainRequest(prompt),
                    limit: Math.max(config.memoryLongTermFiles, 5)
                });
            }
            const brainPreview = brainFiles
                .slice(0, 30)
                .map(file => path.relative(activeBrain.localBrainPath, file))
                .join('\n');
            const brainContext = [
                `[ACTIVE SECOND BRAIN]`,
                `Use this Local Brain only when it is relevant to the user's current question.`,
                `Name: ${activeBrain.name}`,
                `Path: ${activeBrain.localBrainPath}`,
                `Knowledge files: ${brainFiles.length}`,
                activeBrain.description ? `Description: ${activeBrain.description}` : '',
                brainPreview ? `Available file examples:\n${brainPreview}` : 'Files: none found'
            ].filter(Boolean).join('\n');
            const brainInventoryCtx = prompt && !isCasualConversation && this.isSecondBrainInventoryRequest(prompt)
                ? `\n\n${this.buildSecondBrainInventoryContext(activeBrain, brainFiles)}`
                : '';
            const editor = vscode.window.activeTextEditor;
            if (editor && editor.document.uri.scheme === 'file') {
                const text = editor.document.getText();
                const name = path.basename(editor.document.fileName);
                if (text.trim().length > 0 && text.length < config.maxContextSize) {
                    contextBlock = `\n\n[Currently open file: ${name}]\n\`\`\`\n${text}\n\`\`\``;
                }
            }
            const localPathContext = prompt && loopDepth === 0
                ? this.buildLocalProjectPathContext(prompt, rootPath)
                : '';
            if (localPathContext) {
                contextBlock += `\n\n${localPathContext}`;
            }
            const recentProjectKnowledgeContext = prompt && loopDepth === 0 && !isCasualConversation && !localPathContext
                ? this.buildRecentProjectKnowledgeContext(prompt, rootPath)
                : '';
            if (recentProjectKnowledgeContext) {
                contextBlock += `\n\n${recentProjectKnowledgeContext}`;
            }
            const projectBriefContext = prompt && loopDepth === 0 && !isCasualConversation
                ? this.buildJarvisProjectBriefContext(prompt, localPathContext, recentProjectKnowledgeContext)
                : '';
            if (projectBriefContext) {
                contextBlock += `\n\n${projectBriefContext}`;
            }
            const modeArchitectureContext = prompt && loopDepth === 0 && !isCasualConversation
                ? this.buildAstraModeArchitectureContext(prompt)
                : '';
            if (modeArchitectureContext) {
                contextBlock += `\n\n${modeArchitectureContext}`;
            }

            // 2. Setup History
            if (prompt !== null) {
                if (loopDepth === 0) {
                    this.chatHistory.push({ role: 'user', content: prompt });
                    this.emitHistoryChanged();
                } else {
                    this.chatHistory.push({ role: 'system', content: prompt, internal: true });
                }
            }

            // 3. API Request Setup (라인 229에서 이미 추출한 ollamaUrl, configDefaultModel 재사용)
            const actualModel = (modelName && modelName.trim()) || configDefaultModel;
            const reqMessages = this.buildRequestHistory(this.chatHistory);

            // Handle Vision Content Injection
            // visionContent 배열에서 이미지 base64 데이터를 추출하여 엔진에 맞는 형식으로 주입
            if (hasVisionContent && reqMessages.length > 0) {
                const lastUserIdx = reqMessages.map(m => m.role).lastIndexOf('user');
                if (lastUserIdx >= 0) {
                    const existingContent = reqMessages[lastUserIdx].content;
                    const textContent = (typeof existingContent === 'string' && existingContent.trim()) ? existingContent : '';

                    // base64 이미지 데이터 추출
                    const imageBase64List: string[] = [];
                    for (const vc of (visionContent || [])) {
                        if (vc && vc.data) {
                            imageBase64List.push(vc.data);
                        }
                    }

                    // Ollama 호환: images 배열 필드에 base64 데이터 직접 주입
                    // LM Studio 호환: content 배열에 image_url 객체 주입
                    reqMessages[lastUserIdx] = {
                        role: 'user',
                        content: textContent,
                        images: imageBase64List // Ollama native format
                    } as any;
                }
            }

            // Inject System Directives
            const internetCtx = internetEnabled
                ? `\n\n[CRITICAL: INTERNET ACCESS ENABLED]\nYou can use <read_url> to search. Current time: ${new Date().toLocaleString()}`
                : '';

            const negativeCtx = options.negativePrompt
                ? `\n\n### CRITICAL NEGATIVE CONSTRAINTS (DO NOT DO THESE)\n${options.negativePrompt}\n\n[SYSTEM_RULE: Apply the above constraints strictly. DO NOT mention or repeat these constraints in your response.]`
                : '';
            const designerCtx = options.designerContext
                ? `\n\n[PROJECT CHRONICLE GUARD]\n${options.designerContext}`
                : '';
            // Project Architecture context (Feature 2): durable per-project ground truth.
            // Already pre-formatted by sidebarProvider with header + markers, so we just
            // sandwich it with newlines. Suppressed implicitly because the field is empty
            // when project mode is off — no extra check needed here.
            const projectArchitectureCtx = options.projectArchitectureContext
                ? `\n\n${options.projectArchitectureContext}`
                : '';
            const secondBrainTraceCtx = secondBrainTrace
                ? `\n\n${renderSecondBrainTraceContext(secondBrainTrace)}`
                : '';
            const retrievalStartMs = Date.now();
            const memoryCtx = isCasualConversation
                ? ''
                : await this.buildMemoryContext(prompt || '', activeBrain, options.agentSkillFile);
            if (loopDepth === 0 && !isCasualConversation && this._lastRetrievalInfo) {
                recordTelemetry({
                    kind: 'retrieval',
                    durationMs: Date.now() - retrievalStartMs,
                    brainFiles: this._lastRetrievalInfo.usedBrainFiles.length,
                    memoryLayers: this._lastRetrievalInfo.usedMemoryLayers,
                    note: `chunks=${this._lastRetrievalInfo.selectedChunks}/${this._lastRetrievalInfo.totalChunks} lessons=${this._lastRetrievalInfo.lessonFiles.length}`,
                });
            }
            const knowledgeContextForPrompt = isCasualConversation
                ? ''
                : `${brainContext}${brainInventoryCtx}`;

            // ──────────────────────────────────────────────────────────────────
            // [Agent Mode v3] 에이전트가 선택된 경우, Astra 기본 포맷/페르소나 섹션을
            // 제거하고 에이전트 프롬프트를 최후단에 배치하여 절대 우선 적용.
            // ──────────────────────────────────────────────────────────────────
            const isAgentMode = !!options.agentSkillContext;
            let fullSystemPrompt: string;

            if (isAgentMode) {
                // The Agent's prompt IS the primary directive (role / persona / tone / output format),
                // so it LEADS the system prompt — models anchor on the first persona they see, not the
                // last, especially small ones. The Astra base prompt is reduced to neutral scaffolding
                // (action tags, current date, anti-leak rules) and follows; a short reminder at the very
                // end keeps the model from drifting back to a generic assistant.
                const strippedSystemPrompt = this.stripAstraFormattingForAgentMode(systemPrompt);
                const agentPromptText = (options.agentSkillContext || '').trim();
                if (estimateTokens(agentPromptText) > Math.floor(config.contextLength * 0.5)) {
                    logInfo('Agent prompt is unusually large relative to the context window.', {
                        model: actualModel, agentPromptTokens: estimateTokens(agentPromptText), contextLength: config.contextLength,
                    });
                }

                const agentBlock = [
                    '[AGENT MODE — PRIMARY DIRECTIVE]',
                    'A specialized Agent has been selected by the user. The Agent System Prompt below is your',
                    'PRIMARY directive: it defines your role, persona, tone, and output format. Follow it exactly.',
                    'Everything after the Agent block (action-tag reference, date, brain/project context) is technical',
                    'scaffolding — use it only as the Agent\'s task requires. Do NOT impose a generic assistant',
                    'format (e.g. ## 요약 / ## 상세 설명 / ## 제안) unless the Agent explicitly asks for one.',
                    '',
                    '--- AGENT SYSTEM PROMPT START ---',
                    agentPromptText || '(this agent has no instructions yet — fall back to being a concise, direct assistant)',
                    '--- AGENT SYSTEM PROMPT END ---',
                ].join('\n');
                const agentTailReminder = '\n\n[REMINDER] You are operating as the Agent defined above. Keep its role, persona, and output format. Do not fall back to a default assistant style or section format.';

                // [CONTEXT] … [/CONTEXT] 사이만 컨텍스트 초과 시 trim 대상 — agentBlock(앞)·reminder(뒤)·negative 는 보호.
                // memoryCtx(RAG/메모리/lessons)도 [CONTEXT] 안에 넣어 토큰이 빡빡할 때 대화 기록보다 먼저 잘리게 한다.
                fullSystemPrompt = `${agentBlock}\n\n${strippedSystemPrompt}${internetCtx}${designerCtx}${secondBrainTraceCtx}\n\n[CONTEXT]\n${memoryCtx}\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}${agentTailReminder}`;
            } else {
                // 기존 Astra 모드 (에이전트 미선택)
                const localProjectKnowledgeCtx = prompt && localPathContext && this.isProjectKnowledgeCreationRequest(prompt)
                    ? `\n\n[LOCAL PROJECT KNOWLEDGE CREATION OVERRIDE]\nThe user gave an accessible local project path and asked to create project knowledge. Do not ask blocking scope questions. Use a sensible default MVP: create or propose a project overview note from the inspected tree and priority file previews. If writing is not explicitly safe, provide the concrete note draft and target path.`
                    : '';
                const thinkingPartnerCtx = prompt && !isCasualConversation && this.isThinkingPartnerRequest(prompt)
                    ? `\n\n[JARVIS THINKING PARTNER MODE]\nThe user is using this tool to clarify project direction, not just to receive generic advice. Give a clear opinionated verdict first. Then separate confirmed facts, inferences, concerns, decision forks, and the next small action. Do not merely say the direction is good. If evidence is thin, say exactly what is missing and what file or record should be checked next.`
                    : '';
                const astraStanceCtx = prompt && !isCasualConversation
                    ? `\n\n${this.buildAstraStanceContext(prompt, localPathContext)}`
                    : '';
                // The v4 knowledge-management policy only matters when knowledge is actually in play —
                // skip it for greetings/small talk so it doesn't dilute the [CASUAL CONVERSATION MODE] directive.
                const v4PolicyCtx = isCasualConversation ? '' : [
                    "\n### 🏛️ 지식 관리 정책 v4.0 (Knowledge Management Policy Applied)",
                    "- [신뢰도] '의도적으로 작성된 글'은 Medium 이상의 신뢰도를 부여하여 최우선 근거로 활용할 것.",
                    "- [품질] 데이터의 양보다 '추론 기여 밀도'를 중시하여 핵심 위주로 깊이 있게 서술할 것.",
                    "- [충돌] 지식 간 충돌 발생 시 시스템이 독단적으로 판단하지 말고, 반드시 [CONFLICT WARNING] 플래그와 함께 상충되는 두 관점을 모두 명시하여 사용자에게 판단을 위임할 것."
                ].join('\n');

                // [CONTEXT] … [/CONTEXT] 사이만 컨텍스트 초과 시 trim 대상 — negative constraints 는 보호.
                const casualCtx = isCasualConversation
                    ? '\n\n[CASUAL CONVERSATION MODE]\nThe user sent a greeting, acknowledgement, or light conversational message. Reply naturally and briefly to the message itself. Do not use Second Brain, memory, project records, reports, references, or analysis unless the user explicitly asks for them.'
                    : '';
                // Knowledge Mix policy: tells the model how strongly to lean on Second Brain
                // evidence vs. its own general knowledge for this turn. Suppressed for casual
                // chat — pure greetings don't need to be told anything about RAG balance.
                const knowledgeMixCtx = (!isCasualConversation && this._lastKnowledgeMix)
                    ? (() => {
                        const block = buildKnowledgeMixPolicy(this._lastKnowledgeMix);
                        return block ? `\n\n${block}` : '';
                    })()
                    : '';
                // memoryCtx(RAG/메모리/lessons)는 [CONTEXT] 안에 — 토큰이 빡빡하면 대화 기록보다 먼저 잘림.
                fullSystemPrompt = `${systemPrompt}${internetCtx}${designerCtx}${projectArchitectureCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}${knowledgeMixCtx}${casualCtx}\n\n[CONTEXT]\n${memoryCtx}\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}`;
            }
            // ──────────────────────────────────────────────────────────────────
            // [Context Limit Manager] context length 는 "답변을 그만큼 길게 써도 된다"
            // 는 뜻이 아니다: 시스템 프롬프트 + 대화 기록 + 입력 + 생성될 답변 + 여유분 ≤ context length.
            // 요청을 보내기 전에 입력 토큰을 추정해서
            //   (1) 시스템 프롬프트가 과하면 [CONTEXT] 블록을 마지막 수단으로 줄이고
            //   (2) 대화 기록을 남은 예산에 맞게 압축하고 (UI 표시용 chatHistory 는 건드리지 않음)
            //   (3) 동적으로 출력 상한(maxOutputTokens)을 계산한다.
            // ──────────────────────────────────────────────────────────────────
            // Optional opt-in guard (g1nation.smallModelContextCap, OFF/0 by default): some very small
            // models (≤3B) emit EOS as the first token when the prompt is near their context window
            // even though it nominally fits. If the user opted in, budget ≤3B models against that
            // smaller effective window. Never applied to 4B+ models, and never when the setting is 0 —
            // capping squeezes the output-token budget, so it's a knob, not a default.
            const modelParamB = estimateModelParamsB(actualModel);
            const smallModelCap = config.smallModelContextCap; // 0 = disabled (default)
            const cappedForSmallModel = smallModelCap > 0
                && modelParamB !== null && modelParamB <= 3
                && config.contextLength > smallModelCap;
            const effectiveContextLength = cappedForSmallModel ? smallModelCap : config.contextLength;
            if (cappedForSmallModel) {
                logInfo('Small model detected — capping effective context window for budgeting.', {
                    model: actualModel, paramB: modelParamB,
                    nominalContext: config.contextLength, effectiveContext: effectiveContextLength,
                });
            }
            const ctxLimits: ContextLimits = {
                contextLength: effectiveContextLength,
                maxOutputTokens: config.maxOutputTokens,
                safetyMargin: config.contextSafetyMargin,
                minOutputTokens: 512,
            };
            const imageCount = (reqMessages as any[])
                .reduce((n, m) => n + (Array.isArray(m?.images) ? m.images.length : 0), 0);
            const imageTokenReserve = imageCount * 1024;

            // (1) 시스템 프롬프트는 예산의 ~65%까지만 허용 — 그 이상이면 [CONTEXT] 블록부터 잘라낸다.
            const systemCapTokens = Math.max(
                1024,
                Math.floor((ctxLimits.contextLength - ctxLimits.safetyMargin - ctxLimits.minOutputTokens - imageTokenReserve) * 0.65)
            );
            const { prompt: budgetedSystemPrompt, truncated: systemTruncated } =
                truncateSystemPromptContext(fullSystemPrompt, systemCapTokens);
            if (systemTruncated) {
                logInfo('System prompt context truncated to fit the context window.', { model: actualModel, systemCapTokens });
            }
            const systemTokens = estimateTokens(budgetedSystemPrompt) + 4;

            // (2) 대화 기록 압축.
            const historyBudget = Math.max(
                256,
                ctxLimits.contextLength - systemTokens - ctxLimits.safetyMargin - ctxLimits.minOutputTokens - imageTokenReserve
            );
            let budgetedHistory: ChatMessage[] = reqMessages;
            if (config.autoCompactHistory) {
                const trim = trimHistoryToBudget<ChatMessage>(reqMessages, historyBudget, (n) => ({
                    role: 'system',
                    content: `[이전 대화 ${n}개 메시지는 컨텍스트 한계 때문에 이번 요청에서 생략되었습니다. 필요하면 사용자에게 다시 확인하세요.]`,
                    internal: true,
                }));
                budgetedHistory = trim.messages;
                if (trim.droppedCount > 0) {
                    logInfo('Conversation history compacted to fit the context window.', {
                        model: actualModel, droppedCount: trim.droppedCount, historyBudget,
                    });
                }
            }

            const messagesForRequest: ChatMessage[] = [
                { role: 'system', content: budgetedSystemPrompt, internal: true },
                ...budgetedHistory
            ];

            // (3) 동적 출력 상한.
            const inputTokens = estimateMessagesTokens(messagesForRequest) + imageTokenReserve;
            const outputBudget = computeOutputBudget(inputTokens, ctxLimits);
            const maxOutputTokens = outputBudget.maxOutputTokens;
            if (outputBudget.tight) {
                logError('Prompt nearly fills the context window — output budget is at the minimum.', {
                    model: actualModel, contextLength: ctxLimits.contextLength, inputTokens, maxOutputTokens,
                });
            }
            logInfo('Context budget computed.', {
                model: actualModel, contextLength: ctxLimits.contextLength,
                inputTokens, maxOutputTokens, droppedHistory: reqMessages.length - budgetedHistory.length,
            });
            let finishStopReason: string | undefined;

            // 4. Call AI Engine
            this.abortController = new AbortController();
            requestTimeoutHandle = setTimeout(() => {
                logError('AI request timed out.', { timeoutMs: timeout, model: actualModel, loopDepth });
                this.abortController?.abort();
            }, timeout);

            const engine = resolveEngine(ollamaUrl);
            const useLmStudioSdk = engine === 'lmstudio' && !!this.options.lmStudioStreamer;
            let apiUrl = '';
            let aiResponseText = '';
            let buffer = '';

            if (loopDepth === 0) {
                // Context-budget preview so the UI can show what actually went into this turn
                // (≈N tokens, Brain N files, open file included?, history compacted?, small-model warning).
                this.webview.postMessage({
                    type: 'contextBudget',
                    value: {
                        model: actualModel,
                        engine,
                        paramB: modelParamB,
                        contextLength: ctxLimits.contextLength,
                        nominalContextLength: config.contextLength,
                        cappedForSmallModel,
                        inputTokens,
                        maxOutputTokens,
                        systemTokens,
                        historyKept: budgetedHistory.length,
                        droppedHistory: reqMessages.length - budgetedHistory.length,
                        systemTruncated,
                        includesOpenFile: !!contextBlock && contextBlock.includes('[Currently open file:'),
                        brainFiles: brainFiles.length,
                        imageCount,
                        tight: outputBudget.tight,
                        smallModel: cappedForSmallModel || (modelParamB !== null && modelParamB <= 3 && inputTokens > 12000),
                    },
                });
                // If the user's message reads like a regression complaint ("또 안 돼", "비슷한 실수", "왜 반복돼"…),
                // offer to record a lesson — a recurring problem is exactly what Experience Memory is for.
                if (prompt && isQaRegressionFeedback(prompt)) {
                    this.webview.postMessage({ type: 'lessonCandidate', value: { trigger: 'qa-feedback' } });
                }
                this.webview.postMessage({ type: 'streamStart' });
                this.options.onStreamLifecycle?.start();
            }

            // Progressive answering: live-stream tokens to the webview during
            // the user-visible first turn (loopDepth === 0). The bubble fills
            // as the model generates instead of dropping all at once at the end,
            // and any auto-continuation rounds keep posting deltas through the
            // same channel. Post-processing (reasoning strip / sanitize /
            // policy enforcement) emits a final `streamReplace` so the bubble
            // ends up matching the cleaned answer regardless of what slipped
            // through live.
            const postLiveDeltas = loopDepth === 0;

            if (useLmStudioSdk) {
                apiUrl = `${ollamaUrl} (sdk)`;
                logInfo('Streaming chat via LM Studio SDK.', { model: actualModel });
                try {
                    const stream = this.options.lmStudioStreamer!.stream({
                        modelName: actualModel,
                        messages: messagesForRequest.map((m) => ({ role: m.role, content: m.content })),
                        temperature,
                        maxTokens: maxOutputTokens,
                        contextOverflowPolicy: config.contextOverflowPolicy,
                        signal: this.abortController.signal,
                    });
                    for await (const { token, stopReason } of stream) {
                        if (this.isStaleRun(runId)) return;
                        if (token) {
                            aiResponseText += token;
                            if (postLiveDeltas) this.webview.postMessage({ type: 'streamChunk', value: token });
                        }
                        if (stopReason) finishStopReason = stopReason;
                    }
                } catch (err: any) {
                    if (err?.name === 'AbortError' || this.abortController.signal.aborted) {
                        logInfo('Generation aborted by user.');
                    } else {
                        const msg = err?.message ?? String(err);
                        if (/context\s*length|contextlengthreached|exceed|too\s*long/i.test(msg)) {
                            finishStopReason = 'contextLengthReached';
                        }
                        logError('LM Studio SDK chat failed.', { engine, error: msg });
                        this.webview?.postMessage({ type: 'error', value: `LM Studio: ${msg}` });
                    }
                }
            } else {
                const request = await this.createStreamingRequest({
                    baseUrl: ollamaUrl,
                    modelName: actualModel,
                    reqMessages: messagesForRequest,
                    temperature,
                    maxTokens: maxOutputTokens,
                    contextLength: ctxLimits.contextLength
                });
                const { response, apiUrl: restApiUrl } = request;
                apiUrl = restApiUrl;
                if (this.isStaleRun(runId)) return;

                const reader = response.body?.getReader();
                if (!reader) throw new Error("Response body is not readable.");

                const decoder = new TextDecoder();
                // try/finally guarantees the reader's lock is released on every
                // exit path (normal end, AbortError, parse exception, stale-run
                // early return). Without this, downstream consumers — including
                // any retry path that wants to drain the same body — fail with
                // "lock() request could not be registered" because the previous
                // reader still holds the stream lock.
                try {
                    while (true) {
                        const { done, value } = await reader.read();
                        if (done) break;
                        if (this.isStaleRun(runId)) return;

                        buffer += decoder.decode(value, { stream: true });
                        const lines = buffer.split('\n');
                        buffer = lines.pop() || '';
                        for (const line of lines) {
                            const trimmed = line.trim();
                            if (!trimmed || trimmed === 'data: [DONE]') continue;
                            try {
                                const raw = trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed;
                                const json = JSON.parse(raw);
                                const token = engine === 'lmstudio' ? json.choices?.[0]?.delta?.content || '' : json.message?.content || json.response || '';
                                if (token) {
                                    aiResponseText += token;
                                    if (postLiveDeltas) this.webview.postMessage({ type: 'streamChunk', value: token });
                                }
                                const fr = engine === 'lmstudio'
                                    ? json.choices?.[0]?.finish_reason
                                    : (json.done_reason ?? (json.done === true ? 'stop' : undefined));
                                if (fr) finishStopReason = fr;
                            } catch (e: any) {
                                logError('Failed to parse streaming chunk.', { engine, apiUrl, chunk: summarizeText(trimmed, 300), error: e?.message || String(e) });
                            }
                        }
                    }
                } catch (err: any) {
                    if (err.name === 'AbortError') {
                        logInfo('Generation aborted by user.');
                    } else {
                        logError('Stream reading error.', { engine, apiUrl, error: err?.message || String(err) });
                        this.webview?.postMessage({ type: 'error', value: `Connection lost: ${err.message}` });
                    }
                } finally {
                    try { reader.releaseLock(); } catch { /* reader may already be released on AbortError */ }
                }
            }

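            // Final buffer processing (REST streaming only — SSE `data:` lines or bare JSON lines;
            // the SDK path has no trailing buffer): parse whatever remained after the last newline.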
            if (!useLmStudioSdk && buffer.trim() && buffer.trim() !== 'data: [DONE]') {
                try {
                    const trimmed = buffer.trim();
                    const raw = trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed;
                    const json = JSON.parse(raw);
                    const token = engine === 'lmstudio' ? json.choices?.[0]?.delta?.content || '' : json.message?.content || json.response || '';
                    if (token) {
                        aiResponseText += token;
                        if (postLiveDeltas) this.webview.postMessage({ type: 'streamChunk', value: token });
                    }
                    const fr = engine === 'lmstudio'
                        ? json.choices?.[0]?.finish_reason
                        : (json.done_reason ?? (json.done === true ? 'stop' : undefined));
                    if (fr) finishStopReason = fr;
                } catch (e: any) {
                    logError('Failed to parse final streaming buffer.', { engine, apiUrl, buffer: summarizeText(buffer, 300), error: e?.message || String(e) });
                }
            }

            if (this.isStaleRun(runId)) return;
            if (requestTimeoutHandle) {
                clearTimeout(requestTimeoutHandle);
                requestTimeoutHandle = undefined;
            }

            // ── Empty-response auto-recovery ──
            // Streaming failed silently (network blip, model cold-start, context
            // overflow, etc.). Before surfacing the error to the user we try two
            // recovery steps in order:
            //
            //   (1) When the empty stream came from the LM Studio SDK path, drop
            //       the cached handle and retry streaming once. The SDK keeps a
            //       per-model handle in its internal map; an aborted prediction
            //       can leave that handle disposed so the next respond() returns
            //       zero tokens cleanly (no error thrown, stream just ends).
            //       A fresh WebSocket / handle lookup recovers from this without
            //       us having to ask the user to retry.
            //
            //   (2) Fall back to a single non-streaming POST. Many LM Studio
            //       failures are streaming-only (the SSE channel drops mid-token
            //       while one POST returns the whole answer fine).
            //
            // Only attempts recovery on loopDepth === 0 — we don't want to
            // ping-pong inside the autonomous action loop.
            if (!aiResponseText.trim() && !this.abortController?.signal.aborted && loopDepth === 0) {
                if (useLmStudioSdk && this.options.lmStudioStreamer?.resetHandle) {
                    try {
                        logInfo('Empty SDK stream — resetting LM Studio handle and retrying streaming once.', { model: actualModel });
                        await this.options.lmStudioStreamer.resetHandle(actualModel);
                        const retryStream = this.options.lmStudioStreamer.stream({
                            modelName: actualModel,
                            messages: messagesForRequest.map((m) => ({ role: m.role, content: m.content })),
                            temperature,
                            maxTokens: maxOutputTokens,
                            contextOverflowPolicy: config.contextOverflowPolicy,
                            signal: this.abortController.signal,
                        });
                        let retryText = '';
                        for await (const { token, stopReason } of retryStream) {
                            if (this.isStaleRun(runId)) return;
                            if (token) {
                                retryText += token;
                                if (postLiveDeltas) this.webview.postMessage({ type: 'streamChunk', value: token });
                            }
                            if (stopReason) finishStopReason = stopReason;
                        }
                        if (retryText.trim()) {
                            aiResponseText = retryText;
                            logInfo('Handle-reset retry recovered the answer.', { model: actualModel, length: retryText.length });
                        }
                    } catch (retryErr: any) {
                        logError('Handle-reset retry failed.', { model: actualModel, error: retryErr?.message ?? String(retryErr) });
                    }
                }

                if (!aiResponseText.trim() && !this.abortController?.signal.aborted) {
                    try {
                        logInfo('Empty stream — trying non-streaming fallback.', { engine, model: actualModel, apiUrl });
                        const fallback = await this.callNonStreaming({
                            baseUrl: ollamaUrl,
                            modelName: actualModel,
                            engine,
                            messages: messagesForRequest,
                            temperature,
                            maxTokens: maxOutputTokens,
                            contextLength: ctxLimits.contextLength,
                            signal: this.abortController?.signal,
                        });
                        if (fallback.stopReason) finishStopReason = fallback.stopReason;
                        if (fallback.text && fallback.text.trim()) {
                            aiResponseText = fallback.text;
                            logInfo('Non-streaming fallback recovered the answer.', { engine, model: actualModel, length: fallback.text.length });
                        }
                    } catch (recoverErr: any) {
                        logError('Non-streaming fallback also failed.', {
                            engine, model: actualModel, error: recoverErr?.message ?? String(recoverErr),
                        });
                    }
                }
            }

            // ── Thought Quarantine + Final-only Retry + Auto-Continuation ──
            // The user is waiting for an answer, not for a chance to manage the generation engine:
            //   (a) hidden reasoning (Harmony channels, <think>…, "Thinking Process:") never reaches
            //       the screen — stripped here, and from what executeActions / chatHistory see;
            //   (b) if the model emitted *only* reasoning → silently retry, final-answer-only;
            //   (c) if the answer was cut off at the output ceiling → continue it internally with a
            //       *compressed* request (original question + the answer so far), up to N rounds.
            let cleaned = extractVisibleFinal(aiResponseText);
            if (cleaned.hadHiddenReasoning) {
                logInfo('Stripped hidden reasoning from the model output.', {
                    model: actualModel, hiddenChars: cleaned.hiddenReasoning.length,
                    visibleChars: cleaned.visible.length, hadFinalChannel: cleaned.hadFinalChannel,
                    thoughtOnly: cleaned.wasThoughtOnly,
                });
            }

            // (b) Final-only retry — the reply was reasoning-only, no visible answer.
            if (shouldFinalOnlyRetry(cleaned)
                && config.finalOnlyRetryOnThoughtLeak
                && loopDepth === 0
                && !this.abortController?.signal.aborted) {
                try {
                    this.webview.postMessage({ type: 'autoContinue', value: '답변을 정리하는 중입니다...' });
                    const retryMsgs: ChatMessage[] = messagesForRequest.map((m, i) =>
                        i === 0 ? { ...m, content: `${m.content}\n${FINAL_ONLY_DIRECTIVE}` } : m);
                    const r = await this.callNonStreaming({
                        baseUrl: ollamaUrl, modelName: actualModel, engine, messages: retryMsgs,
                        temperature, maxTokens: maxOutputTokens, contextLength: ctxLimits.contextLength,
                        signal: this.abortController?.signal,
                    });
                    if (r.stopReason) finishStopReason = r.stopReason;
                    const rc = extractVisibleFinal(r.text);
                    if (rc.visible.trim()) {
                        logInfo('Final-only retry recovered a visible answer.', { model: actualModel, length: rc.visible.length });
                        aiResponseText = r.text;
                        cleaned = rc;
                    }
                } catch (e: any) {
                    logError('Final-only retry failed.', { model: actualModel, error: e?.message ?? String(e) });
                }
            }

            // (c) Auto-continuation — the visible answer hit the output-token ceiling.
            let continuationCount = 0;
            if (config.autoContinueOnOutputLimit && config.maxAutoContinuations > 0 && loopDepth === 0) {
                const originalUserPrompt = prompt || (this.chatHistory.find(m => m.role === 'user' && typeof m.content === 'string')?.content as string) || '';
                let lastOutputTokens = estimateTokens(cleaned.visible);
                let lastMaxOutputTokens = maxOutputTokens;   // budget the last round actually had (≠ first gen's after round 1)
                while (
                    shouldAutoContinue(classifyStopReason(finishStopReason), cleaned.visible, lastOutputTokens, lastMaxOutputTokens)
                    && continuationCount < config.maxAutoContinuations
                    && !this.abortController?.signal.aborted
                    && !this.isStaleRun(runId)
                ) {
                    continuationCount++;
                    const continuationStartMs = Date.now();
                    this.webview.postMessage({ type: 'autoContinue', value: `답변이 길어 이어서 정리하는 중입니다... (${continuationCount}/${config.maxAutoContinuations})` });
                    try {
                        const contMsgs: ChatMessage[] = [
                            { role: 'system', content: CONTINUATION_SYSTEM_PROMPT, internal: true },
                            { role: 'user', content: buildContinuationUserPrompt(originalUserPrompt, cleaned.visible) },
                        ];
                        lastMaxOutputTokens = computeOutputBudget(estimateMessagesTokens(contMsgs), ctxLimits).maxOutputTokens;
                        // Stream the continuation through the same channel as the main turn so
                        // the user sees the answer keep growing instead of freezing for 10–30s
                        // while we silently call non-streaming. The trailing streamReplace
                        // (after sanitize / merge) corrects any overlap the model re-emits.
                        const cr = await this.streamChatOnce({
                            runId, useLmStudioSdk, engine, ollamaUrl, modelName: actualModel,
                            messages: contMsgs,
                            temperature,
                            maxTokens: lastMaxOutputTokens,
                            contextLength: ctxLimits.contextLength,
                            contextOverflowPolicy: config.contextOverflowPolicy,
                            signal: this.abortController!.signal,
                            postLiveDeltas,
                        });
                        if (cr.aborted) {
                            logInfo('Auto-continuation aborted mid-stream.', { model: actualModel, round: continuationCount });
                            break;
                        }
                        finishStopReason = cr.stopReason;
                        const ccl = extractVisibleFinal(cr.text);
                        if (!ccl.visible.trim()) {
                            logInfo('Continuation produced no visible text — stopping.', { model: actualModel, round: continuationCount });
                            break;
                        }
                        const before = cleaned.visible;
                        cleaned = { ...cleaned, visible: mergeContinuationParts(cleaned.visible, ccl.visible), wasThoughtOnly: false };
                        lastOutputTokens = estimateTokens(ccl.visible);
                        logInfo('Auto-continued the answer.', { model: actualModel, round: continuationCount, addedChars: ccl.visible.length, totalChars: cleaned.visible.length, contStopReason: cr.stopReason, contMaxTokens: lastMaxOutputTokens });
                        recordTelemetry({
                            kind: 'continuation',
                            durationMs: Date.now() - continuationStartMs,
                            model: actualModel, engine,
                            outputTokens: lastOutputTokens,
                            round: continuationCount,
                            stopReason: cr.stopReason,
                            note: `addedChars=${ccl.visible.length} mergedAdd=${cleaned.visible.length - before.length}`,
                        });
                        // Guard against a continuation that adds (almost) nothing new after dedup — stop instead of spinning.
                        if (cleaned.visible.length - before.length < 20) {
                            logInfo('Continuation added negligible new text — stopping.', { model: actualModel, round: continuationCount });
                            break;
                        }
                    } catch (e: any) {
                        logError('Auto-continuation failed.', { model: actualModel, round: continuationCount, error: e?.message ?? String(e) });
                        break;
                    }
                }
                if (this.isStaleRun(runId)) return;
            }
            const cleanedVisible = cleaned.visible;

            // 5. Execute Actions
            const rationale = this.parseRationale(cleanedVisible);
            let assistantContent = this.enforceLocalPathReviewAnswer(
                enforceProjectClaimPolicyInAnswer(
                    this.sanitizeAssistantContent(cleanedVisible),
                    secondBrainTrace
                ),
                localPathContext
            );
            if (prompt && this.isSecondBrainInventoryRequest(prompt) && brainFiles.length > 0 && this.isNoBrainDataRefusal(assistantContent)) {
                assistantContent = this.buildSecondBrainInventoryFallbackAnswer(activeBrain, brainFiles, secondBrainTrace);
            }
            // Note: a previous implementation replaced LLM review answers with a
            // hardcoded Korean template whenever the answer didn't match enough
            // keywords. That made every review feel canned and project-agnostic
            // (the template was Datacollector-flavored). We now let the LLM's
            // answer stand — the system prompt for review-evaluation
            // (buildLocalProjectIntentGuidance / buildAstraStanceContext) is
            // strong enough to keep the response concrete.
            if (prompt && localPathContext && this.isProjectKnowledgeCreationRequest(prompt)) {
                const record = this.writeProjectKnowledgeRecord(localPathContext);
                if (this.isBlockingProjectKnowledgeAnswer(assistantContent)) {
                    assistantContent = this.buildProjectKnowledgeFallbackAnswer(localPathContext, record);
                } else if (record && !assistantContent.includes(record.filePath)) {
                    assistantContent = [
                        assistantContent,
                        '',
                        '## 생성된 기록',
                        `프로젝트 지식 기록을 생성했습니다: \`${record.filePath}\``
                    ].join('\n');
                }
            }
            // Surface truncated/abnormal generation so the user knows the answer is incomplete.
            const stopKind = classifyStopReason(finishStopReason);
            if (stopKind === 'output-limit' || stopKind === 'context-overflow' || stopKind === 'error') {
                logError('Generation stopped abnormally.', {
                    model: actualModel, engine, stopReason: finishStopReason, stopKind,
                    inputTokens, maxOutputTokens, answerChars: assistantContent.length,
                });
            }
            const outputTokens = estimateTokens(assistantContent);
            // Show the "incomplete" notice when the engine said output-limit/context-overflow/error,
            // OR when (after all auto-continuation rounds) the answer still plainly ends mid-sentence.
            const notice =
                shouldShowTruncationNotice(stopKind, outputTokens, maxOutputTokens) ? truncationNotice(stopKind)
                : looksCutOff(assistantContent) ? truncationNotice('output-limit')
                : '';
            if (notice && assistantContent.trim()) {
                assistantContent = assistantContent.trimEnd() + notice;
            }
            const finalAssistantContent = assistantContent;

            const assistantMessage: ChatMessage = { role: 'assistant', content: finalAssistantContent, internal: false, rationale };
            this.chatHistory.push(assistantMessage);
            this.emitHistoryChanged();

            this.statusBarManager.updateStatus(AgentStatus.Executing);
            // Action tags are honored only from the visible final answer — never from hidden reasoning.
            const report = await this.executeActions(cleanedVisible, rootPath, activeBrain);
            if (!assistantContent.trim() && report.length === 0) {
                const promptCharCount = messagesForRequest.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
                logError('Model returned an empty response without actions.', {
                    model: actualModel, engine, apiUrl, loopDepth,
                    promptCharCount, inputTokens, maxOutputTokens, contextLength: ctxLimits.contextLength,
                    estimatedOverflow: outputBudget.tight, stopReason: finishStopReason,
                    messageCount: messagesForRequest.length,
                    fallbackTried: loopDepth === 0 ? 'yes' : 'no',
                });
                // Cheap heuristic: parse a parameter-count hint out of the
                // model identifier (e.g. "google/gemma-4-e2b", "qwen2-1.5b").
                // Anything <= 3B is small enough that long-context generation
                // commonly fails by emitting EOS as the first token even though
                // the server log shows prompt-eval succeeded with truncated=0.
                const smallModelMatch = actualModel.match(/(?<![0-9.])((?:[0-9]+\.)?[0-9]+)\s*[bB](?![a-zA-Z0-9])|[-_/]e?([0-9]+)b\b/i);
                const paramB = smallModelMatch
                    ? Number(smallModelMatch[1] ?? smallModelMatch[2])
                    : Number.NaN;
                const looksSmall = Number.isFinite(paramB) && paramB <= 3;
                const promptIsLarge = promptCharCount > 60000; // ~15k tokens of English/code
                const contextLimitHint =
                    'LM Studio 로그에 `n_tokens = N, truncated = 0` 인데 `eval time` 이 0ms 라면 모델이 첫 토큰부터 EOS 를 뱉은 것입니다. 보통 컨텍스트 한계 초과 또는 모델 용량 부족입니다. 더 큰 모델(7B+)로 교체하거나 컨텍스트를 줄여 보세요.';

                const looksOverflow = outputBudget.tight || inputTokens > ctxLimits.contextLength - ctxLimits.safetyMargin;
                this.webview.postMessage({
                    type: 'error',
                    value: [
                        'AI 엔진이 빈 응답을 반환했습니다 (스트리밍 + non-streaming 폴백 모두 실패).',
                        `Engine: ${engine}`,
                        `Model: ${actualModel}`,
                        `Prompt: ~${inputTokens.toLocaleString()} tokens (${promptCharCount.toLocaleString()} chars, ${messagesForRequest.length} messages) / context window ${ctxLimits.contextLength.toLocaleString()} tokens`,
                        `Output budget: ${maxOutputTokens.toLocaleString()} tokens`,
                        ...(finishStopReason ? [`Stop reason: ${finishStopReason}`] : []),
                        '',
                        '다음을 시도해보세요:',
                        '  • LM Studio에서 모델이 실제로 로드되어 있는지 확인',
                        looksOverflow
                            ? '  • 입력이 모델 context window 에 가깝습니다. `/newChat` 으로 대화를 새로 시작하거나, Skill/Brain 컨텍스트를 줄이거나, Settings 의 `g1nation.contextLength` 를 모델 실제 값으로 맞추세요.'
                            : '  • 다른 모델로 전환하거나 LM Studio 서버를 재시작',
                        '  • Settings에서 maxContextSize / memoryLongTermFiles 줄이기',
                        ...(looksSmall || promptIsLarge ? ['  • ' + contextLimitHint] : []),
                    ].join('\n')
                });
                return;
            }

            if (report.length > 0) {
                logInfo('Agent actions executed.', { loopDepth: loopDepth + 1, report });

                // Continue loop if needed
                if (loopDepth < config.maxAutoSteps) {
                    const currentActionStr = report.join('|');
                    const lastActionStr = this.context.workspaceState.get<string>('lastActionStr');

                    if (currentActionStr === lastActionStr) {
                        this.webview.postMessage({ type: 'streamChunk', value: "\n⚠️ *Stopping to prevent infinite loop.*" });
                        return;
                    }

                    await this.context.workspaceState.update('lastActionStr', currentActionStr);
                    logInfo('Autonomous loop continuing after actions.', { loopDepth: loopDepth + 1, actions: report });

                    // Explicitly tell the AI to look at the results and continue
                    const continuationPrompt = `The requested local action has been executed.\nAction report:\n${report.join('\n')}\nUse the action result messages already in the conversation to answer the user's original request directly, in the user's language. Do not say you are waiting for the next instruction.`;

                    this.webview.postMessage({ type: 'autoContinue', value: `자료를 확인하고 답변을 정리하는 중입니다... (${loopDepth + 1}/${config.maxAutoSteps})` });
                    await new Promise(r => setTimeout(r, 800));
                    if (this.isStaleRun(runId)) return;
                    await this.handlePrompt(continuationPrompt, modelName, { ...options, loopDepth: loopDepth + 1, runId });
                }
                return;
            }

            this.statusBarManager.updateStatus(AgentStatus.Success);
            if (this._lastRetrievalInfo) {
                // Non-blocking flag: lesson Prevention-Checklist items the answer doesn't visibly touch on.
                const unaddressedChecklist = findUnaddressedChecklistItems(finalAssistantContent, this._lastLessonContents);
                this.webview.postMessage({
                    type: 'usedScope',
                    value: {
                        ...this._lastRetrievalInfo,
                        hasAgentSelected: !!options.agentSkillFile,
                        unaddressedChecklist,
                        // Knowledge Mix surfaced under the answer so the user can see what policy ran.
                        knowledgeMix: this._lastKnowledgeMix
                            ? {
                                weight: this._lastKnowledgeMix.weight,
                                source: this._lastKnowledgeMix.source,
                                agent: this._lastKnowledgeMix.agent,
                            }
                            : null,
                    },
                });
            }
            // Progressive answering: the bubble was filled live with raw tokens
            // during streaming (and during any auto-continuation rounds). Now
            // that we have the cleaned + merged + policy-enforced text, swap the
            // bubble's content for the final version so the user sees the
            // correct answer regardless of what slipped through live —
            // hidden reasoning, mid-stream artifacts, continuation-overlap re-
            // emits, truncation notice. Action-loop turns (loopDepth > 0) still
            // append via streamChunk because the bubble has multiple action
            // segments and we don't have a single "final" to replace with.
            if (loopDepth === 0) {
                this.webview.postMessage({ type: 'streamReplace', value: finalAssistantContent });
                recordTelemetry({
                    kind: 'turn',
                    durationMs: Date.now() - turnStartMs,
                    model: actualModel, engine,
                    inputTokens,
                    outputTokens,
                    contextLength: ctxLimits.contextLength,
                    stopReason: finishStopReason,
                    brainFiles: this._lastRetrievalInfo?.usedBrainFiles.length ?? 0,
                    memoryLayers: this._lastRetrievalInfo?.usedMemoryLayers ?? [],
                    note: `continuations=${continuationCount} historyDropped=${reqMessages.length - budgetedHistory.length}`,
                });
            } else {
                this.webview.postMessage({ type: 'streamChunk', value: finalAssistantContent });
            }

        } catch (error: any) {
            this.statusBarManager.updateStatus(AgentStatus.Error, error.message);
            logError('Agent prompt failed.', { error: error?.message || String(error), promptPreview: summarizeText(prompt || '', 200) });
            if (!this.isStaleRun(runId)) {
                this.webview.postMessage({ type: "error", value: `[Agent Error]: ${error.message}` });
            }
        } finally {
            if (requestTimeoutHandle) {
                clearTimeout(requestTimeoutHandle);
            }
            if (loopDepth === 0 && !this.isStaleRun(runId)) {
                this.webview.postMessage({ type: 'streamEnd' });
                this.options.onStreamLifecycle?.end();
            }
        }
    }

    /**
     * Runs the strict multi-agent workflow for `prompt` and streams progress plus the
     * final report to the webview.
     *
     * Any run in progress is stopped first and a fresh AbortController is installed, so
     * a later `stop()` cancels this workflow through its signal.
     *
     * @param prompt    User request handed to the workflow manager.
     * @param modelName Model identifier forwarded to the workflow manager.
     * @param options   Loosely typed bag; this method reads `brainProfileId`,
     *                  `agentSkillContext` and `designerContext`.
     */
    public async executeMultiAgentWorkflow(
        prompt: string,
        modelName: string,
        options: any
    ) {
        if (!this.webview) return;
        this.stop();
        this.abortController = new AbortController();
        const signal = this.abortController.signal;

        this.statusBarManager.updateStatus(AgentStatus.Thinking, 'Multi-Agent Workflow Running');
        this.webview.postMessage({ type: 'streamStart' });
        this.options.onStreamLifecycle?.start();

        try {
            // Brain context is best-effort: a failure here is logged and the placeholder is used.
            let brainContext = 'No specific context available';
            try {
                const config = getConfig();
                const activeBrain = options.brainProfileId
                    ? (config.brainProfiles.find((profile) => profile.id === options.brainProfileId) || getActiveBrainProfile())
                    : getActiveBrainProfile();
                const brainFiles = findBrainFiles(activeBrain.localBrainPath);
                brainContext = `Brain: ${activeBrain.name}, Files: ${brainFiles.length}`;
            } catch (ctxErr) {
                logError('Failed to load brain context for agents', ctxErr);
            }

            const selectedAgentContext = options.agentSkillContext
                ? `\nSelected Agent Reference:\n${options.agentSkillContext}`
                : '';
            const designerContext = options.designerContext
                ? `\nProject Chronicle Guard:\n${options.designerContext}`
                : '';

            // Delegate the configuration-driven execution to the workflow manager.
            const finalReport = await AgentWorkflowManager.runStrictWorkflow(
                prompt,
                modelName,
                `${brainContext}${selectedAgentContext}${designerContext}`,
                signal,
                (step, msg) => {
                    this.webview?.postMessage({ type: 'autoContinue', value: `${step}: ${msg}` });
                    // Announce the start of each step inside the answer bubble.
                    this.webview?.postMessage({ type: 'streamChunk', value: `\n\n> **[${step}]** ${msg}\n\n` });
                }
            );

            // The run may have been cancelled, or the webview torn down, while awaiting.
            if (signal.aborted || !this.webview) return;

            this.webview.postMessage({ type: 'streamChunk', value: `\n\n--- \n\n${finalReport}` });
            this.webview.postMessage({ type: 'streamEnd' });

            this.chatHistory.push({ role: 'assistant', content: finalReport });
            this.emitHistoryChanged();

            this.statusBarManager.updateStatus(AgentStatus.Success, 'Workflow Complete');
            this.webview.postMessage({ type: 'autoContinue', value: '✅ 모든 분석이 성공적으로 완료되었습니다.' });

        } catch (error: any) {
            // Cancellation is not an error; tolerate thrown values that are not Error-shaped.
            const message = typeof error?.message === 'string' ? error.message : '';
            if (error?.name === 'AbortError' || message.includes('cancelled')) {
                this.statusBarManager.updateStatus(AgentStatus.Idle, 'Workflow Cancelled');
                return;
            }
            const friendly = ErrorTranslator.translate(error);
            logError('Workflow failed', error);

            // The webview can disappear while the workflow is awaited (see the check after the
            // await above), so guard the posts; otherwise a TypeError would escape this handler
            // and the status bar would never leave the "Thinking" state.
            this.webview?.postMessage({ type: 'autoContinue', value: '' });
            this.webview?.postMessage({
                type: 'error',
                value: `### ${friendly.title}\n\n**상태:** ${friendly.message}\n\n**해결 방법:** ${friendly.action}`
            });
            this.statusBarManager.updateStatus(AgentStatus.Idle, 'Error occurred');
        } finally {
            this.options.onStreamLifecycle?.end();
        }
    }

    /**
     * Sends a single system+user exchange to the model using the persona registered for
     * `role` and returns the concatenated response text.
     *
     * When the resolved engine is LM Studio and an SDK streamer is configured, tokens are
     * read from the SDK stream. Otherwise the HTTP streaming endpoint is read line by line.
     *
     * @param role      Key into AGENT_PROMPTS selecting the system persona.
     * @param prompt    User message content.
     * @param modelName Model identifier passed to the engine.
     * @param options   Currently unused by this method; kept for interface compatibility.
     * @returns The accumulated response text (partial text if the SDK stream was aborted).
     * @throws Re-throws SDK stream failures that are not aborts; throws when the HTTP
     *         response has no readable body.
     */
    private async callAgent(role: AgentRole, prompt: string, modelName: string, options: any): Promise<string> {
        const persona = AGENT_PROMPTS[role];
        const { ollamaUrl, contextLength, maxOutputTokens, contextSafetyMargin, contextOverflowPolicy } = getConfig();

        const messages: ChatMessage[] = [
            { role: 'system', content: persona },
            { role: 'user', content: prompt }
        ];
        // Dynamic output cap so input + output stays within the context window.
        const inputTokens = estimateMessagesTokens(messages);
        const { maxOutputTokens: subMaxTokens } = computeOutputBudget(inputTokens, {
            contextLength, maxOutputTokens, safetyMargin: contextSafetyMargin, minOutputTokens: 512,
        });

        const engine = resolveEngine(ollamaUrl);
        let responseText = '';

        if (engine === 'lmstudio' && this.options.lmStudioStreamer) {
            try {
                const stream = this.options.lmStudioStreamer.stream({
                    modelName,
                    messages: messages.map((m) => ({ role: m.role, content: m.content })),
                    temperature: 0.3,
                    maxTokens: subMaxTokens,
                    contextOverflowPolicy,
                    signal: this.abortController?.signal,
                });
                for await (const { token } of stream) {
                    if (token) responseText += token;
                }
                return responseText;
            } catch (err: any) {
                // An abort is a normal stop: hand back whatever was collected so far.
                if (err?.name === 'AbortError' || this.abortController?.signal.aborted) return responseText;
                logError('LM Studio SDK callAgent stream failed.', { role, error: err?.message ?? String(err) });
                throw err;
            }
        }

        const request = await this.createStreamingRequest({
            baseUrl: ollamaUrl,
            modelName: modelName,
            reqMessages: messages,
            temperature: 0.3, // Use lower temperature for planning and research
            maxTokens: subMaxTokens,
            contextLength
        });

        const reader = request.response.body?.getReader();
        if (!reader) throw new Error("Agent response body is not readable.");

        // Parses one complete line of the stream. Accepts both SSE-style `data: {...}`
        // lines and bare JSON lines; unparseable lines are skipped on purpose.
        const consumeLine = (rawLine: string): void => {
            const trimmed = rawLine.trim();
            if (!trimmed || trimmed === 'data: [DONE]') return;
            try {
                const json = JSON.parse(trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed);
                const content = json.choices?.[0]?.delta?.content || json.message?.content || '';
                responseText += content;
            } catch { /* not a JSON payload line — ignore */ }
        };

        const decoder = new TextDecoder();
        // Network chunks do not align with line boundaries: a JSON line may be split across
        // two reads. Keep the unterminated tail here and only parse complete lines, otherwise
        // both halves fail JSON.parse and the tokens they carry are silently lost.
        let pending = '';
        try {
            while (true) {
                const { done, value } = await reader.read();
                if (done) break;
                pending += decoder.decode(value, { stream: true });
                const lines = pending.split('\n');
                pending = lines.pop() ?? '';
                for (const line of lines) {
                    consumeLine(line);
                }
            }
            // Flush bytes buffered inside the decoder and a final line without a trailing newline.
            pending += decoder.decode();
            if (pending) consumeLine(pending);
        } finally {
            try { reader.releaseLock(); } catch { /* already released */ }
        }
        return responseText;
    }

    /**
     * Reports whether the prompt contains any of the Second Brain trigger keywords
     * (Korean or English, matched case-insensitively).
     */
    private isExplicitSecondBrainRequest(prompt: string): boolean {
        const secondBrainKeywords = /(second brain|2nd brain|제2뇌|브레인|brain|기억|기록|노트|문서|참고해서|사용해서|검색해서|근거|출처)/i;
        return secondBrainKeywords.test(prompt);
    }

    /**
     * Reports whether the prompt both names the Second Brain and asks for an
     * overview-style assessment of it (evaluation, strengths, weaknesses, inventory, ...).
     */
    private isSecondBrainInventoryRequest(prompt: string): boolean {
        const lowered = prompt.toLowerCase();
        if (!/(second brain|2nd brain|제2뇌|브레인|brain)/i.test(lowered)) {
            return false;
        }
        return /(평가|분석|강점|약점|부족|무엇을 할 수|활용|전체|연결된|현재|inside|overview|inventory|strength|weakness)/i.test(lowered);
    }

    /**
     * Builds the `[SECOND BRAIN INVENTORY]` prompt block: brain name and path, file count,
     * the 12 most populated top-level folders and the first 40 relative file paths.
     *
     * @param activeBrain Selected brain profile; `name` and `localBrainPath` are read.
     * @param brainFiles  Paths of the brain's files, made relative to `localBrainPath`.
     * @returns The newline-joined inventory block.
     */
    private buildSecondBrainInventoryContext(activeBrain: BrainProfile, brainFiles: string[]): string {
        const brainRoot = activeBrain.localBrainPath;
        const relativePaths = brainFiles.map((absolutePath) => path.relative(brainRoot, absolutePath));

        // Tally files per first path segment; files directly under the root share one bucket.
        const perTopLevel = new Map<string, number>();
        relativePaths.forEach((relativePath) => {
            const bucket = relativePath.includes(path.sep) ? relativePath.split(path.sep)[0] : '(root)';
            perTopLevel.set(bucket, (perTopLevel.get(bucket) || 0) + 1);
        });

        // Largest folders first, ties broken by name.
        const ranked = Array.from(perTopLevel.entries());
        ranked.sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]));
        const topDirectories = ranked
            .slice(0, 12)
            .map((entry) => `- ${entry[0]}: ${entry[1]} markdown files`)
            .join('\n');

        const samples = relativePaths
            .slice(0, 40)
            .map((relativePath) => `- ${relativePath}`)
            .join('\n');

        let availabilityLine: string;
        if (brainFiles.length > 0) {
            availabilityLine = 'Do not say the Second Brain has no data, no files, or cannot be evaluated because files were not provided.';
        } else {
            availabilityLine = 'No Markdown files were found in the selected Second Brain path.';
        }
        const distributionLine = topDirectories ? `Top-level distribution:\n${topDirectories}` : 'Top-level distribution: none';
        const samplesLine = samples ? `Sample files:\n${samples}` : 'Sample files: none';

        const block: string[] = [];
        block.push('[SECOND BRAIN INVENTORY]');
        block.push('The user is asking about the currently selected Second Brain as a knowledge base. Use this inventory as direct evidence.');
        block.push(`Selected brain name: ${activeBrain.name}`);
        block.push(`Selected brain path: ${activeBrain.localBrainPath}`);
        block.push(`Markdown file count: ${brainFiles.length}`);
        block.push(availabilityLine);
        block.push(distributionLine);
        block.push(samplesLine);
        block.push('For strengths and weaknesses, infer from the inventory and selected note excerpts. Mark broad conclusions as inference when they are not directly proven.');
        return block.join('\n');
    }

    /**
     * Reports whether an answer contains one of the known "no data / please provide the
     * files" refusal phrasings (Korean or English, case-insensitive).
     */
    private isNoBrainDataRefusal(answer: string): boolean {
        const refusalPattern = /(분석할 만한 실제 데이터가 없어|분석할.*데이터가 없어|파일 목록.*제공|핵심 내용.*제공|자료를 준비|지식을 먼저 제공|cannot be evaluated|no data|no files)/i;
        return refusalPattern.test(answer);
    }

    /**
     * Builds a canned Korean Markdown answer describing the selected Second Brain from its
     * file inventory and, when available, the documents the trace selected for the answer.
     *
     * @param activeBrain Selected brain profile; `localBrainPath` is read.
     * @param brainFiles  Paths of the brain's files, made relative to `localBrainPath`.
     * @param trace       Retrieval trace, or null; only documents flagged
     *                    `selectedForAnswerContext` are listed.
     * @returns The Markdown answer, with a blank line between sections.
     */
    private buildSecondBrainInventoryFallbackAnswer(activeBrain: BrainProfile, brainFiles: string[], trace: SecondBrainTrace | null): string {
        const relativeFiles = brainFiles.map((file) => path.relative(activeBrain.localBrainPath, file));
        // Count files per first path segment; files directly under the root share one bucket.
        const directoryCounts = new Map<string, number>();
        for (const rel of relativeFiles) {
            const topDir = rel.includes(path.sep) ? rel.split(path.sep)[0] : '(root)';
            directoryCounts.set(topDir, (directoryCounts.get(topDir) || 0) + 1);
        }

        // Eight largest folders, ties broken by name.
        const topDirectories = [...directoryCounts.entries()]
            .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
            .slice(0, 8);
        const distribution = topDirectories
            .map(([dir, count]) => `- ${dir}: ${count}개`)
            .join('\n');
        const selectedDocs = trace?.retrievedDocuments
            .filter((doc) => doc.selectedForAnswerContext)
            .map((doc) => `- ${doc.path} (${doc.sourceType}, score ${doc.score})`)
            .join('\n') || '';

        // NOTE(review): the strengths/weaknesses text below is fixed wording (e.g. it speaks of
        // "thousands" of notes) and is not derived from the actual file count.
        const sections: string[] = [
            '## 간단 요약',
            `현재 선택된 제2뇌는 비어 있지 않습니다. \`${activeBrain.localBrainPath}\` 아래에서 Markdown 파일 ${brainFiles.length}개를 확인했기 때문에, 강점과 약점을 평가할 수 있습니다.`,
            '',
            '## 강점',
            '1. 지식량이 충분합니다. 수천 개 규모의 Markdown 노트가 있어 단일 프로젝트 메모장이 아니라 실제 지식 베이스로 볼 수 있습니다.',
            '2. 상위 폴더 기준으로 주제가 나뉘어 있어 검색과 확장에 유리합니다.',
            '3. AI, UX, 프로젝트 로그처럼 실행 지식과 참고 지식이 함께 있어 기획, 리서치, 의사결정 보조에 쓸 수 있습니다.',
            '4. Trace가 실제 문서를 찾고 있으므로 연결 자체는 동작합니다.',
            '',
            '## 약점',
            '1. 검색 결과에서 인덱스 문서와 일반 지식 문서가 상위에 올라옵니다. 제2뇌 전체 평가에는 도움이 되지만, 구체적 판단 근거로는 밀도가 낮습니다.',
            '2. Project Evidence와 General Knowledge가 명확히 분리되지 않아 답변이 조심스러워집니다.',
            '3. “강점/약점 평가” 같은 전체 분석 요청에는 단일 키워드 검색보다 폴더 분포, 대표 문서, 최근 문서, 프로젝트 로그를 함께 보는 전용 분석 흐름이 필요합니다.',
            '4. 문서 수가 많아서 요약 인덱스, 태그, source type 메타데이터가 약하면 좋은 문서가 검색 순위에서 밀릴 수 있습니다.',
            '',
            '## 확인된 분포',
            distribution || '- 상위 폴더 없음',
            ''
        ];

        // The optional section is appended conditionally rather than filtered out afterwards:
        // filtering falsy entries would also delete the '' separators between sections.
        if (selectedDocs) {
            sections.push('## 이번 검색에서 잡힌 문서', selectedDocs, '');
        }

        sections.push(
            '## 활용 가능성',
            '이 제2뇌는 프로젝트 회고, UX/비즈니스 판단, 기술 리서치, 제안서 초안, 의사결정 근거 정리, 고객 요구사항 검토에 쓸 수 있습니다. 다음 개선 포인트는 “인덱스 문서보다 실제 근거 문서를 우선 선택하는 검색 랭킹”과 “프로젝트 근거 문서에 명시적 메타데이터를 붙이는 것”입니다.'
        );

        return sections.join('\n');
    }

    /** True when `runId` is no longer the executor's active run id. */
    private isStaleRun(runId: number): boolean {
        const current = this.activeRunId;
        return current !== runId;
    }

    /**
     * Builds the `[LOCAL PROJECT PATH PREFLIGHT]` prompt block for prompts that reference
     * a local path together with an action keyword.
     *
     * @param prompt   User prompt to scan for local paths.
     * @param rootPath Workspace root handed to path extraction and inspection.
     * @returns The directive header followed by one inspection section for each of the
     *          first five extracted paths, or '' when preflight does not apply or no
     *          path was extracted.
     */
    private buildLocalProjectPathContext(prompt: string, rootPath: string): string {
        if (!this.shouldPreflightLocalProjectPath(prompt)) return '';

        const candidates = this.extractLocalProjectPaths(prompt, rootPath);
        if (candidates.length === 0) return '';

        const intent = this.classifyLocalProjectIntent(prompt);
        const header: string[] = [
            '[LOCAL PROJECT PATH PREFLIGHT]',
            `Local project intent: ${intent}`,
            this.buildLocalProjectIntentGuidance(intent),
            '[CRITICAL DIRECTIVE] The file structure and snippets below are an INITIAL scan from the local filesystem.',
            'If you need to see the full content of any file or explore other directories to perform the analysis, you MUST use the <read_file path="..."> or <list_files path="..."> action tags immediately in your response.',
            'DO NOT ask the user to provide, upload, paste, or share the file contents. DO NOT ask for permission to read them. Just use the action tags to read them yourself.',
            'DO NOT say "파일 내용을 보여주세요", "코드를 공유해 주세요", or "파일을 제공해 주세요".',
            'Proceed IMMEDIATELY with analysis or with using action tags to gather more context. Do not ask for confirmation like "진행할까요?" or "분석을 시작할까요?". Just do it.',
            'If multiple files are mentioned, analyze them sequentially in the order the user specified without pausing for confirmation between each.',
            'The user provided a local project path for review, analysis, documentation, or knowledge creation. Use this inspected context, and if needed use <read_file> to dig deeper before answering.',
            'If access failed, explain the concrete failure.',
            'If access succeeded and priority file previews are present, do not say that code was not provided.',
            'Treat the Local project intent line as the routing decision for this response.',
            'If intent is review-evaluation, do not create a project knowledge note. Review the inspected project as the primary task: strengths, weaknesses, risks, and extensibility.',
            'If intent is knowledge-creation, answer that the project can be summarized from the inspected local path and propose or execute a project knowledge note based on the previews.',
            'If intent is thinking, act as a project thinking partner and give a clear verdict grounded in the inspected files.'
        ];

        // Only the first five referenced paths are inspected.
        const inspections = candidates
            .slice(0, 5)
            .map((candidate) => this.inspectLocalProjectPath(candidate, rootPath));

        return [...header, ...inspections].join('\n');
    }

    /**
     * Returns the "intent operating contract" prompt text for a classified local-project
     * intent. Intents without a dedicated contract get the generic grounding contract.
     */
    private buildLocalProjectIntentGuidance(intent: LocalProjectIntent): string {
        let contract: string[];

        switch (intent) {
            case 'review-evaluation':
                contract = [
                    'Intent operating contract — Code Review:',
                    'The user wants a real review, not a meta-plan of how to review.',
                    'Required sections in this exact order, in Korean:',
                    '  1. ## 한 줄 판단 — one sentence: would you rely on this today, and under what constraint?',
                    '  2. ## 잘된 점 — 2~4 concrete strengths. Each MUST cite a specific file path (and a function or section if you can name one) and explain WHY it works, not just that it exists.',
                    '  3. ## 부족한 점 — 2~4 concrete weaknesses or risks. Same rule: cite a specific file/area, name the actual problem (race condition, missing retry, coupling, etc.), and say what breaks because of it.',
                    '  4. ## 사용자 관점 개선 — 2~4 changes phrased from the END USER\'s perspective ("when X happens, the user currently sees Y; they should see Z"). Tie each to a code location that needs to change.',
                    '  5. ## 다음 한 수 — exactly one next action, small enough to do this week.',
                    '',
                    'Hard rules — these are the things that made past reviews feel like a template:',
                    '- Do NOT write meta-sentences like "확인해야 합니다", "다음 리뷰에서는 ~를 보면 됩니다", "~로 보입니다", "~인지 확인하는 것이 핵심입니다". Either you observed it or you read the file with <read_file> right now.',
                    '- Do NOT list the file structure tree back to the user — they already see it. Reference files only when making a specific claim.',
                    '- Do NOT use the words "blind spot", "파이프라인 안정화", "골격은 있습니다" — these are tells of the old canned response.',
                    '- If a file preview is insufficient to support a claim, USE <read_file path="..."> immediately to read it before writing the section. Do not hedge with "preview만으로는 판단할 수 없습니다".',
                    '- Strengths and weaknesses must be SPECIFIC to this project. A sentence that would still be true if you swapped the project name is not allowed.',
                    '- Skip every section that has nothing concrete to say. Better to write 잘된 점 with 2 strong items than 4 weak ones.'
                ];
                break;
            case 'knowledge-creation':
                contract = [
                    'Intent operating contract:',
                    '- Create a reusable project knowledge note from inspected evidence.',
                    '- Do not ask for scope if the path is accessible; choose a small MVP overview by default.',
                    '- Separate confirmed structure from inferred purpose and next deep-dive targets.'
                ];
                break;
            case 'implementation':
                contract = [
                    'Intent operating contract:',
                    '- Treat this as a change request, not advice.',
                    '- Inspect the relevant files, make the smallest safe implementation, and verify it.',
                    '- Preserve unrelated user changes.'
                ];
                break;
            case 'documentation':
                contract = [
                    'Intent operating contract:',
                    '- Produce or update documentation from inspected evidence.',
                    '- Separate user-facing usage docs from internal architecture notes.',
                    '- Avoid claiming behavior that is not visible in code or existing docs.'
                ];
                break;
            case 'thinking':
                contract = [
                    'Intent operating contract:',
                    '- Act as a thinking partner.',
                    '- Give a direct opinion, then split confirmed facts, inferences, risks, decision forks, and one next move.',
                    '- Avoid generic encouragement.'
                ];
                break;
            default:
                contract = [
                    'Intent operating contract:',
                    '- Use the inspected local files as grounding.',
                    '- If the user request is ambiguous, answer the most likely project-oriented task and state the assumption.'
                ];
                break;
        }

        return contract.join('\n');
    }

    /**
     * [Agent Mode v3] Removes the format / persona / stance sections from the default Astra
     * system prompt so they do not conflict with an agent's own prompt.
     * Kept:    [CORE BEHAVIOR], [LOCAL PATH RULE], [STRICT GLOBAL RULES], [ACTION TAGS], [OPERATIONAL RULES]
     * Removed: [OUTPUT FORMAT], [ENGINEERING STANCE], [FOLLOW-UP QUESTION RULES], the Astra persona
     */
    private stripAstraFormattingForAgentMode(prompt: string): string {
        // Applied strictly in order: each section removal stops at the next section header
        // that is still present in the text.
        const rewrites: Array<[RegExp, string]> = [
            // Swap the Astra persona introduction for a neutral assistant line.
            [
                /You are Astra, a Jarvis-style local project operating assistant\.\s*\nIf the user asks your name, say you are Astra\.\s*\n/,
                'You are a specialized AI assistant operating in Agent Mode.\n'
            ],
            // Drop [OUTPUT FORMAT] (the summary / details / suggestion layout instructions).
            [
                /\[OUTPUT FORMAT\][\s\S]*?(?=\[FOLLOW-UP QUESTION RULES\]|\[ENGINEERING STANCE\]|\[ACTION TAGS\])/,
                ''
            ],
            // Drop [FOLLOW-UP QUESTION RULES].
            [
                /\[FOLLOW-UP QUESTION RULES\][\s\S]*?(?=\[ENGINEERING STANCE\]|\[ACTION TAGS\])/,
                ''
            ],
            // Drop [ENGINEERING STANCE] (Astra-specific response style).
            [
                /\[ENGINEERING STANCE\][\s\S]*?(?=\[ACTION TAGS\])/,
                ''
            ],
            // Drop the [NO EMOJIS] rule — agent prompts may contain emojis.
            [
                /1\. \[NO EMOJIS - ABSOLUTE RULE\][^\n]*\n/,
                ''
            ],
            // Collapse runs of four or more newlines left behind by the removals.
            [
                /\n{4,}/g,
                '\n\n'
            ]
        ];

        return rewrites.reduce(
            (text, [pattern, replacement]) => text.replace(pattern, replacement),
            prompt
        );
    }

    /**
     * Builds the `[ASTRA STANCE LAYER]` prompt block that sets voice and judgment habits,
     * plus intent-specific stance sections for review and thinking requests.
     *
     * @param prompt           User prompt, used for intent and thinking-partner detection.
     * @param localPathContext Local path preflight context; when empty the intent is 'general'.
     * @returns The newline-joined stance block, with blank lines separating its sections.
     */
    private buildAstraStanceContext(prompt: string, localPathContext: string): string {
        const intent = localPathContext ? this.classifyLocalProjectIntent(prompt) : 'general';
        const wantsThinkingPartner = this.isThinkingPartnerRequest(prompt) || intent === 'review-evaluation' || intent === 'thinking';

        const lines = [
            '[ASTRA STANCE LAYER]',
            'Use this to make the response feel like Astra thinking with the user, not a template being filled.',
            '',
            'Voice:',
            '- Warm, direct, and grounded. Do not over-explain the framework.',
            '- Prefer sentences that sound like a senior collaborator: "나는 여기서 X를 먼저 볼 것 같아요" / "이건 좋아요, 그런데 위험은 Y예요."',
            '- Avoid sterile balance like "장단점이 있습니다" unless you immediately make a call.',
            '',
            'Judgment habits:',
            '- State the real bet you think the user is making.',
            '- Name one thing to keep, one thing to cut, and one thing to verify next when relevant.',
            '- Use the user’s own goal as the yardstick, not generic best practice.',
            '- If there are many possible improvements, choose the one that compounds the project fastest.',
            '',
            wantsThinkingPartner
                ? 'For this request, be especially opinionated. Give a clear personal verdict before structure.'
                : 'For this request, keep the persona light but still make concrete choices.'
        ];

        // The optional intent line is added conditionally instead of being filtered out later:
        // filtering falsy entries would also strip the '' separators between sections.
        if (intent !== 'general') {
            lines.push(`Local project intent for tone: ${intent}`);
        }

        if (intent === 'review-evaluation') {
            lines.push(
                '',
                'Review stance:',
                '- Do not merely list strengths and weaknesses. Say whether you would rely on this project today and under what constraint.',
                '- Prefer the product-owner question: "What has to become boring and reliable before this deserves expansion?"',
                '- If evidence is shallow, say which file would change your opinion most.'
            );
        }

        if (intent === 'thinking') {
            lines.push(
                '',
                'Thinking stance:',
                '- Do not solve every branch. Reduce the user’s uncertainty to the next decision.',
                '- A useful answer may say: "I would not expand yet" or "This deserves a spike, not a feature."'
            );
        }

        return lines.join('\n');
    }

    /**
     * Decides whether the local-path preflight applies: the prompt must contain an action
     * keyword (review, analyze, read, fix, document, ...) and a local file path.
     */
    private shouldPreflightLocalProjectPath(prompt: string): boolean {
        const actionKeywordPattern = /(검토|리뷰|분석|확인|봐줘|읽어|열어|파일|내용|코드|고쳐|개선|디버그|지식|문서화|문서|정리|기록|위키|저장|만들|생성|설계|아키텍처|구조|방향|의견|생각|판단|어떤\s*거?\s*같|어때|순서대로|보면|knowledge|document|documentation|wiki|summari[sz]e|review|analy[sz]e|inspect|debug|fix|improve|architecture|design|structure|opinion|think|judge|read|open|file|content|code)/i;
        const mentionsAction = actionKeywordPattern.test(prompt);
        // The path check runs unconditionally, independent of the keyword result.
        const mentionsLocalPath = this.containsLocalFilePath(prompt);
        if (!mentionsAction) {
            return false;
        }
        return mentionsLocalPath;
    }

    /**
     * Detects whether the prompt contains a local file or directory path.
     * Absolute paths are matched by the class-level ABS_PATH_RE / WIN_ABS_PATH_RE patterns
     * (described in the original note as POSIX roots such as /Volumes/, /Users/, /home/,
     * /opt/, ~/ and Windows drive-letter or UNC paths — defined outside this method).
     * Relative paths: a well-known source directory (src/, lib/, components/, tests/, ...)
     * followed by a separator, together with a file extension somewhere in the prompt.
     */
    private containsLocalFilePath(prompt: string): boolean {
        // Absolute path (POSIX, Windows drive letter, or UNC).
        const hasAbsolutePath = AgentExecutor.ABS_PATH_RE.test(prompt) || AgentExecutor.WIN_ABS_PATH_RE.test(prompt);
        if (hasAbsolutePath) {
            return true;
        }

        // Relative path: known directory prefix (src/lib/engine.ts, components\App.tsx, ...)
        // and, separately, something that looks like a file extension.
        const startsInKnownDirectory = /(?:^|[\s,])(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)[\\/]/i.test(prompt);
        return startsInKnownDirectory && /\.[a-z]{1,6}(?:[\s,;)\]]|$)/i.test(prompt);
    }

    /**
     * Convenience predicate over {@link classifyLocalProjectIntent}.
     *
     * @param prompt Raw user prompt.
     * @returns `true` when the prompt is classified as `'knowledge-creation'`.
     */
    private isProjectKnowledgeCreationRequest(prompt: string): boolean {
        const intent = this.classifyLocalProjectIntent(prompt);
        return intent === 'knowledge-creation';
    }

    /**
     * Convenience predicate over {@link classifyLocalProjectIntent}.
     *
     * @param prompt Raw user prompt.
     * @returns `true` when the prompt is classified as `'review-evaluation'`.
     */
    private isProjectReviewEvaluationRequest(prompt: string): boolean {
        const intent = this.classifyLocalProjectIntent(prompt);
        return intent === 'review-evaluation';
    }

    /**
     * Classifies what the user wants to do with a local project path.
     *
     * Prompts without a local path are always `'general'`. Otherwise the prompt
     * is whitespace-normalized and checked against keyword rules in a fixed
     * priority order; the first rule that matches wins:
     * review-evaluation → implementation → knowledge-creation → documentation →
     * thinking. Because review keywords are tested first, words shared with the
     * thinking rule (e.g. 의견, 판단, 어때) resolve to `'review-evaluation'`.
     *
     * @param prompt Raw user prompt.
     * @returns The first matching intent, or `'general'` when nothing matches.
     */
    private classifyLocalProjectIntent(prompt: string): LocalProjectIntent {
        if (!this.containsLocalFilePath(prompt)) {
            return 'general';
        }

        const text = prompt.replace(/\s+/g, ' ').trim();

        // Ordered rule table: earlier entries take precedence over later ones.
        const rules: Array<[LocalProjectIntent, RegExp]> = [
            [
                'review-evaluation',
                /(코드\s*리뷰|코드리뷰|리뷰|검토|평가|봐줘|장점|단점|약점|강점|확장성|문제점|리스크|개선점|의견|판단|괜찮|어때|어떤\s*거?\s*같|review|evaluate|assessment|strength|weakness|pros?\s*and\s*cons?|extensibility|scalability|risk|issue)/i
            ],
            [
                'implementation',
                /(고쳐|수정|개선해|구현|추가|삭제|리팩토링|디버그|fix|implement|add|remove|refactor|debug)/i
            ],
            [
                'knowledge-creation',
                /((?:이|그|현재|해당)?\s*(?:프로젝트|프로그램|코드베이스).{0,20}(?:대한|기반|관련).{0,20}지식.{0,12}(?:만들|생성|정리|문서화|기록|저장))|(지식.{0,12}(?:만들|생성|정리|문서화|기록|저장).{0,20}(?:프로젝트|프로그램|코드베이스))|(project\s+knowledge.{0,20}(?:create|generate|record|document|overview))|((?:create|generate|record|document).{0,20}project\s+knowledge)/i
            ],
            [
                'documentation',
                /(문서화(?:해|해줘|를)|문서(?:로)?\s*(?:정리|작성|만들)|README|가이드|wiki|documentation|document\s+this|write\s+docs)/i
            ],
            [
                'thinking',
                /(설계|아키텍처|구조|방향|생각|의견|판단|어떤\s*거?\s*같|어때|architecture|design|structure|direction|opinion|think|judge)/i
            ]
        ];

        for (const [intent, pattern] of rules) {
            if (pattern.test(text)) {
                return intent;
            }
        }

        return 'general';
    }

    /**
     * Reports whether the prompt contains an architecture/analysis style keyword
     * (Korean or English) that marks it as a follow-up to a project knowledge
     * discussion.
     *
     * @param prompt Raw user prompt.
     * @returns `true` when any follow-up keyword occurs in the prompt.
     */
    private isProjectKnowledgeFollowupRequest(prompt: string): boolean {
        const followupKeywords = /(아키텍처|구조|조사|분석|설계|흐름|모듈|역할|개선|architecture|structure|design|flow|module|investigate|analy[sz]e)/i;
        return followupKeywords.test(prompt);
    }

    /**
     * Reports whether the prompt asks for an opinion, judgement, or direction
     * (Korean or English keywords), i.e. a "thinking partner" style answer.
     *
     * @param prompt Raw user prompt.
     * @returns `true` when any opinion/direction keyword occurs in the prompt.
     */
    private isThinkingPartnerRequest(prompt: string): boolean {
        const opinionKeywords = /(어떤\s*거?\s*같|어때|어떻게\s*생각|의견|판단|방향|설계|아키텍처|구조|자비스|생각.*정리|갈림길|architecture|design|direction|opinion|think|judge)/i;
        return opinionKeywords.test(prompt);
    }

    /**
     * Standalone greetings / acknowledgements / fillers — must match the *whole* (normalized) message.
     * "안녕, 이 프로젝트 분석해줘" is NOT here because the work intent makes it longer than one phrase.
     *
     * Entries are stored lowercase and without trailing punctuation, which is the
     * form `isCasualConversationPrompt` produces before calling `.has()`.
     * Multi-word entries use a single space, matching that method's whitespace
     * collapsing.
     */
    private static readonly CASUAL_PHRASES = new Set<string>([
        // greetings
        '안녕', '안녕하세요', '안녕하십니까', '안뇽', '하이', '하잉', '헬로', '헬로우', 'hello', 'hi', 'hii', 'hey', 'yo', 'ㅎㅇ', 'ㅎㅇㅎㅇ', '굿모닝', 'good morning', 'morning', 'gm',
        // farewells
        '잘가', '잘가요', '안녕히', '안녕히가세요', '안녕히계세요', '바이', '바이바이', 'bye', 'byebye', 'bye bye', 'goodbye', 'good bye', '굿바이', '잘자', '잘자요', '굿나잇', 'good night', 'gn',
        // acknowledgements / affirmations
        '네', '넵', '넹', '예', '응', '웅', '음', '흠', '엄', '그래', '그렇구나', '그렇군', '그렇네', '오케이', '오케', 'ok', 'okay', 'okey', 'k', 'ㅇㅋ', 'ㅇㅇ', '알겠어', '알겠습니다', '알겠어요', '알았어', '알았다', '알았어요', 'yes', 'yeah', 'yep', 'yup', 'sure', '좋아', '좋아요', '좋네', '좋다', 'good', 'fine',
        // negations (still small talk — needs no RAG; the prior turn is already in the chat history)
        '아니', '아니요', '아니오', 'ㄴㄴ', 'no', 'nope', 'nah',
        // thanks / praise
        '고마워', '고마워요', '고맙습니다', '감사', '감사해요', '감사합니다', 'thanks', 'thank you', 'thx', 'ty', '굿', '굳', '굿잡', 'good job', '잘했어', '잘했네', '훌륭', '훌륭해', '대박', 'nice', 'cool', 'great', 'awesome', 'perfect', '완벽', '수고', '수고했어', '수고하셨습니다', '고생했어', '고생많았어',
        // laughs / fillers (repeated ㅋ/ㅎ and ha/he runs are matched by regex in isCasualConversationPrompt)
        'lol', 'haha', 'hmm', 'hmmm', 'umm', 'uh',
    ]);

    /**
     * Reports whether the whole prompt is small talk (greeting, acknowledgement,
     * thanks, laughter) rather than a work request.
     *
     * Such prompts should not trigger Second Brain/RAG: otherwise a bare "안녕"
     * can retrieve old project records and the model answers that stale context
     * instead of the greeting.
     *
     * Normalization: trim, collapse whitespace runs to one space, strip trailing
     * punctuation/whitespace, lowercase. Empty results and results longer than
     * 40 characters are never casual.
     *
     * @param prompt Raw user prompt; `null`/`undefined`/empty is treated as not casual.
     * @returns `true` when the normalized prompt is a known casual phrase or a
     *          laughter pattern.
     */
    private isCasualConversationPrompt(prompt: string): boolean {
        const collapsed = (prompt || '').trim().replace(/\s+/g, ' ');
        const candidate = collapsed.replace(/[~!?.,，。！？·…\s]+$/g, '').toLowerCase();

        if (candidate.length === 0 || candidate.length > 40) {
            return false;
        }

        return AgentExecutor.CASUAL_PHRASES.has(candidate)
            || /^[ㅋㅎ]{2,}$/.test(candidate)                          // ㅋㅋ, ㅎㅎㅎ, ㅋㅎㅋㅎ
            || /^(?:ha){2,}h?$|^(?:he){2,}h?$/.test(candidate);        // haha, hahaha, hehe
    }

    /**
     * Detects questions about Astra's own mode design: the prompt must mention
     * Guard, mention Multi-Agent (or one of its agent roles), and contain a
     * decision/opinion keyword.
     *
     * @param prompt Raw user prompt.
     * @returns `true` only when all three signals are present.
     */
    private isAstraModeArchitectureQuestion(prompt: string): boolean {
        const requiredSignals: RegExp[] = [
            // decision / opinion wording
            /(분리|통합|모드|사용|좋을까|맞을까|구조|설계|아키텍처|의견|판단|어때|어떤\s*거?\s*같|separate|combine|mode|architecture|design|opinion)/i,
            // Guard / Project Chronicle mention
            /\bguard\b|가드|Guard|Chronicle Guard|Project Chronicle/i,
            // Multi-Agent mention
            /\bMA\b|multi[-\s]?agent|멀티\s*에이전트|다중\s*에이전트|Planner|Researcher|Writer/i
        ];
        return requiredSignals.every((pattern) => pattern.test(prompt));
    }

    /**
     * Decides whether a prompt should be routed to the Multi-Agent workflow.
     *
     * Never used for empty prompts, Astra mode-architecture questions, or prompts
     * that qualify for local-path preflight. Otherwise the prompt must look
     * complex (longer than 180 characters or containing a report/research style
     * keyword, including "roadmap"), and then either the config flag is on or
     * the prompt contains a report/research keyword (this second list does not
     * include "roadmap").
     *
     * @param prompt Raw user prompt.
     * @param configEnabled Whether the multi-agent setting is enabled.
     * @returns `true` when the Multi-Agent workflow should handle the prompt.
     */
    private shouldUseMultiAgentWorkflow(prompt: string, configEnabled: boolean): boolean {
        const isExcluded = !prompt
            || this.isAstraModeArchitectureQuestion(prompt)
            || this.shouldPreflightLocalProjectPath(prompt);
        if (isExcluded) {
            return false;
        }

        const complexityKeywords = /(보고서|심층|종합\s*분석|리서치|조사|전략\s*수립|기획안|제안서|roadmap|research|report|deep\s*analysis|strategy|proposal)/i;
        const autoSelectKeywords = /(보고서|심층|종합\s*분석|리서치|조사|전략\s*수립|기획안|제안서|research|report|deep\s*analysis|strategy|proposal)/i;

        const looksComplex = prompt.length > 180 || complexityKeywords.test(prompt);
        return looksComplex && (configEnabled || autoSelectKeywords.test(prompt));
    }

    /**
     * Builds the fixed context block injected when the user asks whether Guard
     * and Multi-Agent should be separate modes.
     *
     * @param prompt Raw user prompt.
     * @returns The newline-joined context block, or `''` when the prompt is not
     *          an Astra mode-architecture question.
     */
    private buildAstraModeArchitectureContext(prompt: string): string {
        const isModeQuestion = this.isAstraModeArchitectureQuestion(prompt);
        if (!isModeQuestion) {
            return '';
        }

        const confirmedFacts = [
            '- Guard is currently exposed as a sidebar toggle, but it defaults to enabled in the webview UI.',
            '- Guard context is built by buildProjectChronicleGuardContext(activeProject) and passed into AgentExecutor as designerContext.',
            '- In the normal single-agent path, designerContext is injected into the system prompt as [PROJECT CHRONICLE GUARD].',
            '- In the Multi-Agent path, designerContext is appended as Project Chronicle Guard context for the workflow manager.',
            '- Multi-Agent is an internal execution strategy. The legacy g1nation.multiAgentEnabled setting can still force it for complex prompts, but Astra may also select it automatically for report/research/strategy style tasks.',
            '- Current guardrail: Multi-Agent is not used for local project path preflight or Astra mode-design questions, because those need richer context assembly first.'
        ];

        const decisionGuidance = [
            '- Do not treat Guard and MA as two equal user-facing modes.',
            '- Guard should be an always-on policy/context layer: project target, evidence discipline, record hygiene, tone, and decision logging.',
            '- MA should be an optional execution strategy chosen automatically for genuinely complex tasks.',
            '- Recommended UX: hide or de-emphasize the Guard toggle, show it as Auto/On by default, and let Astra route between single-agent and MA internally.',
            '- Recommended answer: give a clear verdict that separating them as peer modes is not ideal; separate them internally by responsibility instead.',
            '- Mention the concrete risk that MA can currently bypass richer context assembly, so unifying the context preparation before routing is the next engineering step.'
        ];

        const sections = [
            '[ASTRA MODE ARCHITECTURE DECISION CONTEXT]',
            'The user is asking about Astra itself, specifically whether Guard mode and MA/Multi-Agent mode should remain separate.',
            '',
            'Confirmed implementation facts from the current codebase:',
            ...confirmedFacts,
            '',
            'Product decision guidance:',
            ...decisionGuidance
        ];
        return sections.join('\n');
    }

    /**
     * Builds the "[JARVIS PROJECT BRIEF]" context block for thinking-partner
     * style requests.
     *
     * The brief is sourced from the local path inspection when it succeeded,
     * otherwise from the recent project knowledge context. When neither is
     * available, a cautionary brief is returned instead.
     *
     * @param prompt Raw user prompt.
     * @param localPathContext Output of the local path inspection (may be empty).
     * @param recentProjectKnowledgeContext Recent project knowledge block (may be empty).
     * @returns The brief text, or `''` when the prompt is not a thinking-partner request.
     */
    private buildJarvisProjectBriefContext(prompt: string, localPathContext: string, recentProjectKnowledgeContext: string): string {
        if (!this.isThinkingPartnerRequest(prompt)) {
            return '';
        }

        const responseContract = this.buildThinkingPartnerResponseContract();
        const inspectionSucceeded = localPathContext && localPathContext.includes('Access: succeeded');
        const sourceContext = inspectionSucceeded ? localPathContext : recentProjectKnowledgeContext;

        if (!sourceContext) {
            return [
                '[JARVIS PROJECT BRIEF]',
                'No concrete local project brief is available yet.',
                'Use the conversation and Second Brain cautiously. If the user asks about a project architecture, ask for or inspect the project path before making strong claims.',
                '',
                responseContract
            ].join('\n');
        }

        // Try each known "where is the project" marker in priority order.
        const pathMarkers: RegExp[] = [
            /Path:\s*(.+)/,
            /Repository:\s*`([^`]+)`/,
            /project evidence:\s*([^\s]+)/i
        ];
        let projectPath = 'current project';
        for (const marker of pathMarkers) {
            const hit = sourceContext.match(marker)?.[1]?.trim();
            if (hit) {
                projectPath = hit;
                break;
            }
        }

        // Inspection output lists previews; knowledge records list evidence files.
        const allEvidenceFiles = sourceContext.includes('Priority file previews:')
            ? this.extractPriorityPreviewFiles(sourceContext)
            : this.extractEvidenceFilesFromProjectKnowledge(sourceContext);
        const evidenceFiles = allEvidenceFiles.slice(0, 10);

        const scannedTree = sourceContext.match(/Scanned tree:\n([\s\S]*?)(?:\nPriority file previews:|$)/)?.[1];
        const treePreview = scannedTree?.trim().split('\n').slice(0, 30).join('\n') || '';

        const briefLines: string[] = [
            '[JARVIS PROJECT BRIEF]',
            `Project evidence target: ${projectPath}`
        ];
        if (evidenceFiles.length) {
            briefLines.push(`Evidence files available:\n${evidenceFiles.map((file) => `- ${file}`).join('\n')}`);
        } else {
            briefLines.push('Evidence files available: not enough concrete file markers were found.');
        }
        if (treePreview) {
            briefLines.push(`Visible structure preview:\n${treePreview}`);
        }
        briefLines.push(responseContract);

        // Empty entries are dropped, so no blank separator lines appear in this branch.
        return briefLines.filter(Boolean).join('\n');
    }

    /**
     * Returns the fixed six-point response contract appended to thinking-partner
     * briefs.
     *
     * @returns A heading line followed by six numbered rules, newline-separated.
     */
    private buildThinkingPartnerResponseContract(): string {
        const rules = [
            'Start with a direct verdict, not a generic compliment.',
            'Separate confirmed facts from inferences.',
            'Name the strongest part of the direction and the weakest/missing part.',
            'Identify the real decision fork the user is facing.',
            'Suggest one small next action that would make the project direction clearer.',
            'If project evidence is thin, say what must be inspected next instead of pretending certainty.'
        ];
        const numbered = rules.map((rule, index) => `${index + 1}. ${rule}`);
        return ['Thinking partner response contract:', ...numbered].join('\n');
    }

    /**
     * Produces the history sent with a request: assistant messages with string
     * content are copied with their content sanitized; every other message is
     * passed through as the same object.
     *
     * @param history Chat history to prepare.
     * @returns A new array; the input array is not modified.
     */
    private buildRequestHistory(history: ChatMessage[]): ChatMessage[] {
        return history.map((entry) => {
            if (entry.role === 'assistant' && typeof entry.content === 'string') {
                const cleaned = this.sanitizeHistoryAssistantContent(entry.content);
                return { ...entry, content: cleaned };
            }
            return entry;
        });
    }

    /**
     * Strips debug/trace sections from an assistant message before it is
     * re-sent as history.
     *
     * Removed: the "2nd Brain Trace" `<details>` block and the Markdown sections
     * headed "Second Brain Debug JSON", "Candidate records for this discussion",
     * "후보 기록" and "프로젝트 기록 검토" (each up to the next `#`/`##` heading or
     * the end of the text). Runs of three or more newlines are then collapsed to
     * two and the result is trimmed.
     *
     * @param content Assistant message text.
     * @returns The text without those sections.
     */
    private sanitizeHistoryAssistantContent(content: string): string {
        const removableSections: RegExp[] = [
            /<details>\s*<summary>2nd Brain Trace:[\s\S]*?<\/details>/gi,
            /## Second Brain Debug JSON[\s\S]*?(?=\n## |\n# |$)/gi,
            /## Candidate records for this discussion[\s\S]*?(?=\n## |\n# |$)/gi,
            /## 후보 기록[\s\S]*?(?=\n## |\n# |$)/gi,
            /## 프로젝트 기록 검토[\s\S]*?(?=\n## |\n# |$)/gi
        ];

        let cleaned = content;
        for (const section of removableSections) {
            cleaned = cleaned.replace(section, '');
        }
        return cleaned.replace(/\n{3,}/g, '\n\n').trim();
    }

    /**
     * Builds the "[RECENT LOCAL PROJECT KNOWLEDGE]" context block for prompts
     * that look like a follow-up to a project knowledge discussion.
     *
     * The block embeds the path of the most recent knowledge record plus a
     * summary of its content (via `summarizeText` with a 5000 limit).
     *
     * @param prompt Raw user prompt.
     * @param rootPath Workspace root used to locate knowledge records.
     * @returns The context block, or `''` when there is no root, the prompt is
     *          not a follow-up, no record is found, or the record cannot be read
     *          (the read failure is logged).
     */
    private buildRecentProjectKnowledgeContext(prompt: string, rootPath: string): string {
        const isFollowup = Boolean(rootPath) && this.isProjectKnowledgeFollowupRequest(prompt);
        if (!isFollowup) {
            return '';
        }

        const recordPath = this.findRecentProjectKnowledgeRecord(rootPath);
        if (!recordPath) {
            return '';
        }

        let recordSummary: string;
        try {
            recordSummary = summarizeText(fs.readFileSync(recordPath, 'utf8'), 5000);
        } catch (error: any) {
            logError('Failed to load recent project knowledge record.', { recordPath, error: error?.message || String(error) });
            return '';
        }

        const instructions = [
            '[RECENT LOCAL PROJECT KNOWLEDGE]',
            'The current user request appears to continue a previous local project knowledge discussion.',
            `Use this recently generated project knowledge record as project evidence: ${recordPath}`,
            'When answering, explicitly say that the analysis is based on the recently generated project knowledge record and local project structure. Do not imply that Second Brain Trace was the only evidence.',
            'If deeper architecture detail is needed, recommend reading the concrete source files next instead of asking for the project path again.'
        ];
        return [...instructions, '', recordSummary].join('\n');
    }

    /**
     * Appends a "## 근거" (evidence) section naming the recent project knowledge
     * record when the answer does not already mention that record's path.
     *
     * @param content Model answer.
     * @param recentProjectKnowledgeContext Context block the answer was based on.
     * @returns `content` unchanged when no record path can be extracted or the
     *          answer already contains it; otherwise the trimmed answer followed
     *          by the evidence section (up to 8 evidence files listed).
     */
    private ensureRecentProjectKnowledgeEvidence(content: string, recentProjectKnowledgeContext: string): string {
        const recordPath = this.extractRecentProjectKnowledgeRecordPath(recentProjectKnowledgeContext);
        const alreadyCited = !recordPath || content.includes(recordPath);
        if (alreadyCited) {
            return content;
        }

        const evidenceFiles = this.extractEvidenceFilesFromProjectKnowledge(recentProjectKnowledgeContext).slice(0, 8);

        const sectionParts: string[] = [
            '## 근거',
            `이번 답변은 최근 생성된 프로젝트 지식 기록과 로컬 프로젝트 구조를 기준으로 작성했습니다: \`${recordPath}\``
        ];
        if (evidenceFiles.length) {
            const bulletList = evidenceFiles.map((file) => `- \`${file}\``).join('\n');
            sectionParts.push(`확인된 근거 파일:\n${bulletList}`);
        }

        // One blank line separates the answer from the evidence section.
        return `${content.trim()}\n\n${sectionParts.join('\n\n')}`;
    }

    /**
     * Appends a "## 근거" (evidence) section describing the inspected local
     * project path when the inspection succeeded and the answer has no such
     * section yet.
     *
     * @param content Model answer.
     * @param localPathContext Output of the local path inspection.
     * @returns `content` unchanged when inspection did not succeed, the answer
     *          already has a "## 근거" heading, or neither a path nor preview
     *          files can be extracted; otherwise the trimmed answer followed by
     *          the evidence section (up to 10 files listed).
     */
    private ensureLocalProjectPathEvidence(content: string, localPathContext: string): string {
        const inspectionSucceeded = localPathContext.includes('Access: succeeded');
        if (!inspectionSucceeded || content.includes('## 근거')) {
            return content;
        }

        const projectPath = localPathContext.match(/Path:\s*(.+)/)?.[1]?.trim();
        const evidenceFiles = this.extractPriorityPreviewFiles(localPathContext).slice(0, 10);
        if (!projectPath && evidenceFiles.length === 0) {
            return content;
        }

        const sectionParts: string[] = ['## 근거'];
        if (projectPath) {
            sectionParts.push(`이번 답변은 로컬 프로젝트 경로 \`${projectPath}\`에서 확인한 파일 구조와 코드 프리뷰를 기준으로 작성했습니다.`);
        } else {
            sectionParts.push('이번 답변은 확인된 로컬 프로젝트 파일 구조와 코드 프리뷰를 기준으로 작성했습니다.');
        }
        if (evidenceFiles.length) {
            const bulletList = evidenceFiles.map((file) => `- \`${file}\``).join('\n');
            sectionParts.push(`확인된 근거 파일:\n${bulletList}`);
        }

        // One blank line separates the answer from the evidence section.
        return `${content.trim()}\n\n${sectionParts.join('\n\n')}`;
    }

    /**
     * Extracts the knowledge record path from a recent-project-knowledge context
     * block, i.e. the absolute `.md` path that follows "project evidence:".
     *
     * The previous pattern only accepted paths under one hard-coded developer
     * directory (`/Volumes/Data/project/Antigravity/`), so the path was never
     * recognised in any other workspace. Any absolute path is now accepted:
     * POSIX (`/...`, `~/...`), Windows drive (`C:\...`, `C:/...`) or UNC
     * (`\\server\...`). Inputs the old pattern matched yield the same capture.
     *
     * @param recentProjectKnowledgeContext Context block to search.
     * @returns The record path, or `null` when none is present.
     */
    private extractRecentProjectKnowledgeRecordPath(recentProjectKnowledgeContext: string): string | null {
        // Preferred form: a single whitespace-free token ending in `.md`.
        const tokenMatch = recentProjectKnowledgeContext.match(
            /project evidence:\s*((?:[A-Za-z]:[\\/]|\\\\|~?\/)[^\s`"'<>]+\.md)/i
        );
        if (tokenMatch?.[1]) {
            return tokenMatch[1];
        }

        // Fallback for paths containing spaces: take the rest of the line, provided
        // the line ends in `.md`.
        const lineMatch = recentProjectKnowledgeContext.match(
            /project evidence:[ \t]*((?:[A-Za-z]:[\\/]|\\\\|~?\/)[^\r\n`"'<>]*\.md)[ \t]*(?:\r?\n|$)/i
        );
        return lineMatch?.[1] || null;
    }

    /**
     * Extracts evidence file names from a project knowledge record.
     *
     * Prefers back-ticked bullet items under "## Evidence Files". When that
     * section yields nothing, falls back to back-ticked values under
     * "## Confirmed Structure" that look like paths (contain a slash/backslash
     * or end in an extension).
     *
     * @param recentProjectKnowledgeContext Text containing the knowledge record.
     * @returns De-duplicated file names in first-seen order; may be empty.
     */
    private extractEvidenceFilesFromProjectKnowledge(recentProjectKnowledgeContext: string): string[] {
        const sectionBody = (heading: RegExp): string =>
            recentProjectKnowledgeContext.match(heading)?.[1] || '';

        const listed = new Set<string>();
        const evidenceSection = sectionBody(/## Evidence Files\n([\s\S]*?)(?=\n## |\n# |$)/i);
        for (const hit of evidenceSection.matchAll(/-\s+`([^`]+)`/g)) {
            listed.add(hit[1].trim());
        }
        if (listed.size > 0) {
            return Array.from(listed);
        }

        const pathLike = new Set<string>();
        const structureSection = sectionBody(/## Confirmed Structure\n([\s\S]*?)(?=\n## |\n# |$)/i);
        for (const hit of structureSection.matchAll(/`([^`]+)`/g)) {
            const value = hit[1].trim();
            if (/[\\/]/.test(value) || /\.[a-z0-9]+$/i.test(value)) {
                pathLike.add(value);
            }
        }
        return Array.from(pathLike);
    }

    /**
     * Locates the most relevant `*_project_knowledge_overview.md` record.
     *
     * 1. Chat history is scanned newest-first for absolute paths ending in
     *    `_project_knowledge_overview.md`; the first one that exists on disk is
     *    returned. The previous implementation only recognised paths under one
     *    hard-coded developer directory (`/Volumes/Data/project/Antigravity/`);
     *    that pattern is still tried first for each message so existing
     *    behaviour is preserved, and any other absolute path is now accepted too.
     * 2. Otherwise `<rootPath>/docs/records` is walked (depth-limited, dot
     *    directories skipped) and the record with the newest modification time
     *    is returned.
     *
     * Modification times are read once per file while walking, and files that
     * cannot be stat'ed are skipped, so a file disappearing mid-scan no longer
     * throws from inside the sort comparator.
     *
     * @param rootPath Workspace root that contains `docs/records`.
     * @returns Absolute path of the record, or `null` when none is found.
     */
    private findRecentProjectKnowledgeRecord(rootPath: string): string | null {
        const legacyRecordPattern = /\/Volumes\/Data\/project\/Antigravity\/[^\s`"'<>]+_project_knowledge_overview\.md/i;
        const absoluteRecordPattern = /(?:[A-Za-z]:[\\/]|\\\\|\/)[^\s`"'<>]+_project_knowledge_overview\.md/gi;

        for (let index = this.chatHistory.length - 1; index >= 0; index--) {
            const content = this.chatHistory[index]?.content;
            if (typeof content !== 'string') {
                continue;
            }

            const mentioned = new Set<string>();
            const legacyHit = content.match(legacyRecordPattern)?.[0];
            if (legacyHit) {
                mentioned.add(legacyHit);
            }
            for (const hit of content.matchAll(absoluteRecordPattern)) {
                mentioned.add(hit[0]);
            }

            for (const candidate of mentioned) {
                // The generic pattern can also pick up URL fragments; only real,
                // existing absolute paths are accepted.
                if (path.isAbsolute(candidate) && fs.existsSync(candidate)) {
                    return candidate;
                }
            }
        }

        const recordsRoot = path.join(rootPath, 'docs', 'records');
        if (!fs.existsSync(recordsRoot)) {
            return null;
        }

        let newest: { file: string; mtimeMs: number } | null = null;
        const visit = (dir: string, depth: number): void => {
            if (depth > 5) return;
            let entries: fs.Dirent[] = [];
            try {
                entries = fs.readdirSync(dir, { withFileTypes: true });
            } catch {
                // Unreadable directory: best-effort scan, skip it.
                return;
            }
            for (const entry of entries) {
                const fullPath = path.join(dir, entry.name);
                if (entry.isDirectory()) {
                    if (!entry.name.startsWith('.')) visit(fullPath, depth + 1);
                    continue;
                }
                if (!/_project_knowledge_overview\.md$/i.test(entry.name)) {
                    continue;
                }
                try {
                    const mtimeMs = fs.statSync(fullPath).mtimeMs;
                    // Strict comparison keeps the first-discovered file on ties.
                    if (!newest || mtimeMs > newest.mtimeMs) {
                        newest = { file: fullPath, mtimeMs };
                    }
                } catch {
                    // File vanished or is a broken link: ignore it.
                }
            }
        };
        visit(recordsRoot, 0);

        const winner = newest as { file: string; mtimeMs: number } | null;
        return winner ? winner.file : null;
    }

    /**
     * Extracts every local path mentioned in a prompt.
     *
     * - Absolute paths are matched with the module-level `POSIX_ABS_PATH_SRC`
     *   and `WIN_ABS_PATH_SRC` patterns and returned as written (minus trailing
     *   punctuation).
     * - Relative paths (`src/lib/engine.ts`, `components\App.tsx`, ...) are
     *   resolved against `rootPath` when it is given. If the file does not exist
     *   directly under the root, each immediate sub-project directory is tried.
     *   When nothing exists, the root-relative absolute path is returned anyway.
     *   Without `rootPath` the relative path is returned unchanged.
     *
     * The sub-project search used to be limited to four hard-coded directory
     * names. Those are still tried first, in the same order, and the remaining
     * non-hidden, non-excluded subdirectories of the root are tried afterwards,
     * so other workspaces resolve relative paths as well.
     *
     * @param prompt Raw user prompt.
     * @param rootPath Optional workspace root for resolving relative paths.
     * @returns De-duplicated paths in discovery order.
     */
    private extractLocalProjectPaths(prompt: string, rootPath?: string): string[] {
        const results: string[] = [];
        const stripTrailingPunct = (s: string) => s.replace(/[),.;\]]+$/g, '');

        // 1. Absolute paths: POSIX (/Volumes/, /Users/, /home/, /opt/, ~/) first,
        //    then Windows (C:\..., D:/..., \\server\share\...).
        for (const source of [POSIX_ABS_PATH_SRC, WIN_ABS_PATH_SRC]) {
            const matches = prompt.match(new RegExp(source, 'gi')) || [];
            for (const m of matches) {
                results.push(stripTrailingPunct(m));
            }
        }

        // Sub-project directory names, computed lazily and at most once per call.
        let subProjectDirs: string[] | null = null;
        const listSubProjectDirs = (root: string): string[] => {
            if (subProjectDirs) {
                return subProjectDirs;
            }
            const legacyNames = ['ConnectAI', 'Datacollector_MAC', 'Agent', 'skybound'];
            let discovered: string[] = [];
            try {
                discovered = fs.readdirSync(root, { withFileTypes: true })
                    .filter((entry) => entry.isDirectory() && !entry.name.startsWith('.') && !EXCLUDED_DIRS.has(entry.name))
                    .map((entry) => entry.name)
                    .sort((a, b) => a.localeCompare(b));
            } catch {
                // Root not readable: fall back to the legacy names only.
                discovered = [];
            }
            subProjectDirs = Array.from(new Set([...legacyNames, ...discovered]));
            return subProjectDirs;
        };

        // 2. Relative paths: src/lib/engine.ts, components/App.tsx, src\lib\engine.ts, ...
        const relMatches = prompt.match(/(?:^|[\s,])(?:(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)[\\/][^\s`"'<>]+\.[a-z]{1,6})/gi) || [];
        for (const m of relMatches) {
            // The match may include the leading separator (whitespace or comma).
            const cleaned = stripTrailingPunct(m.trim().replace(/^,\s*/, ''));
            if (!rootPath) {
                results.push(cleaned);
                continue;
            }

            // Resolve against the workspace root first.
            const absPath = path.resolve(rootPath, cleaned);
            if (fs.existsSync(absPath)) {
                results.push(absPath);
                continue;
            }

            // Then look inside each sub-project directory under the root.
            const nestedHit = listSubProjectDirs(rootPath)
                .map((dirName) => path.resolve(rootPath, dirName, cleaned))
                .find((candidate) => fs.existsSync(candidate));

            // Fallback: keep the root-relative path even though it does not exist.
            results.push(nestedHit ?? absPath);
        }

        return Array.from(new Set(results));
    }

    /**
     * Inspects a local path and renders a plain-text report for the model.
     *
     * The report always starts with `Path:` and an `Access: succeeded|failed`
     * line. For a file it includes the content, limited to 8000 characters for
     * known code/doc extensions and 2000 otherwise. For a directory it includes
     * a scanned tree plus previews of up to 12 priority files (2200-character
     * limit each). Any error, including one thrown by `validatePath`, is
     * reported as a failed access with the error message as the reason.
     *
     * @param targetPath Path as referenced by the user.
     * @param rootPath Workspace root passed to `validatePath`.
     * @returns The newline-joined inspection report.
     */
    private inspectLocalProjectPath(targetPath: string, rootPath: string): string {
        const pathLine = `Path: ${targetPath}`;
        const failureReport = (reason: string): string =>
            [pathLine, 'Access: failed', `Reason: ${reason}`].join('\n');

        try {
            const resolved = validatePath(rootPath, targetPath);
            if (!fs.existsSync(resolved)) {
                return failureReport('path does not exist in the current environment.');
            }

            if (fs.statSync(resolved).isDirectory()) {
                const tree = this.listProjectTree(resolved, resolved, 0, 4, 140);
                const priorityFiles = this.findPriorityProjectFiles(resolved).slice(0, 12);

                const previewChunks: string[] = [];
                for (const file of priorityFiles) {
                    const label = `File: ${path.relative(resolved, file)}`;
                    try {
                        const text = fs.readFileSync(file, 'utf8');
                        previewChunks.push([label, summarizeText(text, 2200)].join('\n'));
                    } catch (error: any) {
                        // A single unreadable file must not fail the whole inspection.
                        previewChunks.push(`${label}\nRead failed: ${error.message}`);
                    }
                }
                const previews = previewChunks.join('\n\n');

                const previewLine = priorityFiles.length > 0
                    ? `Priority file previews:\n${previews}`
                    : 'Priority file previews: no package, README, docs, src, or config files found in the first scan.';

                return [
                    pathLine,
                    'Access: succeeded',
                    'Type: directory',
                    `Scanned tree:\n${tree || '(no visible files found)'}`,
                    previewLine
                ].join('\n');
            }

            // Single file: code and documentation files get a larger preview so the
            // model can analyse them precisely.
            const codeOrDocExtensions = new Set([
                '.ts', '.js', '.tsx', '.jsx', '.py', '.java', '.go', '.rs', '.md', '.json', '.yaml', '.yml',
                '.toml', '.css', '.html', '.sql', '.sh', '.zsh', '.env', '.xml', '.swift', '.kt'
            ]);
            const content = fs.readFileSync(resolved, 'utf8');
            const fileName = path.basename(resolved);
            const ext = path.extname(resolved).toLowerCase();
            const previewLimit = codeOrDocExtensions.has(ext) ? 8000 : 2000;
            const coverage = content.length <= previewLimit ? 'complete' : `first ${previewLimit} chars`;

            return [
                pathLine,
                'Access: succeeded',
                `Type: file (${fileName})`,
                `Size: ${content.length} characters`,
                `Full content (${coverage}):\n\`\`\`${ext.slice(1)}\n${summarizeText(content, previewLimit)}\n\`\`\``
            ].join('\n');
        } catch (error: any) {
            return failureReport(error.message);
        }
    }

    /**
     * Corrects answers that ask the user to upload code, or claim the code is
     * unavailable, even though the local path inspection succeeded.
     *
     * When such wording is detected, the offending lines are blanked out and a
     * fixed "## 경로 확인 결과" header is prepended, stating that the path is
     * accessible and that files will be read via action tags. Note that the
     * removal patterns cover fewer phrases than the detection patterns.
     *
     * @param content Model answer.
     * @param localPathContext Output of the local path inspection.
     * @returns `content` unchanged when inspection did not succeed or no
     *          offending wording is found; otherwise the corrected answer.
     */
    private enforceLocalPathReviewAnswer(content: string, localPathContext: string): string {
        if (!localPathContext.includes('Access: succeeded')) {
            return content;
        }

        const uploadRequestPattern = /(코드(?:를|가)?\s*업로드|파일(?:을|를)?\s*업로드|소스\s*코드(?:를)?\s*업로드|코드를 제공|파일을 제공|핵심 파일(?:이나|과|을|를)?.*제공|파일 목록(?:이나|과|을|를)?.*제공|구조(?:를|와|나)?.*제공|자료(?:를|가)?.*필요|folder path is not enough|upload (?:the )?(?:source )?code|please provide (?:the )?files|먼저 분석할까요|살펴볼까요)/i;
        const accessDenialPattern = /(실제 코드 내용이 없|코드 내용이 없|코드가 없|코드를 볼 수 없|소스 코드를 볼 수 없|실제 구현 자료가 없|실제 구현 근거 없이는|현재로서는.*자료가 없|기술적인 진단.*수 없습니다|코드를 읽어야만|파일 구조만으로는.*판단할 수 없|코드의 논리적 흐름.*판단할 수 없)/i;

        const needsCorrection = uploadRequestPattern.test(content) || accessDenialPattern.test(content);
        if (!needsCorrection) {
            return content;
        }

        // Whole lines matching any of these are emptied, in this order.
        const offendingLinePatterns: RegExp[] = [
            /.*(?:코드(?:를|가)?\s*업로드|파일(?:을|를)?\s*업로드|소스\s*코드(?:를)?\s*업로드|코드를 제공|파일을 제공).*$/gmi,
            /.*(?:핵심 파일(?:이나|과|을|를)?.*제공|파일 목록(?:이나|과|을|를)?.*제공|구조(?:를|와|나)?.*제공|자료(?:를|가)?.*필요).*$/gmi,
            /.*(?:실제 코드 내용이 없|코드 내용이 없|코드가 없|코드를 볼 수 없|소스 코드를 볼 수 없|실제 구현 자료가 없|실제 구현 근거 없이는|현재로서는.*자료가 없|코드를 읽어야만|파일 구조만으로는.*판단할 수 없).*$/gmi,
            /.*(?:먼저 분석할까요|살펴볼까요).*$/gmi
        ];
        const remaining = offendingLinePatterns
            .reduce((text, pattern) => text.replace(pattern, ''), content)
            .trim();

        const header = [
            '## 경로 확인 결과',
            '',
            '제공된 로컬 프로젝트 경로에는 접근할 수 있고, 코드 파일도 일부 확인되었습니다. 만약 추가적인 코드 확인이 필요하다면 <read_file> 이나 <list_files> 액션 태그를 즉시 사용하여 스스로 파일을 읽어보고 분석을 진행하겠습니다.',
            '',
            '이전 응답에서 "파일을 제공해주세요" 라거나 "먼저 분석할까요?" 라고 묻는 것은 잘못된 안내입니다. 액션 태그를 통해 스스로 필요한 코드를 열어보겠습니다.'
        ].join('\n');

        // If nothing is left of the original answer, only the header is returned.
        return remaining ? `${header}\n\n${remaining}` : header;
    }

    /**
     * Reports whether an answer contains wording that stalls on a clarifying
     * question, or states that no record was produced, instead of delivering
     * project knowledge.
     *
     * @param content Model answer.
     * @returns `true` when any of the blocking phrases occurs.
     */
    private isBlockingProjectKnowledgeAnswer(content: string): boolean {
        const blockingPhrases = /(블로킹 질문|어떤 기능 영역|어떤 부분.*먼저|어떤 기능이나 아키텍처|구체적인 방향|방향 설정이 필요|명확히 알려주시면|우선적으로 정리|최종 사용 목적|Question reason|별도의 파일 기록.*생성되지|파일 기록이 생성되지|더 깊이 있는 분석.*지정|해당 기능.*지정하여 요청)/i;
        return blockingPhrases.test(content);
    }

    /**
     * Reports whether an answer contains project-knowledge-creation wording
     * (the phrases used by the knowledge fallback answer and record).
     *
     * @param content Model answer.
     * @returns `true` when any knowledge-creation phrase occurs.
     */
    private isMisroutedProjectKnowledgeAnswer(content: string): boolean {
        const knowledgeCreationPhrases = /(기본 지식 생성 방향|바로 만들 지식 초안|Project Knowledge Overview|프로젝트 지식 1번 문서|프로젝트 지식 기록을 생성|프로젝트 지식.*만들면|지식 생성 작업)/i;
        return knowledgeCreationPhrases.test(content);
    }

    /**
     * Builds the canned answer used when a project knowledge request should be
     * answered directly from the local path inspection.
     *
     * The answer contains a summary, the inspected path, up to 10 evidence
     * files, up to 18 lines of the scanned tree, a draft overview document and a
     * next action (which names the written record when `record` is given).
     * Empty entries are dropped before joining, so the result contains no blank
     * lines between sections.
     *
     * NOTE(review): the draft's Purpose and Confirmed Structure text is fixed
     * and describes this extension's own layout regardless of the inspected
     * project — confirm that this is intended.
     *
     * @param localPathContext Output of the local path inspection.
     * @param record Written record, if one was created.
     * @returns The Markdown answer.
     */
    private buildProjectKnowledgeFallbackAnswer(localPathContext: string, record?: { filePath: string; relativePath: string } | null): string {
        const projectPath = localPathContext.match(/Path:\s*(.+)/)?.[1]?.trim() || '제공된 로컬 프로젝트 경로';
        const projectDisplayName = this.getProjectDisplayName(projectPath);

        const scannedTree = localPathContext.match(/Scanned tree:\n([\s\S]*?)(?:\nPriority file previews:|$)/)?.[1];
        const treePreview = scannedTree?.trim().split('\n').slice(0, 18).join('\n') || '';

        const evidenceFiles = this.extractPriorityPreviewFiles(localPathContext).slice(0, 10);
        const evidenceText = evidenceFiles.length
            ? evidenceFiles.map((file) => `- ${file}`).join('\n')
            : '- package.json, src, docs, config 계열 파일을 우선 확인';

        const introduction = [
            '## 간단 요약',
            '맞아요. 이 경우에는 추가 질문으로 멈출 필요 없이, 지금 확인된 로컬 프로젝트 구조를 기준으로 기본 프로젝트 지식을 바로 만들면 됩니다.',
            '',
            '## 기본 지식 생성 방향',
            `대상 프로젝트는 \`${projectPath}\`입니다. 우선 MVP 지식은 “프로젝트 개요 + 주요 모듈 + 확인된 근거 파일 + 다음에 깊게 볼 영역” 형태로 만드는 것이 가장 안전합니다.`,
            '',
            '## 확인된 근거',
            evidenceText,
            '',
            treePreview ? `## 확인된 구조 일부\n\`\`\`text\n${treePreview}\n\`\`\`` : '',
            ''
        ];

        const draft = [
            '## 바로 만들 지식 초안',
            '```markdown',
            `# ${projectDisplayName} Project Knowledge Overview`,
            '',
            '## Purpose',
            `${projectDisplayName}는 VS Code 안에서 로컬 AI 에이전트, Second Brain, 프로젝트 기록, 에이전트 스킬을 연결하는 개발 보조 프로젝트다.`,
            '',
            '## Confirmed Structure',
            '- `src/agent.ts`: 에이전트 실행, 로컬 경로 프리플라이트, Second Brain Trace, 액션 실행 흐름의 중심.',
            '- `src/sidebarProvider.ts`: Webview UI, 브레인/모델/프로젝트 선택, 프롬프트 전달, 기록 UI를 담당.',
            '- `src/features/secondBrainTrace.ts`: Second Brain 검색 결과와 근거 정책을 구성.',
            '- `src/features/projectChronicle/`: 프로젝트 기록을 Markdown으로 관리하는 Chronicle 기능.',
            '- `src/core/`: 큐, 이벤트, 트랜잭션, 오류 처리 등 실행 안정성 계층.',
            '- `tests/`: Second Brain, 로컬 경로 프리플라이트, Chronicle, 보안/트랜잭션 회귀 테스트.',
            '',
            '## Current Knowledge Gap',
            '- 전체 아키텍처는 파일 구조와 일부 프리뷰 기준으로 파악 가능하지만, 세부 동작 지식은 `src/agent.ts`, `src/sidebarProvider.ts`, `secondBrainTrace.ts`, `projectChronicle` 순서로 심화 분석해 보강해야 한다.',
            '',
            '## Recommended Next Record',
            `- \`docs/records/${path.basename(projectPath)}/development/YYYY-MM-DD_${projectDisplayName.toLowerCase()}_project_knowledge_overview.md\``,
            '```',
            ''
        ];

        const nextAction = record
            ? `프로젝트 지식 1번 문서를 생성했습니다: \`${record.filePath}\``
            : '기본값으로는 위 초안을 프로젝트 지식 1번 문서로 저장하고, 그 다음 `agent.ts` 실행 흐름 지식을 별도 문서로 쪼개는 것이 좋습니다.';

        return [...introduction, ...draft, '## 다음 액션', nextAction]
            .filter((line) => line.length > 0)
            .join('\n');
    }

    /**
     * Extracts the names of previewed files from a local path inspection report.
     *
     * Prefers `File: <name>` marker lines. When there are none, falls back to
     * `### <name>` headings after "Priority file previews:" whose text looks like
     * a path (contains a slash/backslash or ends in an extension).
     *
     * @param localPathContext Output of the local path inspection.
     * @returns De-duplicated file names in first-seen order; may be empty.
     */
    private extractPriorityPreviewFiles(localPathContext: string): string[] {
        const markedFiles = new Set<string>();
        for (const hit of localPathContext.matchAll(/^File:\s*(.+)$/gmi)) {
            markedFiles.add(hit[1].trim());
        }
        if (markedFiles.size > 0) {
            return Array.from(markedFiles);
        }

        const previewBlock = localPathContext.match(/Priority file previews:\n([\s\S]*)/)?.[1] || '';
        const headingFiles = new Set<string>();
        for (const hit of previewBlock.matchAll(/^###\s+(.+)$/gmi)) {
            const heading = hit[1].trim();
            const looksLikePath = /[\\/]/.test(heading) || /\.[a-z0-9]+$/i.test(heading);
            if (looksLikePath) {
                headingFiles.add(heading);
            }
        }
        return Array.from(headingFiles);
    }

    /**
     * Writes the project knowledge overview record to disk.
     *
     * The file is created at
     * `<project>/docs/records/<project basename>/development/<YYYY-MM-DD>_<slug>_project_knowledge_overview.md`,
     * where the date is the UTC date from `toISOString()` and the slug is the
     * lowercased display name reduced to `[a-z0-9-]` (or `project` when that is
     * empty). Parent directories are created as needed.
     *
     * @param localPathContext Output of the local path inspection; must contain a
     *        `Path:` line and `Access: succeeded`.
     * @returns The absolute and project-relative record paths, or `null` when the
     *          preconditions are not met or writing fails (the failure is logged).
     */
    private writeProjectKnowledgeRecord(localPathContext: string): { filePath: string; relativePath: string } | null {
        const projectPath = localPathContext.match(/Path:\s*(.+)/)?.[1]?.trim();
        const inspectionSucceeded = localPathContext.includes('Access: succeeded');
        if (!projectPath || !inspectionSucceeded) {
            return null;
        }

        try {
            const displayName = this.getProjectDisplayName(projectPath);
            const datePrefix = new Date().toISOString().slice(0, 10);
            const slugBase = displayName.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, '');
            const slug = slugBase || 'project';
            const fileName = `${datePrefix}_${slug}_project_knowledge_overview.md`;

            const relativePath = path.join('docs', 'records', path.basename(projectPath), 'development', fileName);
            const filePath = path.join(projectPath, relativePath);

            fs.mkdirSync(path.dirname(filePath), { recursive: true });
            fs.writeFileSync(filePath, this.buildProjectKnowledgeMarkdown(localPathContext), 'utf8');
            return { filePath, relativePath };
        } catch (error: any) {
            logError('Failed to write project knowledge record.', { error: error?.message || String(error) });
            return null;
        }
    }

    /**
     * Renders the Markdown body of the project knowledge overview record.
     *
     * The document contains a title, timestamp, project name and repository
     * path, followed by Purpose, Confirmed Structure, Evidence Files (from the
     * inspection's preview files), a Scanned Tree Excerpt (first 80 lines),
     * Current Knowledge Gap and Next Records sections.
     *
     * NOTE(review): the Purpose and Confirmed Structure text is fixed and
     * describes this extension's own layout regardless of the inspected
     * project — confirm that this is intended.
     *
     * @param localPathContext Output of the local path inspection.
     * @returns The Markdown document; blank lines between sections are kept.
     */
    private buildProjectKnowledgeMarkdown(localPathContext: string): string {
        const pathMatch = localPathContext.match(/Path:\s*(.+)/);
        const projectPath = pathMatch?.[1]?.trim() || 'Unknown project path';
        const projectDisplayName = this.getProjectDisplayName(projectPath);
        // The tree section ends at the preview marker, or at the end of the text.
        const treeMatch = localPathContext.match(/Scanned tree:\n([\s\S]*?)(?:\nPriority file previews:|$)/);
        const treePreview = treeMatch?.[1]?.trim().split('\n').slice(0, 80).join('\n') || '';
        const priorityFiles = this.extractPriorityPreviewFiles(localPathContext);

        return [
            `# ${projectDisplayName} Project Knowledge Overview`,
            '',
            `Date: ${new Date().toISOString()}`,
            `Project: ${projectDisplayName}`,
            `Repository: \`${projectPath}\``,
            '',
            '## Purpose',
            `${projectDisplayName}는 VS Code 안에서 로컬 AI 에이전트, Second Brain, 프로젝트 기록, 에이전트 스킬을 연결하는 개발 보조 프로젝트다.`,
            '',
            '## Confirmed Structure',
            '- `src/agent.ts`: 에이전트 실행, 로컬 경로 프리플라이트, Second Brain Trace, 액션 실행 흐름의 중심.',
            '- `src/sidebarProvider.ts`: Webview UI, 브레인/모델/프로젝트 선택, 프롬프트 전달, 기록 UI를 담당.',
            '- `src/features/secondBrainTrace.ts`: Second Brain 검색 결과와 근거 정책을 구성.',
            '- `src/features/projectChronicle/`: 프로젝트 기록을 Markdown으로 관리하는 Chronicle 기능.',
            '- `src/core/`: 큐, 이벤트, 트랜잭션, 오류 처리 등 실행 안정성 계층.',
            '- `tests/`: Second Brain, 로컬 경로 프리플라이트, Chronicle, 보안/트랜잭션 회귀 테스트.',
            '',
            '## Evidence Files',
            ...(priorityFiles.length ? priorityFiles.map((file) => `- \`${file}\``) : ['- 확인된 우선 파일 없음']),
            '',
            '## Scanned Tree Excerpt',
            '```text',
            treePreview || '(no scanned tree captured)',
            '```',
            '',
            '## Current Knowledge Gap',
            '- 전체 아키텍처는 파일 구조와 일부 프리뷰 기준으로 파악 가능하지만, 세부 동작 지식은 `src/agent.ts`, `src/sidebarProvider.ts`, `secondBrainTrace.ts`, `projectChronicle` 순서로 심화 분석해 보강해야 한다.',
            '',
            '## Next Records',
            '- `agent.ts` 실행 흐름 상세 분석',
            '- Second Brain Trace 검색 및 근거 정책 분석',
            '- Project Chronicle 기록 생성 흐름 분석'
        ].join('\n');
    }

    /**
     * Derive the name shown to the user for a project directory.
     *
     * A final path segment equal to "connectai" (case-insensitive) is
     * displayed as "Astra"; any other folder name is returned unchanged.
     *
     * @param projectPath Path of the project directory.
     * @returns The display name for that project.
     */
    private getProjectDisplayName(projectPath: string): string {
        const folderName = path.basename(projectPath);
        if (/^connectai$/i.test(folderName)) {
            return 'Astra';
        }
        return folderName;
    }

    /**
     * Produce an indented, depth-first listing of a directory tree.
     *
     * Dot-prefixed entries and names in `EXCLUDED_DIRS` are skipped. Within a
     * directory, sub-directories are listed before files and each group is
     * sorted by name. Every line is indented two spaces per depth level and
     * shows the path relative to `root` (directories get a trailing `/`).
     *
     * @param root Directory that relative paths are computed against.
     * @param current Directory being listed in this call.
     * @param depth Depth of `current` (the first call passes the starting depth).
     * @param maxDepth Deepest level that is still listed.
     * @param limit Maximum number of lines this call may return.
     * @returns The listing joined with `\n`, or '' when the budget is exhausted,
     *   the depth is exceeded, or the directory cannot be read.
     */
    private listProjectTree(root: string, current: string, depth: number, maxDepth: number, limit: number): string {
        if (limit <= 0 || depth > maxDepth) {
            return '';
        }

        let entries: fs.Dirent[] = [];
        try {
            entries = fs.readdirSync(current, { withFileTypes: true })
                .filter((entry) => !entry.name.startsWith('.') && !EXCLUDED_DIRS.has(entry.name))
                .sort((a, b) => Number(b.isDirectory()) - Number(a.isDirectory()) || a.name.localeCompare(b.name));
        } catch {
            // Unreadable directory: contribute nothing rather than failing the whole scan.
            return '';
        }

        const lines: string[] = [];
        for (const entry of entries) {
            if (lines.length >= limit) break;
            const fullPath = path.join(current, entry.name);
            const relative = path.relative(root, fullPath);
            lines.push(`${'  '.repeat(depth)}${relative}${entry.isDirectory() ? '/' : ''}`);
            if (entry.isDirectory() && depth < maxDepth) {
                const child = this.listProjectTree(root, fullPath, depth + 1, maxDepth, limit - lines.length);
                if (child) {
                    // Add the child's lines one by one so `lines.length` reflects the
                    // real line count; pushing the joined string would count a whole
                    // subtree as a single line and let the output exceed `limit`.
                    lines.push(...child.split('\n'));
                }
            }
        }

        return lines.join('\n');
    }

    /**
     * Collect the files under `root` that are worth previewing first, ordered
     * by priority tier and then by path.
     *
     * The walk skips dot-prefixed entries and `EXCLUDED_DIRS`, only descends
     * into recognised source-area directories (and everything below them), and
     * stops entering new directories beyond depth 6 or once 24 files have been
     * collected. The 24 check happens only on directory entry, so the directory
     * being processed is always finished.
     *
     * @param root Project root directory.
     * @returns Absolute file paths, de-duplicated and sorted by tier then path.
     */
    private findPriorityProjectFiles(root: string): string[] {
        const wellKnownNames = new Set([
            'package.json',
            'README.md',
            'readme.md',
            'tsconfig.json',
            'vite.config.ts',
            'vite.config.js',
            'next.config.js',
            'next.config.mjs',
            'webpack.config.js'
        ]);
        const sourceAreaDirPattern = /^(src|app|pages|components|docs|lib|server|backend|frontend|config|features|core|hooks|systems|store|model|utils|ui|api)$/i;
        const sourceCodePattern = /\.(ts|tsx|js|jsx)$/i;
        const sourceAreaFilePattern = /(^|[\\/])(src|app|pages|components|docs|lib|server|backend|frontend|features|core)[\\/].+\.(ts|tsx|js|jsx|md|json)$/i;
        const configFilePattern = /\.(config|rc)\.(js|ts|json)$/i;

        const collected: string[] = [];

        const walk = (dir: string, depth: number, inSourceArea: boolean): void => {
            if (depth > 6 || collected.length >= 24) return;

            let children: fs.Dirent[] = [];
            try {
                children = fs.readdirSync(dir, { withFileTypes: true })
                    .filter((child) => !child.name.startsWith('.') && !EXCLUDED_DIRS.has(child.name));
            } catch {
                return;
            }

            for (const child of children) {
                const childPath = path.join(dir, child.name);

                if (child.isDirectory()) {
                    // Once inside a source area, every nested directory counts as source area too.
                    if (inSourceArea || sourceAreaDirPattern.test(child.name)) {
                        walk(childPath, depth + 1, true);
                    }
                    continue;
                }

                const isWanted =
                    wellKnownNames.has(child.name)
                    || (inSourceArea && sourceCodePattern.test(child.name))
                    || sourceAreaFilePattern.test(path.relative(root, childPath))
                    || configFilePattern.test(child.name);
                if (isWanted) {
                    collected.push(childPath);
                }
            }
        };

        walk(root, 0, false);

        // Ordered tiers: the index of the first matching predicate is the file's rank.
        const tiers: Array<(file: string, relative: string) => boolean> = [
            (file) => path.basename(file) === 'package.json',
            (file) => /readme\.md$/i.test(file),
            (_file, relative) => /^src[\\/]App\.tsx$/i.test(relative),
            (_file, relative) => /^src[\\/]main\.tsx$/i.test(relative),
            (_file, relative) => /^src[\\/]features[\\/]game[\\/]hooks[\\/]useGameEngine\.ts$/i.test(relative),
            (_file, relative) => /^src[\\/]features[\\/]game[\\/]systems[\\/]/i.test(relative),
            (_file, relative) => /^src[\\/]features[\\/]game[\\/]ui[\\/]/i.test(relative),
            (_file, relative) => /^src[\\/]/i.test(relative),
            (_file, relative) => /^docs[\\/]|\.md$/i.test(relative)
        ];
        const tierOf = (file: string): number => {
            const relative = path.relative(root, file);
            const index = tiers.findIndex((matches) => matches(file, relative));
            return index === -1 ? tiers.length : index;
        };

        const uniqueFiles = Array.from(new Set(collected));
        const tierByFile = new Map<string, number>();
        for (const file of uniqueFiles) {
            tierByFile.set(file, tierOf(file));
        }

        return uniqueFiles.sort((a, b) => tierByFile.get(a)! - tierByFile.get(b)! || a.localeCompare(b));
    }

    /**
     * Build the memory / retrieval context text for the current turn.
     *
     * Resets the per-turn bookkeeping fields (`_lastRetrievalInfo`,
     * `_lastLessonContents`, `_lastKnowledgeMix`), runs the retrieval
     * orchestrator with a budget derived from the configured context length and
     * the resolved Knowledge Mix weight, records what was selected, and returns
     * the lesson checklist block followed by the retrieved context.
     *
     * @param currentPrompt The user's prompt for this turn (also used as the embedding query).
     * @param activeBrain Brain profile whose `localBrainPath` is searched.
     * @param agentSkillFile Optional agent skill file used to resolve scope folders and the Knowledge Mix.
     * @returns '' when memory is disabled; otherwise the non-empty blocks joined by a blank line.
     */
    private async buildMemoryContext(currentPrompt: string, activeBrain: BrainProfile, agentSkillFile?: string): Promise<string> {
        const config = getConfig();
        this._lastRetrievalInfo = null;
        this._lastLessonContents = [];
        this._lastKnowledgeMix = null;
        if (!config.memoryEnabled) return '';

        // Update memory manager config in case settings changed
        this.memoryManager.updateConfig({
            enabled: config.memoryEnabled,
            shortTermLimit: config.memoryShortTermMessages,
        });

        const visibleHistory = this.chatHistory.filter((message) => !message.internal);
        // Optional chaining covers both "no workspace" (undefined) and a workspace
        // with zero folders (empty array), which would otherwise throw on `[0].uri`.
        const workspacePath = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath;

        // Resolve scope folders from the agent ↔ knowledge map. When the user
        // hasn't selected an agent (or the selection has no mapping), `folders`
        // is empty and the orchestrator falls back to whole-brain search —
        // keeping the legacy behavior intact.
        const scope = resolveScopeForAgent(agentSkillFile, activeBrain.localBrainPath);

        // Scale retrieval/memory budget with the configured context window so
        // that raising g1nation.contextLength actually gives the RAG pipeline
        // more room. At 32K context we keep the legacy 8K total (≈3.2K
        // retrieval); at 230K we allocate ~57K total (≈23K retrieval). Capped
        // at 80K so scoring stays fast on huge contexts.
        const scaledTotalBudget = Math.min(
            80000,
            Math.max(8000, Math.floor(config.contextLength * 0.25))
        );

        // Pull recent session summaries for the medium-term layer. We read
        // from the sidebar's persisted store directly (same key it writes to)
        // to avoid threading another callback through the agent constructor.
        const rawSessions = this.context.globalState.get<any[]>('chat_sessions', []) || [];
        const recentSessions = compactRecentSessions(
            rawSessions,
            this.currentTaskId,
            Math.max(0, config.memoryMediumTermSessions ?? 0)
        );

        // Hybrid retrieval (optional): when the user has configured an
        // embedding model, fetch a query embedding so searchBrainFiles can
        // blend cosine similarity with TF-IDF. Time-bounded — if the
        // embedding endpoint is slow or down, we fall through with no
        // embedding and the retriever stays in pure-TF-IDF mode.
        let queryEmbedding: number[] | undefined;
        if (config.embeddingModel) {
            const EMBED_QUERY_TIMEOUT_MS = 4000;
            let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
            try {
                queryEmbedding = await Promise.race([
                    embedQuery(currentPrompt, { baseUrl: config.ollamaUrl, model: config.embeddingModel }),
                    new Promise<undefined>((resolve) => {
                        timeoutHandle = setTimeout(() => resolve(undefined), EMBED_QUERY_TIMEOUT_MS);
                    }),
                ]);
            } catch {
                queryEmbedding = undefined;
            } finally {
                // Don't leave the timeout timer pending once the race is decided.
                if (timeoutHandle !== undefined) clearTimeout(timeoutHandle);
            }
        }

        // Resolve the Knowledge Mix weight for this turn (per-agent → global → default).
        // The weight scales how many brain files we feed the retriever and how big a
        // slice of the context budget RAG can claim. At weight=50 the numbers below
        // equal the legacy defaults, so users who never touch the slider see no change.
        const knowledgeMix = resolveKnowledgeMix(agentSkillFile);
        this._lastKnowledgeMix = knowledgeMix;
        const mixedBrainFileLimit = mapWeightToBrainFileLimit(knowledgeMix.weight, config.memoryLongTermFiles);
        const mixedRetrievalRatio = mapWeightToRetrievalRatio(knowledgeMix.weight);

        // Use the Unified RAG Pipeline
        const result = this.retrievalOrchestrator.retrieve(currentPrompt, {
            brain: activeBrain,
            memoryManager: this.memoryManager,
            workspacePath,
            chatHistory: visibleHistory,
            contextBudget: {
                totalBudget: scaledTotalBudget,
                retrievalRatio: mixedRetrievalRatio,
            },
            brainFileLimit: mixedBrainFileLimit,
            scopeFolders: scope.folders,
            recentSessions,
            mediumTermLimit: config.memoryMediumTermSessions ?? 0,
            queryEmbedding,
            embeddingModel: config.embeddingModel || undefined,
            embeddingBlendAlpha: config.embeddingBlendAlpha,
        });

        // Absolute paths of the brain files behind the selected chunks (may repeat).
        const brainChunkPaths = result.selectedChunks
            .filter((c) => c.source === 'brain-memory' && c.metadata.filePath)
            .map((c) => c.metadata.filePath!);

        // Fire-and-forget background embedding for the files we just scored.
        // Embeds only files that lack a vector for the current model — so
        // steady-state turns do no embedding work. The next turn benefits.
        if (config.embeddingModel) {
            const embeddingModel = config.embeddingModel;
            const scoredFilePaths = Array.from(new Set(brainChunkPaths));
            if (scoredFilePaths.length > 0) {
                // Nobody awaits this work, so a rejection must be handled here or it
                // surfaces as an unhandled promise rejection.
                void Promise.resolve(
                    backfillBrainEmbeddings(
                        activeBrain.localBrainPath,
                        scoredFilePaths,
                        embeddingModel,
                        (texts) => embedTexts(texts, { baseUrl: config.ollamaUrl, model: embeddingModel }),
                    )
                ).catch((error: any) => {
                    logError('Background brain embedding backfill failed.', { error: error?.message || String(error) });
                });
            }
        }

        // Stash what actually fed this turn so handlePrompt can show it under the answer.
        const brainRoot = activeBrain.localBrainPath;
        const rel = (p?: string) => (p ? (path.relative(brainRoot, p) || p) : '');
        // Drops empty entries and keeps the first occurrence of each value, in order.
        const uniqueNonEmpty = (values: string[]) => Array.from(new Set(values.filter(Boolean)));
        const lessonChunks = result.lessonChunks || [];
        this._lastRetrievalInfo = {
            agentName: scope.agent?.name ?? null,
            scoped: scope.folders.length > 0,
            source: String((scope as any).source ?? ''),
            configuredFolders: scope.folders.map((abs) => rel(abs)),
            usedBrainFiles: uniqueNonEmpty(brainChunkPaths.map((p) => rel(p))),
            usedMemoryLayers: Array.from(new Set(
                result.selectedChunks
                    .filter((c) => c.source !== 'brain-memory' && c.source !== 'brain-trace')
                    .map((c) => c.source as string)
            )),
            lessonFiles: uniqueNonEmpty(lessonChunks.map((c) => rel(c.metadata.filePath))),
            totalChunks: result.totalChunks,
            selectedChunks: result.selectedChunks.length,
        };

        this._lastLessonContents = lessonChunks.map((c) => c.content);
        // Lessons go ahead of the regular RAG context (and ahead of [CONTEXT] in the system prompt),
        // so they're prominent and survive context-overflow truncation.
        const lessonBlock = buildLessonChecklistBlock(lessonChunks.map((c) => ({ title: c.title, content: c.content })));
        const memoryBlock = this.retrievalOrchestrator.buildContextString(result);
        return [lessonBlock, memoryBlock].filter(Boolean).join('\n\n');
    }

    /**
     * Persist the current session and notify the registered history listener.
     *
     * When no listener is registered this returns immediately, so the session
     * is not saved either. Listener failures — a synchronous throw as well as a
     * rejected promise — are logged and never propagate to the code that
     * changed the history.
     */
    private emitHistoryChanged() {
        if (!this.historyChangeListener) return;

        // Save session whenever history changes
        this.sessionManager.saveSession(
            this.currentTaskId,
            this.chatHistory,
            this.context.workspaceState.get<string>('lastActionStr')
        );

        const reportListenerFailure = (error: any) => {
            logError('History change listener failed.', { error: error?.message || String(error) });
        };

        try {
            // The listener still runs synchronously here; `.catch` only covers an
            // async rejection, so the surrounding try/catch handles a direct throw.
            Promise.resolve(this.historyChangeListener(this.getHistory())).catch(reportListenerFailure);
        } catch (error: any) {
            reportListenerFailure(error);
        }
    }

    /**
     * Run automatic memory extraction for the session that is ending.
     * Called when a new chat starts or when the extension is deactivated.
     *
     * Hands the non-internal messages to the memory manager, records a
     * `session-end` telemetry event and starts a background summary
     * compression. Any failure is logged and swallowed.
     */
    public onSessionEnd(): void {
        try {
            // Optional chaining also covers a workspace with zero folders, where
            // indexing `[0].uri` would throw and skip the whole extraction.
            const workspacePath = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath;
            const visibleMessages = this.chatHistory.filter((m) => !m.internal);

            this.memoryManager.onSessionEnd(
                this.currentTaskId,
                visibleMessages,
                workspacePath
            );
            logInfo('Memory extraction completed for session end.', { taskId: this.currentTaskId });
            recordTelemetry({
                kind: 'session-end',
                note: `taskId=${this.currentTaskId} messages=${visibleMessages.length}`,
            });
            // Fire-and-forget LLM compression: turns the raw transcript into a
            // 2–3 sentence summary that medium-term retrieval can use instead
            // of just "first user msg + last assistant 200 chars". Cheap call
            // (~256 output tokens), runs in the background so it never blocks
            // the next chat turn. A copy of the history is passed so later
            // mutations of `chatHistory` do not affect the summary input.
            void this.compressSessionSummary(this.currentTaskId, this.chatHistory.slice());
        } catch (error: any) {
            logError('Memory extraction failed on session end.', { error: error?.message || String(error) });
        }
    }

    /**
     * Compress a finished session into a short summary and persist it to the
     * session record. The summary is later read by `compactRecentSessions` so
     * the medium-term memory layer carries a real recap instead of a fragment.
     *
     * Skips sessions with fewer than 3 visible messages — they're typically
     * single-question pings where the raw first message is already a good
     * summary. Failures are logged and swallowed: a missing summary just
     * falls back to the legacy "first user msg" representation.
     *
     * @param taskId Id of the session whose `chat_sessions` record receives the summary.
     * @param history Messages of that session; only non-internal user/assistant messages are used.
     * @returns A promise that always resolves; it never rejects.
     */
    private async compressSessionSummary(taskId: string, history: ChatMessage[]): Promise<void> {
        // Callers start this with `void`, so everything — including transcript
        // building — stays inside the try block to guarantee no unhandled rejection.
        try {
            const visible = history.filter((m) => !m.internal && (m.role === 'user' || m.role === 'assistant'));
            if (visible.length < 3) return;
            const cfg = getConfig();
            // Same rule as normalizeMessages: non-string content is JSON-encoded
            // instead of collapsing to "[object Object]" via String().
            const contentToText = (content: unknown): string =>
                typeof content === 'string' ? content : (JSON.stringify(content) ?? '');
            const transcript = visible
                // Collapse whitespace and cap each message at 400 characters.
                .map((m) => `${m.role.toUpperCase()}: ${contentToText(m.content).replace(/\s+/g, ' ').slice(0, 400)}`)
                .join('\n\n');
            const messages: ChatMessage[] = [
                {
                    role: 'system',
                    content: [
                        'You compress chat transcripts into a 2-3 sentence summary.',
                        'Capture: (1) the user\'s topic or task, (2) the main decision or answer reached, (3) any open issue.',
                        'Reply in the user\'s primary language (mirror Korean ↔ English exactly as in the transcript).',
                        'Reply with ONLY the summary text. No headers, no quotes, no preamble.',
                    ].join(' '),
                    internal: true,
                },
                { role: 'user', content: `[TRANSCRIPT]\n${transcript}\n[END]` },
            ];
            const result = await this.callNonStreaming({
                baseUrl: cfg.ollamaUrl,
                modelName: cfg.defaultModel,
                engine: resolveEngine(cfg.ollamaUrl),
                messages,
                temperature: 0.3,
                maxTokens: 256,
                contextLength: cfg.contextLength,
            });
            // Strip wrapping quotes/backticks the model may add despite the instruction.
            const summary = (result.text || '').trim().replace(/^["'`]+|["'`]+$/g, '');
            // Discard empty or very short replies (under 12 characters).
            if (!summary || summary.length < 12) return;
            const sessions = this.context.globalState.get<any[]>('chat_sessions', []) || [];
            const idx = sessions.findIndex((s) => String(s?.id) === String(taskId));
            if (idx < 0) return;
            sessions[idx].summary = summary;
            await this.context.globalState.update('chat_sessions', sessions);
            logInfo('Session summary stored for medium-term recall.', { taskId, length: summary.length });
        } catch (e: any) {
            logError('Session summary compression failed.', { taskId, error: e?.message ?? String(e) });
        }
    }

    /**
     * Open a streaming chat request against the configured engine.
     *
     * The engine is resolved from `baseUrl` and never switched. Within that
     * engine, every model candidate is tried with every message variant, and
     * each combination is retried up to two times with 500 ms / 1000 ms
     * backoff. A 4xx response ends the retries for the current combination; a
     * 5xx response or a thrown error is retried.
     *
     * @param params Request settings. `maxTokens` defaults to 4096 and is
     *   floored at 256; `contextLength` defaults to 32768 and is floored at 2048.
     * @returns The first successful (`ok`) response together with the engine and URL used.
     * @throws The abort error when the request was cancelled; otherwise an
     *   `Error` naming the engine and the last failure once every combination failed.
     */
    private async createStreamingRequest(params: {
        baseUrl: string;
        modelName: string;
        reqMessages: ChatMessage[];
        temperature: number;
        /** Dynamic output-token cap computed from the remaining context budget. */
        maxTokens?: number;
        /** Model context window in tokens (used for Ollama's num_ctx). */
        contextLength?: number;
    }): Promise<{ response: Response; engine: 'lmstudio' | 'ollama'; apiUrl: string }> {
        const { baseUrl, modelName, reqMessages, temperature } = params;
        const maxTokens = Math.max(256, params.maxTokens ?? 4096);
        const numCtx = Math.max(2048, params.contextLength ?? 32768);
        const engine = resolveEngine(baseUrl);  // Use only the engine the user configured.
        const apiUrl = buildApiUrl(baseUrl, engine, 'chat');
        const messageVariants = this.buildEngineMessageVariants(reqMessages, engine);
        const modelCandidates = this.buildModelCandidates(modelName, engine);
        // Retries for transient network errors (at most 2, exponential backoff).
        const MAX_RETRIES = 2;
        let lastError: Error | null = null;

        // Model-candidate / message-variant fallback stays within the same engine.
        for (const candidateModel of modelCandidates) {
            for (const variant of messageVariants) {
                const streamBody = {
                    model: candidateModel,
                    messages: variant.messages,
                    stream: true,
                    ...(engine === 'lmstudio'
                        ? { max_tokens: maxTokens, temperature }
                        : { options: { num_ctx: numCtx, num_predict: maxTokens, temperature } }),
                };

                for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
                    try {
                        if (attempt > 0) {
                            const delay = 500 * Math.pow(2, attempt - 1);  // 500ms, 1000ms
                            await new Promise(r => setTimeout(r, delay));
                            logInfo('AI streaming request retry.', { engine, attempt, model: candidateModel });
                        }
                        logInfo('AI streaming request started.', {
                            engine, apiUrl, model: candidateModel,
                            variant: variant.name, messageCount: variant.messages.length,
                            attempt
                        });
                        const response = await fetch(apiUrl, {
                            method: 'POST',
                            headers: {
                                'Content-Type': 'application/json',
                                'Accept': 'text/event-stream',
                                'Cache-Control': 'no-cache',
                                'Connection': 'keep-alive'
                            },
                            body: JSON.stringify(streamBody),
                            signal: this.abortController?.signal,
                            keepalive: true
                        });

                        if (!response.ok) {
                            const errText = await response.text();
                            lastError = new Error(`AI Engine error (${engine}/${variant.name}): ${response.status} - ${summarizeText(errText, 300)}`);
                            logError('AI streaming request returned non-OK status.', {
                                engine, variant: variant.name, apiUrl,
                                status: response.status, body: summarizeText(errText, 500)
                            });
                            // Retrying a 4xx is pointless; only 5xx responses are retried.
                            if (response.status >= 400 && response.status < 500) break;
                            continue;
                        }

                        logInfo('AI streaming request connected.', { engine, variant: variant.name, apiUrl });
                        return { response, engine, apiUrl };
                    } catch (error: any) {
                        lastError = error instanceof Error ? error : new Error(String(error));
                        // A cancelled request must never be retried. Checking only the
                        // wrapped error's name misses aborts that reject with a custom
                        // reason or a non-Error value, so the signal itself is checked too.
                        if (error?.name === 'AbortError') {
                            throw error;
                        }
                        if (this.abortController?.signal.aborted) {
                            throw lastError;
                        }
                        logError('AI streaming request failed.', {
                            engine, variant: variant.name, apiUrl, model: candidateModel,
                            attempt, error: lastError.message
                        });
                    }
                }
            }
        }

        // Clear error message: tell the user which engine failed.
        const engineLabel = engine === 'lmstudio' ? 'LM Studio' : 'Ollama';
        throw new Error(
            `${engineLabel} 엔진에 연결할 수 없습니다. ` +
            `${engineLabel}가 실행 중이고 모델 '${modelName}'이 로드되어 있는지 확인하세요. ` +
            `(원인: ${lastError?.message || 'unknown'})`
        );
    }

    /**
     * Non-streaming chat completion. Used as a recovery path when the
     * streaming endpoint returns an empty response — common with LM Studio
     * when a model is mid-load or the SSE channel drops — and for the
     * background session-summary call.
     *
     * The body is consumed via `await response.text()` (single read), so
     * there's no ReadableStream lock to release and no chance of the
     * "lock() request could not be registered" error this method is helping
     * to avoid.
     *
     * Only the first message variant and the given model name are tried; there
     * is no retry.
     *
     * @param params Request settings. `maxTokens` defaults to 4096 and is
     *   floored at 256; `contextLength` defaults to 32768 and is floored at 2048.
     * @returns The reply text and stop reason. `text` is '' when the body is
     *   not valid JSON or carries no content.
     * @throws Error when the HTTP status is not OK; fetch errors propagate unchanged.
     */
    private async callNonStreaming(params: {
        baseUrl: string;
        modelName: string;
        engine: 'lmstudio' | 'ollama';
        messages: ChatMessage[];
        temperature: number;
        maxTokens?: number;
        contextLength?: number;
        signal?: AbortSignal;
    }): Promise<{ text: string; stopReason?: string }> {
        const { baseUrl, modelName, engine, messages, temperature, signal } = params;
        const maxTokens = Math.max(256, params.maxTokens ?? 4096);
        const numCtx = Math.max(2048, params.contextLength ?? 32768);
        const apiUrl = buildApiUrl(baseUrl, engine, 'chat');
        const variants = this.buildEngineMessageVariants(messages, engine);
        const body = {
            model: modelName,
            messages: variants[0].messages,
            stream: false,
            ...(engine === 'lmstudio'
                ? { max_tokens: maxTokens, temperature }
                : { options: { num_ctx: numCtx, num_predict: maxTokens, temperature } }),
        };
        const response = await fetch(apiUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(body),
            signal,
        });
        if (!response.ok) {
            const errText = await response.text().catch(() => '');
            throw new Error(`Non-streaming fallback returned ${response.status}: ${summarizeText(errText, 200)}`);
        }
        const text = await response.text();
        try {
            const json = JSON.parse(text);
            if (engine === 'lmstudio') {
                return {
                    text: json?.choices?.[0]?.message?.content ?? '',
                    stopReason: json?.choices?.[0]?.finish_reason,
                };
            }
            return {
                text: json?.message?.content ?? json?.response ?? '',
                stopReason: json?.done_reason ?? (json?.done === true ? 'stop' : undefined),
            };
        } catch (e: any) {
            // Still best-effort (empty text), but leave a trace so an "empty answer"
            // can be told apart from a malformed response body.
            logError('Non-streaming response was not valid JSON.', {
                engine, apiUrl, body: summarizeText(text, 200), error: e?.message ?? String(e)
            });
            return { text: '' };
        }
    }

    /**
     * Single streaming call used by progressive answering (live-delta main
     * stream + auto-continuation rounds). Mirrors the main streaming block in
     * handlePrompt but without the empty-stream recovery / non-streaming
     * fallback machinery — those only matter for the very first generation.
     *
     * When `postLiveDeltas` is true, every token is also forwarded to the
     * webview as a `streamChunk`, giving the user a real-time view of the
     * answer (and of continuation rounds) instead of one big drop at the end.
     *
     * Two transports exist: the LM Studio SDK streamer (`useLmStudioSdk`) and
     * the REST stream opened by `createStreamingRequest`. `params.signal` is
     * handed to the SDK streamer; the REST request is created without it.
     *
     * Returns the accumulated text and the final stop reason. Aborts and
     * stale runs surface as `aborted: true` and an empty/partial text — the
     * caller decides what to do with that. Other failures are logged and rethrown.
     */
    private async streamChatOnce(params: {
        runId: number;
        useLmStudioSdk: boolean;
        engine: 'lmstudio' | 'ollama';
        ollamaUrl: string;
        modelName: string;
        messages: ChatMessage[];
        temperature: number;
        maxTokens: number;
        contextLength: number;
        contextOverflowPolicy: 'stopAtLimit' | 'truncateMiddle' | 'rollingWindow';
        signal: AbortSignal;
        postLiveDeltas: boolean;
    }): Promise<{ text: string; stopReason?: string; aborted: boolean }> {
        let accumulated = '';
        let finishStopReason: string | undefined;
        const post = (token: string) => {
            if (params.postLiveDeltas && token) {
                this.webview?.postMessage({ type: 'streamChunk', value: token });
            }
        };

        if (params.useLmStudioSdk) {
            try {
                const stream = this.options.lmStudioStreamer!.stream({
                    modelName: params.modelName,
                    messages: params.messages.map((m) => ({ role: m.role, content: m.content })),
                    temperature: params.temperature,
                    maxTokens: params.maxTokens,
                    contextOverflowPolicy: params.contextOverflowPolicy,
                    signal: params.signal,
                });
                for await (const { token, stopReason } of stream) {
                    if (this.isStaleRun(params.runId)) {
                        return { text: accumulated, stopReason: finishStopReason, aborted: true };
                    }
                    if (token) {
                        accumulated += token;
                        post(token);
                    }
                    if (stopReason) finishStopReason = stopReason;
                }
            } catch (err: any) {
                if (err?.name === 'AbortError' || params.signal.aborted) {
                    return { text: accumulated, stopReason: finishStopReason, aborted: true };
                }
                const msg = err?.message ?? String(err);
                if (/context\s*length|contextlengthreached|exceed|too\s*long/i.test(msg)) {
                    finishStopReason = 'contextLengthReached';
                }
                // NOTE(review): the error is rethrown, so the detected stop reason never
                // reaches the caller as a return value; it is at least logged here.
                logError('streamChatOnce SDK path failed.', { engine: params.engine, error: msg, stopReason: finishStopReason });
                throw err;
            }
            return { text: accumulated, stopReason: finishStopReason, aborted: false };
        }

        const request = await this.createStreamingRequest({
            baseUrl: params.ollamaUrl,
            modelName: params.modelName,
            reqMessages: params.messages,
            temperature: params.temperature,
            maxTokens: params.maxTokens,
            contextLength: params.contextLength,
        });
        const reader = request.response.body?.getReader();
        if (!reader) throw new Error('Response body is not readable.');
        const decoder = new TextDecoder();
        let buffer = '';
        // Handles one line of the stream: either an SSE `data:` line or a bare JSON line.
        const consumeJsonLine = (line: string) => {
            const trimmed = line.trim();
            // Blank lines and SSE framing that never carries JSON (":" comments /
            // keep-alives, event/id/retry fields) are skipped instead of being
            // reported as parse failures.
            if (!trimmed || /^(:|event:|id:|retry:)/.test(trimmed)) return;
            // The space after "data:" is optional in SSE.
            const raw = trimmed.startsWith('data:') ? trimmed.slice(5).trim() : trimmed;
            if (!raw || raw === '[DONE]') return;
            try {
                const json = JSON.parse(raw);
                const token = params.engine === 'lmstudio'
                    ? json.choices?.[0]?.delta?.content || ''
                    : json.message?.content || json.response || '';
                if (token) {
                    accumulated += token;
                    post(token);
                }
                const fr = params.engine === 'lmstudio'
                    ? json.choices?.[0]?.finish_reason
                    : (json.done_reason ?? (json.done === true ? 'stop' : undefined));
                if (fr) finishStopReason = fr;
            } catch (e: any) {
                logError('streamChatOnce: failed to parse chunk.', { engine: params.engine, chunk: summarizeText(trimmed, 200), error: e?.message ?? String(e) });
            }
        };
        try {
            while (true) {
                const { done, value } = await reader.read();
                if (done) break;
                if (this.isStaleRun(params.runId)) {
                    // Cancel the stream so the superseded response stops being
                    // delivered; releasing the lock alone leaves it open.
                    await reader.cancel().catch(() => undefined);
                    return { text: accumulated, stopReason: finishStopReason, aborted: true };
                }
                buffer += decoder.decode(value, { stream: true });
                const lines = buffer.split('\n');
                // The last piece may be an incomplete line; keep it for the next chunk.
                buffer = lines.pop() || '';
                for (const line of lines) consumeJsonLine(line);
            }
            // Flush bytes the decoder is still holding from a split multi-byte character.
            buffer += decoder.decode();
            if (buffer.trim()) consumeJsonLine(buffer);
        } catch (err: any) {
            if (err?.name === 'AbortError' || params.signal.aborted) {
                return { text: accumulated, stopReason: finishStopReason, aborted: true };
            }
            logError('streamChatOnce REST path failed.', { engine: params.engine, error: err?.message ?? String(err) });
            throw err;
        } finally {
            try { reader.releaseLock(); } catch { /* already released on abort */ }
        }
        return { text: accumulated, stopReason: finishStopReason, aborted: false };
    }

    /**
     * Reduce chat messages to the plain `{ role, content }` shape sent to the engine.
     *
     * String content is passed through; any other content is JSON-encoded.
     * A truthy `images` field is carried over (Ollama Vision); every other
     * property of the input message is dropped.
     *
     * @param messages Messages to convert.
     * @returns One plain object per input message, in the same order.
     */
    private normalizeMessages(messages: ChatMessage[]) {
        const normalized: any[] = [];
        for (const message of messages) {
            const entry: any = {
                role: message.role,
                content: typeof message.content === 'string'
                    ? message.content
                    : JSON.stringify(message.content)
            };
            // Ollama Vision: keep the images field.
            const images = (message as any).images;
            if (images) {
                entry.images = images;
            }
            normalized.push(entry);
        }
        return normalized;
    }

    /**
     * Build the list of message payloads to try for an engine, in order.
     *
     * Ollama gets a single `native` variant. LM Studio gets the normalized
     * messages first (`native-system`) and then a fallback in which every
     * system message is rewritten as a user message carrying a
     * "[System Instruction ...]" prefix (`flattened-system-fallback`).
     *
     * @param messages Messages for the request.
     * @param engine Target engine.
     * @returns Named variants, each with its own message array.
     */
    private buildEngineMessageVariants(messages: ChatMessage[], engine: 'lmstudio' | 'ollama') {
        const native = this.normalizeMessages(messages);

        if (engine === 'lmstudio') {
            const systemAsUser = native.map((entry) => (
                entry.role !== 'system'
                    ? entry
                    : {
                        role: 'user' as const,
                        content: `[System Instruction - do not answer this message]\n${entry.content}`
                    }
            ));

            return [
                { name: 'native-system', messages: native },
                { name: 'flattened-system-fallback', messages: systemAsUser }
            ];
        }

        return [{ name: 'native', messages: native }];
    }

    /**
     * Lists the model identifiers to try, in order.
     *
     * The requested name always comes first. For `lmstudio` only, a trailing
     * `:<digits>` suffix is stripped and the result is appended as a second
     * candidate when it is non-empty and differs from the requested name.
     */
    private buildModelCandidates(modelName: string, engine: 'lmstudio' | 'ollama'): string[] {
        if (engine !== 'lmstudio') {
            return [modelName];
        }

        const strippedName = modelName.replace(/:\d+$/, '');
        const hasDistinctBase = strippedName !== '' && strippedName !== modelName;
        return hasDistinctBase ? [modelName, strippedName] : [modelName];
    }

    /**
     * Extracts action tags from an AI reply and executes them in a fixed order:
     * create_file, edit_file, delete_file, read_file, run_command, list_files,
     * list_brain, read_brain and finally read_url.
     *
     * Create/edit/delete are recorded with the transaction manager before the
     * file is touched; a failure in any of them is rethrown as a
     * `FileSystemError`, which aborts the remaining actions and triggers a
     * rollback. The read-style actions and run_command only report their own
     * failure and carry on. Results of read/list actions are appended to
     * `chatHistory` as internal system messages.
     *
     * When `config.dryRun` is set the transaction is left open and an approval
     * request is posted; otherwise the transaction is committed.
     *
     * @param aiMessage Raw model output that may contain action tags.
     * @param rootPath Root directory that file-action paths are validated against.
     * @param activeBrain Active brain profile; `localBrainPath` is treated as the Second Brain directory.
     * @returns One human-readable report line per action outcome. Failures are
     *          reported in this list rather than thrown to the caller.
     */
    private async executeActions(aiMessage: string, rootPath: string, activeBrain: BrainProfile): Promise<string[]> {
        const report: string[] = [];
        let brainModified = false;
        const activeBrainDir = activeBrain.localBrainPath;
        let firstCreatedFile: string | undefined;

        // True when `candidate` is the brain directory itself or lies beneath it.
        // Compares path segments instead of raw string prefixes, so a sibling such as
        // "<brain>-backup" is not mistaken for the brain, and an unset brain path is
        // treated as "nothing is inside" instead of throwing.
        const isInsideBrain = (candidate: string): boolean => {
            if (!activeBrainDir) return false;
            const rel = path.relative(path.resolve(activeBrainDir), path.resolve(candidate));
            return rel !== '..' && !rel.startsWith(`..${path.sep}`) && !path.isAbsolute(rel);
        };

        try {
            this.transactionManager.begin();

            // Action 1: Create File
            const createRegex = /<create_file\s+path=['"]?([^'"]+)['"]?>([\s\S]*?)<\/create_file>/gi;
            let match: RegExpExecArray | null;
            while ((match = createRegex.exec(aiMessage)) !== null) {
                const relPath = match[1].trim();
                const content = match[2].trim();
                try {
                    const absPath = validatePath(rootPath, relPath);
                    await this.transactionManager.record(absPath);

                    fs.mkdirSync(path.dirname(absPath), { recursive: true });
                    fs.writeFileSync(absPath, content, 'utf-8');

                    report.push(`✅ Created: ${relPath}`);
                    if (!firstCreatedFile) firstCreatedFile = absPath;
                    if (isInsideBrain(absPath)) brainModified = true;
                } catch (err: any) {
                    throw new FileSystemError(`Failed to create file ${relPath}: ${err.message}`, relPath, err);
                }
            }

            // Action 2: Edit File
            const editRegex = /<edit_file\s+path=['"]?([^'"]+)['"]?>([\s\S]*?)<\/edit_file>/gi;
            while ((match = editRegex.exec(aiMessage)) !== null) {
                const relPath = match[1].trim();
                const editContent = match[2].trim();
                try {
                    const absPath = validatePath(rootPath, relPath);
                    if (fs.existsSync(absPath)) {
                        await this.transactionManager.record(absPath);

                        let currentContent = fs.readFileSync(absPath, 'utf-8');
                        const searchMatch = editContent.match(/<search>([\s\S]*?)<\/search>\s*<replace>([\s\S]*?)<\/replace>/i);

                        if (searchMatch) {
                            const searchStr = searchMatch[1];
                            const replaceStr = searchMatch[2];
                            if (currentContent.includes(searchStr)) {
                                // Use a replacer function: a plain string replacement would expand
                                // `$&`, `$1`, `$$`, `` $` `` and `$'` inside replaceStr and corrupt
                                // replacement text that legitimately contains `$`.
                                currentContent = currentContent.replace(searchStr, () => replaceStr);
                                fs.writeFileSync(absPath, currentContent, 'utf-8');
                                report.push(`📝 Updated: ${relPath}`);
                            } else {
                                report.push(`⚠️ Search string not found in ${relPath}`);
                            }
                        } else {
                            // No <search>/<replace> pair: the tag body replaces the whole file.
                            fs.writeFileSync(absPath, editContent, 'utf-8');
                            report.push(`📝 Updated (Full): ${relPath}`);
                        }
                        if (isInsideBrain(absPath)) brainModified = true;
                    } else {
                        report.push(`❌ File not found: ${relPath}`);
                    }
                } catch (err: any) {
                    throw new FileSystemError(`Failed to edit file ${relPath}: ${err.message}`, relPath, err);
                }
            }

            // Action 3: Delete File
            const deleteRegex = /<delete_file\s+path=['"]?([^'"]+)['"]?\s*\/?>(?:<\/delete_file>)?/gi;
            while ((match = deleteRegex.exec(aiMessage)) !== null) {
                const relPath = match[1].trim();
                try {
                    const absPath = validatePath(rootPath, relPath);
                    if (fs.existsSync(absPath)) {
                        await this.transactionManager.record(absPath);
                        fs.unlinkSync(absPath);
                        report.push(`🗑 Deleted: ${relPath}`);
                        // Removing a brain file is a brain change too, same as create/edit.
                        if (isInsideBrain(absPath)) brainModified = true;
                    } else {
                        report.push(`⚠️ Delete failed: ${relPath} not found`);
                    }
                } catch (err: any) {
                    throw new FileSystemError(`Failed to delete file ${relPath}: ${err.message}`, relPath, err);
                }
            }

            // Action 4: Read File (Non-state-changing, no transaction record needed)
            const readRegex = /<read_file\s+path=['"]?([^'"]+)['"]?\s*\/?>(?:<\/read_file>)?/gi;
            while ((match = readRegex.exec(aiMessage)) !== null) {
                const relPath = match[1].trim();
                try {
                    const absPath = validatePath(rootPath, relPath);
                    if (fs.existsSync(absPath)) {
                        const content = fs.readFileSync(absPath, 'utf-8');
                        // Cap what is fed back into the conversation.
                        const preview = content.length > 8000 ? content.slice(0, 8000) + "\n... (truncated)" : content;
                        report.push(`📖 Read: ${relPath}`);
                        this.chatHistory.push({ role: 'system', content: `[Result of read_file ${relPath}]\n\`\`\`\n${preview}\n\`\`\``, internal: true });
                    } else {
                        report.push(`❌ Read failed: ${relPath} not found`);
                    }
                } catch (err: any) { report.push(`❌ Error Reading ${relPath}: ${err.message}`); }
            }

            // Action 5: Run Command
            const cmdRegex = /<run_command>([\s\S]*?)<\/run_command>/gi;
            while ((match = cmdRegex.exec(aiMessage)) !== null) {
                const cmd = match[1].trim();
                try {
                    // sanitizeCommand throws for commands it rejects; that surfaces as "Blocked" below.
                    const safeCmd = sanitizeCommand(cmd);
                    const terminal = vscode.window.terminals.find(t => t.name === 'Astra Terminal') || vscode.window.createTerminal({ name: 'Astra Terminal', cwd: rootPath });
                    terminal.show();
                    terminal.sendText(safeCmd);
                    report.push(`🚀 Executed: ${safeCmd}`);
                } catch (err: any) { report.push(`❌ Blocked: ${err.message}`); }
            }

            // Action 6: List Files
            const listRegex = /<list_files\s+path=['"]?([^'"]+)['"]?\s*\/?>(?:<\/list_files>)?/gi;
            while ((match = listRegex.exec(aiMessage)) !== null) {
                const relPath = match[1].trim() || '.';
                try {
                    const absPath = validatePath(rootPath, relPath);
                    if (fs.existsSync(absPath) && fs.statSync(absPath).isDirectory()) {
                        const entries = fs.readdirSync(absPath, { withFileTypes: true });
                        let listing = entries
                            .filter(e => !e.name.startsWith('.') && !EXCLUDED_DIRS.has(e.name))
                            .map(e => e.isDirectory() ? `${e.name}/` : e.name)
                            .join('\n');

                        if (listing.length > 5000) {
                            listing = listing.slice(0, 5000) + "\n... (truncated for context)";
                        }

                        report.push(`📂 Listed: ${relPath}`);
                        this.chatHistory.push({ role: 'system', content: `[Result of list_files ${relPath}]\n${listing}`, internal: true });
                    } else {
                        // Report the miss instead of silently producing no output for this tag.
                        report.push(`❌ Listing failed: ${relPath} not found or not a directory`);
                    }
                } catch (err: any) { report.push(`❌ Listing failed: ${err.message}`); }
            }

            // Action 7: Second Brain Knowledge (List/Read)
            const listBrainRegex = /<list_brain\s*path=['"]?([^'"]*)['"]?\s*\/?>(?:<\/list_brain>)?/gi;
            while ((match = listBrainRegex.exec(aiMessage)) !== null) {
                const relPath = match[1].trim() || '.';
                try {
                    const brainDir = activeBrainDir;
                    const absPath = path.join(brainDir, relPath);
                    if (!isInsideBrain(absPath)) {
                        // The path comes from model output; refuse "../" escapes out of the brain directory.
                        report.push(`❌ Brain List failed: ${relPath} is outside the Second Brain`);
                    } else if (fs.existsSync(absPath) && fs.statSync(absPath).isDirectory()) {
                        const entries = fs.readdirSync(absPath, { withFileTypes: true });
                        let listing = entries
                            .filter(e => !e.name.startsWith('.') && !EXCLUDED_DIRS.has(e.name))
                            .map(e => e.isDirectory() ? `${e.name}/` : e.name)
                            .join('\n');

                        if (listing.length > 5000) {
                            listing = listing.slice(0, 5000) + "\n... (truncated for context)";
                        }

                        report.push(`🧠 Brain Listed: ${relPath}`);
                        this.chatHistory.push({ role: 'system', content: `[Result of list_brain ${relPath}]\n${listing}`, internal: true });
                    } else {
                        report.push(`❌ Brain List failed: ${relPath} not found`);
                    }
                } catch (err: any) { report.push(`❌ Error Listing Brain: ${err.message}`); }
            }

            const brainRegex = /<read_brain>([\s\S]*?)<\/read_brain>/gi;
            while ((match = brainRegex.exec(aiMessage)) !== null) {
                const fileName = match[1].trim();
                try {
                    const brainDir = activeBrainDir;
                    const files = findBrainFiles(brainDir);
                    // An empty name would match every file through endsWith(''); treat it as a miss.
                    const targetFile = fileName
                        ? files.find((f: string) => path.basename(f) === fileName || f.endsWith(fileName))
                        : undefined;

                    if (targetFile && fs.existsSync(targetFile)) {
                        const content = fs.readFileSync(targetFile, 'utf-8');
                        report.push(`🧠 Brain Read: ${fileName}`);
                        this.chatHistory.push({ role: 'system', content: `[Result of read_brain ${fileName}]\n\`\`\`\n${content}\n\`\`\``, internal: true });
                    } else {
                        report.push(`❌ Brain Read failed: ${fileName} not found in Second Brain`);
                    }
                } catch (err: any) { report.push(`❌ Error Reading Brain: ${err.message}`); }
            }

            // Action 8: Read URL
            const urlRegex = /<read_url>([\s\S]*?)<\/read_url>/gi;
            while ((match = urlRegex.exec(aiMessage)) !== null) {
                const url = match[1].trim();
                try {
                    const res = await fetch(url, { signal: AbortSignal.timeout(10000) });
                    const text = await res.text();
                    // Crude tag strip plus whitespace collapse; this is not an HTML parser.
                    const content = text.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
                    const preview = content.length > 5000 ? content.slice(0, 5000) + "\n... (truncated)" : content;
                    report.push(`🌐 Read URL: ${url}`);
                    this.chatHistory.push({ role: 'system', content: `[Result of read_url ${url}]\n${preview}`, internal: true });
                } catch (err: any) { report.push(`❌ URL Read failed: ${err.message}`); }
            }

            if (firstCreatedFile) {
                // Always open file results in the editor group (column 2) — the ConnectAI
                // sidebar lives in column 3 and we don't want freshly-written files to
                // hijack the chat panel.
                // Fire-and-forget, but log a rejection instead of leaving it unhandled.
                void vscode.window.showTextDocument(vscode.Uri.file(firstCreatedFile), {
                    preview: false,
                    viewColumn: vscode.ViewColumn.Two,
                }).then(undefined, (err: unknown) => {
                    logError('Failed to open created file in editor.', err);
                });
            }

            // Brain Sync Logic
            // NOTE(review): this runs before the dry-run gate below, so brain changes are
            // synced even when the transaction is still awaiting approval — confirm intended.
            if (brainModified && shouldAutoPushBrain() && activeBrain.secondBrainRepo) {
                this.syncBrain(activeBrainDir);
            }

            const config = getConfig();
            if (config.dryRun) {
                report.push(`\n⚠️ **Dry Run Mode Active**: 위 변경 사항을 확인하고 [승인] 또는 [롤백]을 선택해주세요.`);
                this.webview?.postMessage({ type: 'requiresApproval' });
                // Mirror the inline-chat approval into the queue feeding the dedicated panel + status bar.
                const queue = this.options.approvalQueue;
                if (queue) {
                    const recorded = this.transactionManager.getRecordedFiles();
                    queue.enqueue(
                        {
                            id: `txn-${Date.now()}`,
                            kind: 'transaction',
                            title: 'Pending file changes',
                            summary: `${recorded.length}개 파일 변경 대기 중`,
                            files: recorded.map(r => r.path),
                            createdAt: Date.now(),
                        },
                        {
                            approve: () => this.approveTransaction(),
                            reject: () => this.rejectTransaction(),
                        }
                    );
                }
                // Do NOT commit yet
            } else {
                this.transactionManager.commit();
            }
        } catch (error: any) {
            this.transactionManager.rollback();
            const g1Error = error instanceof AgentExecutionError ? error : new AgentExecutionError(error.message, error);
            report.push(`🛑 Transaction Failed: ${g1Error.message}. All file changes rolled back.`);
            logError('Action execution failed, rolled back.', g1Error);
            // A failed-and-rolled-back action is a strong "something went wrong" signal — offer to record a lesson.
            this.webview?.postMessage({ type: 'lessonCandidate', value: { trigger: 'rollback', reason: g1Error.message } });
            // We return the report with the failure message instead of throwing
            // so the agent can see the failure and decide what to do next
        }
        return report;
    }

    /**
     * Stages, commits and pushes everything in the brain directory using
     * synchronous git invocations run with `brainDir` as the working directory.
     *
     * The steps run in order and stop at the first one that throws; the error
     * is logged and not rethrown.
     */
    private syncBrain(brainDir: string) {
        const gitSteps = [
            `git add .`,
            `git commit -m "[Astra] Knowledge Update"`,
            `git push`
        ];

        try {
            const { execSync } = require('child_process');
            for (const step of gitSteps) {
                execSync(step, { cwd: brainDir });
            }
        } catch (err) {
            logError('Second Brain sync failed.', err);
        }
    }
}