feat: v2.2.92 → v2.2.158 — god-file 분해 + Stocks feature + 대화 연속성

R56–R59: agent.ts 2731→1529줄 god-file 분해 (25 modules) · attrParsers + LLM 메서드 8개 (callNonStreaming, streamChatOnce 등) · executeActions 415줄 → 8 handler 그룹 (file/run/list/brain/calendar/sheets/tasks) · handlePrompt 1100줄 → 7 phase 모듈 (system prompt + budget + autoContinue 등) R50–R55: extension.ts 1145→349줄 (telegram/settings/provider commands 분리) Stocks feature 신규: /stocks slash command (v2.2.152~158) · .astra/stocks.json 저장소 + Yahoo Finance 현재가 갱신 · 8 키워드 필터 (ROE/성장성/유동성/수익성/영업효율/기술력/안정성/PBR) · Naver 시가총액 페이지 JSON API (m.stock.naver.com) 발굴 · LLM Top 5 매력도 분석 + Telegram 자동 보고서 · KST 09:00/15:00 watcher 자동 모니터링 대화 연속성 (v2.2.150~157): · [PRIOR TURN CONCLUSION] block 으로 직전 결론 anchor · thin follow-up 분류 → boilerplate 헤더 suppression · slash 명령 결과 chatHistory mirror (capture wrapper) · echo/parrot 금지 system prompt rule 기타: /stocks 슬래시 자동완성 dropdown UI, Naver JSON API 전환 (cheerio 제거) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 09:59:32 +09:00
parent 4153f640c2
commit 0a97324f1b
149 changed files with 14628 additions and 6927 deletions
@@ -0,0 +1,54 @@
+import * as fs from 'fs';
+import * as path from 'path';
+import { HandlerContext } from './types';
+import { findBrainFiles } from '../../utils';
+import { EXCLUDED_DIRS } from '../../config';
+
+/**
+ * Handles the Second Brain read-only action tags found in the model output:
+ * `<list_brain path="..."/>` (directory listing) and
+ * `<read_brain>name</read_brain>` (file contents).
+ *
+ * Each outcome is summarized as a line in `ctx.report`; successful payloads are
+ * pushed onto `ctx.chatHistory` as internal system messages. Failures are
+ * reported as `❌` lines instead of being thrown.
+ *
+ * @param ctx Shared per-turn handler context; this handler uses `aiMessage`,
+ *            `activeBrainDir`, `report` and `chatHistory`.
+ */
+export async function applyBrainOpsActions(ctx: HandlerContext): Promise<void> {
+    const { aiMessage, activeBrainDir, report } = ctx;
+    let match: RegExpExecArray | null;
+
+    // Action 7: Second Brain Knowledge (List/Read)
+    const listBrainRegex = /<list_brain\s*path=['"]?([^'"]*)['"]?\s*\/?>(?:<\/list_brain>)?/gi;
+    while ((match = listBrainRegex.exec(aiMessage)) !== null) {
+        const relPath = match[1].trim() || '.';
+        try {
+            const absPath = path.join(activeBrainDir, relPath);
+            // The path is model-generated: refuse anything that resolves outside the
+            // brain directory (e.g. "../..") before touching the file system.
+            const fromBrain = path.relative(activeBrainDir, absPath);
+            if (fromBrain === '..' || fromBrain.startsWith(`..${path.sep}`) || path.isAbsolute(fromBrain)) {
+                report.push(`❌ Brain List failed: ${relPath} is outside Second Brain`);
+                continue;
+            }
+            if (fs.existsSync(absPath) && fs.statSync(absPath).isDirectory()) {
+                const entries = fs.readdirSync(absPath, { withFileTypes: true });
+                let listing = entries
+                    .filter(e => !e.name.startsWith('.') && !EXCLUDED_DIRS.has(e.name))
+                    .map(e => e.isDirectory() ? `${e.name}/` : e.name)
+                    .join('\n');
+
+                // Cap the listing so a huge directory cannot flood the next turn's context.
+                if (listing.length > 5000) {
+                    listing = listing.slice(0, 5000) + "\n... (truncated for context)";
+                }
+
+                report.push(`🧠 Brain Listed: ${relPath}`);
+                ctx.chatHistory.push({ role: 'system', content: `[Result of list_brain ${relPath}]\n${listing}`, internal: true });
+            } else {
+                report.push(`❌ Brain List failed: ${relPath} not found`);
+            }
+        } catch (err: any) { report.push(`❌ Error Listing Brain: ${err.message}`); }
+    }
+
+    const brainRegex = /<read_brain>([\s\S]*?)<\/read_brain>/gi;
+    while ((match = brainRegex.exec(aiMessage)) !== null) {
+        const fileName = match[1].trim();
+        if (!fileName) {
+            // An empty name satisfies `endsWith('')` for every file, which would
+            // silently return whichever brain file happens to come first.
+            report.push(`❌ Brain Read failed: empty file name`);
+            continue;
+        }
+        try {
+            const files = findBrainFiles(activeBrainDir);
+            // Match either the bare file name or a trailing path fragment.
+            const targetFile = files.find((f: string) => path.basename(f) === fileName || f.endsWith(fileName));
+
+            if (targetFile && fs.existsSync(targetFile)) {
+                const content = fs.readFileSync(targetFile, 'utf-8');
+                report.push(`🧠 Brain Read: ${fileName}`);
+                ctx.chatHistory.push({ role: 'system', content: `[Result of read_brain ${fileName}]\n\`\`\`\n${content}\n\`\`\``, internal: true });
+            } else {
+                report.push(`❌ Brain Read failed: ${fileName} not found in Second Brain`);
+            }
+        } catch (err: any) { report.push(`❌ Error Reading Brain: ${err.message}`); }
+    }
+}
@@ -0,0 +1,43 @@
+import { HandlerContext } from './types';
+import { _parseCalEventAttrs } from '../attrParsers';
+
+/**
+ * Handles `<create_calendar_event>` tags: parses the tag attributes, creates the
+ * event through the calendar feature module and records the outcome in
+ * `ctx.report` (plus an internal chat-history message on success).
+ */
+export async function applyCalendarActions(ctx: HandlerContext): Promise<void> {
+    const { aiMessage, report } = ctx;
+    // Action 8: Create Calendar Event (OAuth) — lets the agent create events
+    // automatically after analysing meeting notes or tasks.
+    // Format: <create_calendar_event title="..." start="2026-05-21T14:00" duration="60" location="...">description</create_calendar_event>
+    // Attributes: title (required), start (required, ISO 'YYYY-MM-DDTHH:MM' or with timezone),
+    //             end | duration (minutes, default 60), location, all_day (true/false)
+    const eventTagRe = /<create_calendar_event\b([^>]*)>([\s\S]*?)<\/create_calendar_event>/gi;
+    for (let hit = eventTagRe.exec(aiMessage); hit !== null; hit = eventTagRe.exec(aiMessage)) {
+        const attrs = _parseCalEventAttrs(hit[1]);
+        const body = hit[2].trim();
+        if (!attrs.title || !attrs.start) {
+            report.push(`❌ Calendar Event: title / start 누락`);
+            continue;
+        }
+        try {
+            const { createCalendarEvent } = await import('../../features/calendar');
+            const outcome = await createCalendarEvent(ctx.context, {
+                title: attrs.title,
+                start: attrs.start,
+                end: attrs.end,
+                durationMinutes: attrs.duration,
+                location: attrs.location,
+                description: body || undefined,
+                allDay: attrs.allDay,
+            });
+            if (!outcome.ok) {
+                report.push(`❌ Calendar Event Failed: ${outcome.error}`);
+            } else {
+                const created = outcome.event;
+                report.push(`📅 Calendar Event Created: ${created.title} (${created.startIso})`);
+                // Inject the result into chatHistory so the agent can cite the link in its next answer.
+                ctx.chatHistory.push({
+                    role: 'system',
+                    content: `[Calendar event created] ${created.title} · ${created.startIso}\nLink: ${created.htmlLink}`,
+                    internal: true,
+                });
+            }
+        } catch (err: any) {
+            report.push(`❌ Calendar Event Error: ${err?.message ?? String(err)}`);
+        }
+    }
+}
@@ -0,0 +1,73 @@
+import * as fs from 'fs';
+import * as path from 'path';
+import { validatePath } from '../../security';
+import { FileSystemError } from '../../core/errors';
+import { HandlerContext } from './types';
+
+/**
+ * `<create_file>` + `<edit_file>` action handler.
+ *
+ * The model may emit several create/edit tags in a single turn, so every tag is
+ * matched by regex and processed in order (all creates first, then all edits).
+ * validatePath() enforces the sandbox, and transactionManager.record() is
+ * called before each write to capture a rollback point.
+ *
+ * An `<edit_file>` body containing `<search>…</search><replace>…</replace>`
+ * replaces the first occurrence of the search text; any other body overwrites
+ * the whole file.
+ *
+ * @param ctx Shared per-turn handler context.
+ * @throws FileSystemError when validation, recording or file I/O fails for a tag.
+ */
+export async function applyFileCreateEditActions(ctx: HandlerContext): Promise<void> {
+    const { aiMessage, rootPath, activeBrainDir, report } = ctx;
+    let match: RegExpExecArray | null;
+
+    // Action 1: Create File
+    const createRegex = /<create_file\s+path=['"]?([^'"]+)['"]?>([\s\S]*?)<\/create_file>/gi;
+    while ((match = createRegex.exec(aiMessage)) !== null) {
+        const relPath = match[1].trim();
+        const content = match[2].trim();
+        try {
+            const absPath = validatePath(rootPath, relPath);
+            await ctx.transactionManager.record(absPath);
+
+            fs.mkdirSync(path.dirname(absPath), { recursive: true });
+            fs.writeFileSync(absPath, content, 'utf-8');
+
+            report.push(`✅ Created: ${relPath}`);
+            ctx.setFirstCreated(absPath);
+            // NOTE(review): plain prefix test — a sibling directory sharing the brain
+            // dir's prefix (e.g. "brain-old") would also match; confirm whether intended.
+            if (absPath.startsWith(activeBrainDir)) ctx.markBrainModified();
+        } catch (err: any) {
+            throw new FileSystemError(`Failed to create file ${relPath}: ${err.message}`, relPath, err);
+        }
+    }
+
+    // Action 2: Edit File
+    const editRegex = /<edit_file\s+path=['"]?([^'"]+)['"]?>([\s\S]*?)<\/edit_file>/gi;
+    while ((match = editRegex.exec(aiMessage)) !== null) {
+        const relPath = match[1].trim();
+        const editContent = match[2].trim();
+        try {
+            const absPath = validatePath(rootPath, relPath);
+            if (fs.existsSync(absPath)) {
+                await ctx.transactionManager.record(absPath);
+
+                let currentContent = fs.readFileSync(absPath, 'utf-8');
+                const searchMatch = editContent.match(/<search>([\s\S]*?)<\/search>\s*<replace>([\s\S]*?)<\/replace>/i);
+
+                if (searchMatch) {
+                    const searchStr = searchMatch[1];
+                    const replaceStr = searchMatch[2];
+                    if (currentContent.includes(searchStr)) {
+                        // Use a replacer function so `$&`, `$$`, `$1`, … inside the new
+                        // text are inserted literally instead of being expanded as
+                        // String.prototype.replace substitution patterns.
+                        currentContent = currentContent.replace(searchStr, () => replaceStr);
+                        fs.writeFileSync(absPath, currentContent, 'utf-8');
+                        report.push(`📝 Updated: ${relPath}`);
+                    } else {
+                        report.push(`⚠️ Search string not found in ${relPath}`);
+                    }
+                } else {
+                    // No search/replace pair: the tag body is the complete new file content.
+                    fs.writeFileSync(absPath, editContent, 'utf-8');
+                    report.push(`📝 Updated (Full): ${relPath}`);
+                }
+                if (absPath.startsWith(activeBrainDir)) ctx.markBrainModified();
+            } else {
+                report.push(`❌ File not found: ${relPath}`);
+            }
+        } catch (err: any) {
+            throw new FileSystemError(`Failed to edit file ${relPath}: ${err.message}`, relPath, err);
+        }
+    }
+}
@@ -0,0 +1,44 @@
+import * as fs from 'fs';
+import { validatePath } from '../../security';
+import { FileSystemError } from '../../core/errors';
+import { HandlerContext } from './types';
+
+/**
+ * Handles `<delete_file>` and `<read_file>` tags.
+ *
+ * Deletions are recorded with the transaction manager before the file is
+ * unlinked; delete failures are rethrown as FileSystemError. Reads are
+ * non-mutating: the (possibly truncated) content is pushed onto the chat
+ * history and read failures are only reported.
+ */
+export async function applyFileDeleteReadActions(ctx: HandlerContext): Promise<void> {
+    const { aiMessage, rootPath, report } = ctx;
+
+    // Action 3: Delete File
+    const deleteTagRe = /<delete_file\s+path=['"]?([^'"]+)['"]?\s*\/?>(?:<\/delete_file>)?/gi;
+    for (let hit = deleteTagRe.exec(aiMessage); hit !== null; hit = deleteTagRe.exec(aiMessage)) {
+        const relPath = hit[1].trim();
+        try {
+            const target = validatePath(rootPath, relPath);
+            if (!fs.existsSync(target)) {
+                report.push(`⚠️ Delete failed: ${relPath} not found`);
+                continue;
+            }
+            await ctx.transactionManager.record(target);
+            fs.unlinkSync(target);
+            report.push(`🗑 Deleted: ${relPath}`);
+        } catch (err: any) {
+            throw new FileSystemError(`Failed to delete file ${relPath}: ${err.message}`, relPath, err);
+        }
+    }
+
+    // Action 4: Read File (Non-state-changing, no transaction record needed)
+    const readTagRe = /<read_file\s+path=['"]?([^'"]+)['"]?\s*\/?>(?:<\/read_file>)?/gi;
+    for (let hit = readTagRe.exec(aiMessage); hit !== null; hit = readTagRe.exec(aiMessage)) {
+        const relPath = hit[1].trim();
+        try {
+            const target = validatePath(rootPath, relPath);
+            if (!fs.existsSync(target)) {
+                report.push(`❌ Read failed: ${relPath} not found`);
+                continue;
+            }
+            const text = fs.readFileSync(target, 'utf-8');
+            // Only the first 8000 characters are forwarded to the model.
+            let preview = text;
+            if (text.length > 8000) {
+                preview = text.slice(0, 8000) + "\n... (truncated)";
+            }
+            report.push(`📖 Read: ${relPath}`);
+            ctx.chatHistory.push({ role: 'system', content: `[Result of read_file ${relPath}]\n\`\`\`\n${preview}\n\`\`\``, internal: true });
+        } catch (err: any) {
+            report.push(`❌ Error Reading ${relPath}: ${err.message}`);
+        }
+    }
+}
@@ -0,0 +1,31 @@
+import * as fs from 'fs';
+import { HandlerContext } from './types';
+import { validatePath } from '../../security';
+import { EXCLUDED_DIRS } from '../../config';
+
+/**
+ * Handles `<list_files path="..."/>` tags: lists one directory level inside the
+ * workspace (hidden entries and EXCLUDED_DIRS are skipped) and pushes the
+ * listing onto the chat history as an internal system message.
+ *
+ * Every tag produces a report line — success, "not found", or the validation /
+ * I/O error — so the user can see when a listing did not happen.
+ *
+ * @param ctx Shared per-turn handler context.
+ */
+export async function applyListFilesActions(ctx: HandlerContext): Promise<void> {
+    const { aiMessage, rootPath, report } = ctx;
+    let match: RegExpExecArray | null;
+    // Action 6: List Files
+    const listRegex = /<list_files\s+path=['"]?([^'"]+)['"]?\s*\/?>(?:<\/list_files>)?/gi;
+    while ((match = listRegex.exec(aiMessage)) !== null) {
+        const relPath = match[1].trim() || '.';
+        try {
+            const absPath = validatePath(rootPath, relPath);
+            if (fs.existsSync(absPath) && fs.statSync(absPath).isDirectory()) {
+                const entries = fs.readdirSync(absPath, { withFileTypes: true });
+                let listing = entries
+                    .filter(e => !e.name.startsWith('.') && !EXCLUDED_DIRS.has(e.name))
+                    .map(e => e.isDirectory() ? `${e.name}/` : e.name)
+                    .join('\n');
+
+                // Cap the listing so a huge directory cannot flood the next turn's context.
+                if (listing.length > 5000) {
+                    listing = listing.slice(0, 5000) + "\n... (truncated for context)";
+                }
+
+                report.push(`📂 Listed: ${relPath}`);
+                ctx.chatHistory.push({ role: 'system', content: `[Result of list_files ${relPath}]\n${listing}`, internal: true });
+            } else {
+                // Previously a missing / non-directory path was dropped silently, unlike
+                // the sibling list_brain and read_file handlers.
+                report.push(`❌ List failed: ${relPath} not found or not a directory`);
+            }
+        } catch (err: any) { report.push(`❌ Listing failed: ${err.message}`); }
+    }
+}
@@ -0,0 +1,20 @@
+import * as vscode from 'vscode';
+import { HandlerContext } from './types';
+import { sanitizeCommand } from '../../security';
+
+/**
+ * Handles `<run_command>` tags: each command is passed through
+ * sanitizeCommand() and, if accepted, sent to the shared "Astra Terminal"
+ * (created on first use with the workspace root as cwd). Commands rejected by
+ * the sanitizer are reported as blocked.
+ */
+export async function applyRunCommandActions(ctx: HandlerContext): Promise<void> {
+    const { aiMessage, rootPath, report } = ctx;
+    // Action 5: Run Command
+    const commandTagRe = /<run_command>([\s\S]*?)<\/run_command>/gi;
+    for (let hit = commandTagRe.exec(aiMessage); hit !== null; hit = commandTagRe.exec(aiMessage)) {
+        const requested = hit[1].trim();
+        try {
+            const safeCmd = sanitizeCommand(requested);
+            // Reuse the existing terminal when there is one.
+            const existing = vscode.window.terminals.find(t => t.name === 'Astra Terminal');
+            const terminal = existing || vscode.window.createTerminal({ name: 'Astra Terminal', cwd: rootPath });
+            terminal.show();
+            terminal.sendText(safeCmd);
+            report.push(`🚀 Executed: ${safeCmd}`);
+        } catch (err: any) {
+            report.push(`❌ Blocked: ${err.message}`);
+        }
+    }
+}
@@ -0,0 +1,87 @@
+import { HandlerContext } from './types';
+import { _parseSheetAttrs } from '../attrParsers';
+
+/**
+ * Handles the Google Sheets tags `<read_sheet/>`, `<write_sheet>` and
+ * `<append_sheet>`. All reads are processed first, then all writes, then all
+ * appends. Every outcome is reported; read results are additionally pushed onto
+ * the chat history as a markdown table.
+ */
+export async function applySheetsActions(ctx: HandlerContext): Promise<void> {
+    const { aiMessage, report } = ctx;
+
+    // Action 10/11/12: Google Sheets read / write / append.
+    // All of them require spreadsheet_id (attribute) + range (attribute). The body of write/append is TSV.
+    //   <read_sheet spreadsheet_id="1abc..." range="Sheet1!A1:D20"/>
+    //   <write_sheet spreadsheet_id="1abc..." range="Sheet1!A1">
+    //     name\tage\trole
+    //     Minji\t29\tdesigner
+    //   </write_sheet>
+    //   <append_sheet spreadsheet_id="1abc..." range="Sheet1!A:C">
+    //     2026-05-21\tnew item\tdone
+    //   </append_sheet>
+    const readTagRe = /<read_sheet\b([^>/]*?)\s*\/>/gi;
+    for (let hit = readTagRe.exec(aiMessage); hit !== null; hit = readTagRe.exec(aiMessage)) {
+        const target = _parseSheetAttrs(hit[1]);
+        if (!target.spreadsheetId || !target.range) {
+            report.push(`❌ Sheet Read: spreadsheet_id / range 누락`);
+            continue;
+        }
+        try {
+            const { readSheetRange, valuesToMarkdownTable } = await import('../../features/sheets');
+            const result = await readSheetRange(ctx.context, target.spreadsheetId, target.range);
+            if (!result.ok) {
+                report.push(`❌ Sheet Read Failed: ${result.error}`);
+            } else {
+                const table = valuesToMarkdownTable(result.values);
+                report.push(`📊 Sheet Read: ${target.spreadsheetId.slice(0, 8)}…/${result.range} (${result.values.length} rows)`);
+                ctx.chatHistory.push({
+                    role: 'system',
+                    content: `[Sheet read ${result.range}]\n${table}`,
+                    internal: true,
+                });
+            }
+        } catch (err: any) {
+            report.push(`❌ Sheet Read Error: ${err?.message ?? String(err)}`);
+        }
+    }
+
+    const writeTagRe = /<write_sheet\b([^>]*)>([\s\S]*?)<\/write_sheet>/gi;
+    for (let hit = writeTagRe.exec(aiMessage); hit !== null; hit = writeTagRe.exec(aiMessage)) {
+        const target = _parseSheetAttrs(hit[1]);
+        const tsv = hit[2];
+        if (!target.spreadsheetId || !target.range) {
+            report.push(`❌ Sheet Write: spreadsheet_id / range 누락`);
+            continue;
+        }
+        try {
+            const { writeSheetRange, parseTsvBody } = await import('../../features/sheets');
+            const rows = parseTsvBody(tsv);
+            if (rows.length === 0) {
+                report.push(`❌ Sheet Write: 본문 비어있음`);
+                continue;
+            }
+            const result = await writeSheetRange(ctx.context, target.spreadsheetId, target.range, rows);
+            if (!result.ok) {
+                report.push(`❌ Sheet Write Failed: ${result.error}`);
+            } else {
+                report.push(`📊 Sheet Write: ${result.updatedRange} (${result.updatedCells} cells)`);
+            }
+        } catch (err: any) {
+            report.push(`❌ Sheet Write Error: ${err?.message ?? String(err)}`);
+        }
+    }
+
+    const appendTagRe = /<append_sheet\b([^>]*)>([\s\S]*?)<\/append_sheet>/gi;
+    for (let hit = appendTagRe.exec(aiMessage); hit !== null; hit = appendTagRe.exec(aiMessage)) {
+        const target = _parseSheetAttrs(hit[1]);
+        const tsv = hit[2];
+        if (!target.spreadsheetId || !target.range) {
+            report.push(`❌ Sheet Append: spreadsheet_id / range 누락`);
+            continue;
+        }
+        try {
+            const { appendSheetRows, parseTsvBody } = await import('../../features/sheets');
+            const rows = parseTsvBody(tsv);
+            if (rows.length === 0) {
+                report.push(`❌ Sheet Append: 본문 비어있음`);
+                continue;
+            }
+            const result = await appendSheetRows(ctx.context, target.spreadsheetId, target.range, rows);
+            if (!result.ok) {
+                report.push(`❌ Sheet Append Failed: ${result.error}`);
+            } else {
+                report.push(`📊 Sheet Append: ${result.appendedRange} (${result.updatedCells} cells)`);
+            }
+        } catch (err: any) {
+            report.push(`❌ Sheet Append Error: ${err?.message ?? String(err)}`);
+        }
+    }
+}
@@ -0,0 +1,69 @@
+import { HandlerContext } from './types';
+import { _parseTaskAttrs } from '../attrParsers';
+
+// Action 13/14/15: Task tracker — accumulated in _shared/tasks.md.
+// Single source of truth for tracking meeting notes, plans and work progress.
+// status: open/in_progress/blocked/done.
+//   <add_task title="..." owner="@me" due="2026-05-24T18:00" notes="..."/>
+//   <update_task id="t_001" status="in_progress" notes="in progress"/>
+//   <complete_task id="t_001"/>
+/**
+ * Applies the task-tracker tags in the model output: all `<add_task/>` tags
+ * first, then `<update_task/>`, then `<complete_task/>`. The task store is read
+ * before and written after each successful mutation; every outcome is reported.
+ */
+export async function applyTasksActions(ctx: HandlerContext): Promise<void> {
+    const { aiMessage, report } = ctx;
+
+    const addTagRe = /<add_task\b([^>/]*?)\s*\/>/gi;
+    for (let hit = addTagRe.exec(aiMessage); hit !== null; hit = addTagRe.exec(aiMessage)) {
+        const attrs = _parseTaskAttrs(hit[1]);
+        if (!attrs.title) {
+            report.push(`❌ Add Task: title 누락`);
+            continue;
+        }
+        try {
+            const { readTaskStore, writeTaskStore, addTask } = await import('../../features/tasks');
+            const store = readTaskStore(ctx.context);
+            const { title, owner, due, notes, status } = attrs;
+            const created = addTask(store, { title, owner, due, notes, status });
+            writeTaskStore(ctx.context, store);
+            const dueSuffix = created.due ? ' (due ' + created.due + ')' : '';
+            report.push(`📋 Task Added: ${created.id} · ${created.title}${dueSuffix}`);
+        } catch (err: any) {
+            report.push(`❌ Add Task Error: ${err?.message ?? String(err)}`);
+        }
+    }
+
+    const updateTagRe = /<update_task\b([^>/]*?)\s*\/>/gi;
+    for (let hit = updateTagRe.exec(aiMessage); hit !== null; hit = updateTagRe.exec(aiMessage)) {
+        const attrs = _parseTaskAttrs(hit[1]);
+        if (!attrs.id) {
+            report.push(`❌ Update Task: id 누락`);
+            continue;
+        }
+        try {
+            const { readTaskStore, writeTaskStore, updateTask } = await import('../../features/tasks');
+            const store = readTaskStore(ctx.context);
+            // Only attributes that were actually supplied end up in the patch.
+            const patch: any = {};
+            for (const field of ['title', 'owner', 'due', 'notes', 'status'] as const) {
+                if (attrs[field]) patch[field] = attrs[field];
+            }
+            const updated = updateTask(store, attrs.id, patch);
+            if (updated) {
+                writeTaskStore(ctx.context, store);
+                const dueSuffix = updated.due ? ' (due ' + updated.due + ')' : '';
+                report.push(`📋 Task Updated: ${updated.id} → ${updated.status}${dueSuffix}`);
+            } else {
+                report.push(`❌ Update Task: ${attrs.id} 를 active 목록에서 못 찾음`);
+            }
+        } catch (err: any) {
+            report.push(`❌ Update Task Error: ${err?.message ?? String(err)}`);
+        }
+    }
+
+    const completeTagRe = /<complete_task\b([^>/]*?)\s*\/>/gi;
+    for (let hit = completeTagRe.exec(aiMessage); hit !== null; hit = completeTagRe.exec(aiMessage)) {
+        const attrs = _parseTaskAttrs(hit[1]);
+        if (!attrs.id) {
+            report.push(`❌ Complete Task: id 누락`);
+            continue;
+        }
+        try {
+            const { readTaskStore, writeTaskStore, completeTask } = await import('../../features/tasks');
+            const store = readTaskStore(ctx.context);
+            const closed = completeTask(store, attrs.id);
+            if (closed) {
+                writeTaskStore(ctx.context, store);
+                report.push(`✅ Task Done: ${closed.id} · ${closed.title}`);
+            } else {
+                report.push(`❌ Complete Task: ${attrs.id} 못 찾음 (이미 done 이거나 존재 X)`);
+            }
+        } catch (err: any) {
+            report.push(`❌ Complete Task Error: ${err?.message ?? String(err)}`);
+        }
+    }
+}
@@ -0,0 +1,41 @@
+import * as vscode from 'vscode';
+import type { TransactionManager } from '../../core/transaction';
+import type { ChatMessage } from '../../agent';
+
+/**
+ * Shared context that `executeActions` creates fresh on every turn. Every action
+ * handler receives the same object and accumulates its results on it — the
+ * result text (`report`), the brain-modified flag, the first created file path,
+ * and so on all end up here.
+ *
+ * Every handler has the same signature: `apply<Group>Actions(ctx: HandlerContext): Promise<void>`.
+ * There is no return value — results are accumulated by *mutating* the ctx
+ * object (array pushes, callback invocations).
+ *
+ * Why free functions + a ctx object:
+ *   - 15+ kinds of action handlers all share transactionManager / report /
+ *     chatHistory. Passing 7-8 arguments every time would clutter the call sites.
+ *   - The try/catch in executeActions is responsible for
+ *     transactionManager.rollback(), so handlers only need to throw properly
+ *     (FileSystemError etc.).
+ */
+export interface HandlerContext {
+    /** Raw text the AI emitted this turn — each handler extracts only its own tags with its own regex. */
+    aiMessage: string;
+    /** Workspace root — the sandbox is enforced through validatePath(). */
+    rootPath: string;
+    /** Absolute directory of the active brain. Files inside it cause the brainModified flag to be set. */
+    activeBrainDir: string;
+    /** Where handlers push the result lines shown to the user, e.g. "✅ Created: foo.ts". */
+    report: string[];
+    /** Some handlers (read_file, list_files, read_brain, sheet_read, calendar) push
+     *  their result as an internal system message so it is injected into the next turn's context. */
+    chatHistory: ChatMessage[];
+    /** Marks that a file inside the brain was modified — executeActions decides on auto-sync once it finishes. */
+    markBrainModified: () => void;
+    /** Records the absolute path of a newly created file — executeActions opens it in the editor once it finishes. */
+    setFirstCreated: (absPath: string) => void;
+    /** The dry-run / commit / rollback lifecycle is the caller's (executeActions) responsibility.
+     *  Handlers only call record() (at the point of a state-changing action). */
+    transactionManager: TransactionManager;
+    /** vscode.ExtensionContext — needed by the feature modules (calendar/sheets/tasks)
+     *  to access secrets / globalState such as OAuth tokens. */
+    context: vscode.ExtensionContext;
+}
@@ -0,0 +1,116 @@
+import type { TaskStatus } from '../features/tasks';
+
+/**
+ * Three action-tag attribute parsers — pure / stateless / imported directly by tests.
+ *
+ * Common pattern: accept `key="value"` | `key='value'` | `key=bare` and turn them
+ * into an object. The goal is to let through whatever quoting style the LLM
+ * emits — forcing a rule such as "double quotes only" in the model prompt
+ * increases other hallucinations, so the parser side is the lenient one.
+ *
+ * The `_` prefix marks "internal to agent.ts but exported for tests only" — the
+ * external users are executeActions in agent.ts and
+ * tests/{taskStore,sheetsApi,calendarApi}.test.ts. agent.ts re-exports this
+ * module to keep the existing import path (`from '../src/agent'`) working.
+ */
+
+const ATTR_RE = /([\w-]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+))/g; // groups: 1 = key, 2 = double-quoted value, 3 = single-quoted value, 4 = bare value
+
+/**
+ * Attribute parser for <add_task> / <update_task> / <complete_task>.
+ * Every field is optional here; required-field checks are left to the caller.
+ * `status` is normalized to one of in_progress / blocked / done, and any other
+ * non-empty value becomes `open`. Attributes with an empty value are ignored.
+ */
+export function _parseTaskAttrs(raw: string): {
+    id?: string;
+    title?: string;
+    owner?: string;
+    due?: string;
+    notes?: string;
+    status?: TaskStatus;
+} {
+    const parsed: any = {};
+    // Fresh regex instance per call so no lastIndex state is shared.
+    const attrRe = new RegExp(ATTR_RE.source, 'g');
+    for (let hit = attrRe.exec(raw); hit !== null; hit = attrRe.exec(raw)) {
+        const name = hit[1].toLowerCase();
+        const value = (hit[2] ?? hit[3] ?? hit[4] ?? '').trim();
+        if (value === '') continue;
+
+        if (name === 'id' || name === 'title' || name === 'owner' || name === 'due' || name === 'notes') {
+            parsed[name] = value;
+            continue;
+        }
+        if (name !== 'status') continue;
+
+        // Collapse whitespace to underscores so "in progress" matches "in_progress".
+        const token = value.toLowerCase().replace(/\s+/g, '_');
+        switch (token) {
+            case 'in_progress':
+            case 'inprogress':
+            case 'progress':
+                parsed.status = 'in_progress';
+                break;
+            case 'blocked':
+            case 'block':
+                parsed.status = 'blocked';
+                break;
+            case 'done':
+            case 'completed':
+            case 'closed':
+                parsed.status = 'done';
+                break;
+            default:
+                parsed.status = 'open';
+        }
+    }
+    return parsed;
+}
+
+/**
+ * Parses the attribute string of <read_sheet> / <write_sheet> / <append_sheet>
+ * into an object. The id may be spelled spreadsheet_id / spreadsheetId /
+ * sheet_id / sheetId (keys are compared case-insensitively) to absorb the LLM's
+ * emission variants. Attributes with an empty value are ignored.
+ */
+export function _parseSheetAttrs(raw: string): { spreadsheetId?: string; range?: string } {
+    const parsed: { spreadsheetId?: string; range?: string } = {};
+    // Fresh regex instance per call so no lastIndex state is shared.
+    const attrRe = new RegExp(ATTR_RE.source, 'g');
+    for (let hit = attrRe.exec(raw); hit !== null; hit = attrRe.exec(raw)) {
+        const value = (hit[2] ?? hit[3] ?? hit[4] ?? '').trim();
+        if (value === '') continue;
+        switch (hit[1].toLowerCase()) {
+            case 'spreadsheet_id':
+            case 'spreadsheetid':
+            case 'sheet_id':
+            case 'sheetid':
+                parsed.spreadsheetId = value;
+                break;
+            case 'range':
+                parsed.range = value;
+                break;
+        }
+    }
+    return parsed;
+}
+
+/**
+ * Parses the attribute string of <create_calendar_event ...> into an object.
+ * Double-quoted, single-quoted and unquoted (terminated by whitespace or `>`)
+ * values are all accepted so that any style the LLM emits gets through.
+ * Exported so it can be unit-tested.
+ *
+ * - `duration` is kept only when it parses to a positive integer.
+ * - `all_day` / `allday` / `all-day` is true for `true`, `1` or `yes` in any
+ *   letter case (e.g. `True`, `YES`) and false for any other non-empty value.
+ * - Attributes with an empty value and unknown keys are ignored.
+ *
+ * @param raw The text between the tag name and the closing `>`.
+ * @returns The recognized attributes; required-field checks are left to the caller.
+ */
+export function _parseCalEventAttrs(raw: string): {
+    title?: string;
+    start?: string;
+    end?: string;
+    duration?: number;
+    location?: string;
+    allDay?: boolean;
+} {
+    const out: any = {};
+    // Supports keys containing `-` (all-day). Deliberately the same pattern as
+    // ATTR_RE, but a fresh regex instance is created on every call to avoid the
+    // shared-lastIndex bug.
+    const re = new RegExp(ATTR_RE.source, 'g');
+    let m: RegExpExecArray | null;
+    while ((m = re.exec(raw)) !== null) {
+        const key = m[1].toLowerCase();
+        const val = (m[2] ?? m[3] ?? m[4] ?? '').trim();
+        if (!val) continue;
+        switch (key) {
+            case 'title': out.title = val; break;
+            case 'start': out.start = val; break;
+            case 'end': out.end = val; break;
+            case 'duration': {
+                const n = parseInt(val, 10);
+                if (!Number.isNaN(n) && n > 0) out.duration = n;
+                break;
+            }
+            case 'location': out.location = val; break;
+            case 'all_day':
+            case 'allday':
+            case 'all-day': {
+                // Models frequently emit Python-style `True` or shouting `YES`;
+                // compare case-insensitively so those are not read as false.
+                const flag = val.toLowerCase();
+                out.allDay = flag === 'true' || flag === '1' || flag === 'yes';
+                break;
+            }
+        }
+    }
+    return out;
+}
@@ -0,0 +1,159 @@
+import * as vscode from 'vscode';
+import { logInfo, logError } from '../../utils';
+import type { ChatMessage } from '../../agent';
+import {
+    extractVisibleFinal,
+    shouldAutoContinue,
+    mergeContinuationParts,
+    buildContinuationUserPrompt,
+    CONTINUATION_SYSTEM_PROMPT,
+} from '../../core/responseRecovery';
+import { isRestartedAnswer } from '../../lib/contextBuilders/outputSanitization';
+import {
+    estimateTokens,
+    estimateMessagesTokens,
+    computeOutputBudget,
+    classifyStopReason,
+    type ContextLimits,
+} from '../../lib/contextManager';
+import { recordTelemetry } from '../../core/telemetry';
+
+/** Result shape of `extractVisibleFinal`, derived with `ReturnType` so no separately named result type has to be imported. */
+type CleanedAnswer = ReturnType<typeof extractVisibleFinal>;
+
+export interface ApplyAutoContinuationDeps {
+    /** Bound `this.streamChatOnce` — same params shape as the original method. The loop reads `text`, `stopReason` and `aborted` from its result. */
+    streamChatOnce: (params: any) => Promise<{ text: string; stopReason?: string; aborted: boolean }>;
+    /** Bound `this.isStaleRun`. Checked before every continuation round; a stale run ends the loop. */
+    isStaleRun: (runId: number) => boolean;
+    /** Live AbortSignal getter — controller is reassigned across turns, so the signal is re-read on every round. */
+    getAbortSignal: () => AbortSignal | undefined;
+    /** Webview for posting `autoContinue` UI updates. When it returns undefined no update is posted. */
+    getWebview: () => vscode.Webview | undefined;
+}
+
+export interface ApplyAutoContinuationInput {
+    /** From `extractVisibleFinal(aiResponseText)`. The loop replaces `cleaned` with a copy whose `visible` has the continuation merged in. */
+    cleaned: CleanedAnswer;
+    finishStopReason: string | undefined; // initial stop reason; run through classifyStopReason to decide whether to continue
+    prompt: string | null;        // user prompt; when null/empty the first user message in chatHistory is used instead
+    chatHistory: ChatMessage[];  // used to find the original user prompt fallback
+    maxOutputTokens: number;     // the first generation's output budget (carried for first round)
+    ctxLimits: ContextLimits;     // passed to computeOutputBudget; its contextLength is forwarded to streamChatOnce
+    config: any;                  // getConfig() — we read autoContinueOnOutputLimit, maxAutoContinuations, contextOverflowPolicy
+    runId: number;                // forwarded to streamChatOnce and checked with deps.isStaleRun
+    useLmStudioSdk: boolean;      // forwarded unchanged to streamChatOnce
+    engine: string;               // forwarded to streamChatOnce and recorded in telemetry
+    ollamaUrl: string;            // forwarded unchanged to streamChatOnce
+    actualModel: string;          // forwarded to streamChatOnce as modelName; also used in logs and telemetry
+    temperature: number;          // forwarded unchanged to streamChatOnce
+    postLiveDeltas: boolean;      // forwarded unchanged to streamChatOnce
+}
+
+export interface ApplyAutoContinuationResult {
+    cleaned: CleanedAnswer;               // the answer with every accepted continuation merged into `visible`
+    finishStopReason: string | undefined; // stop reason of the last round that returned without aborting, or the input value if none did
+    continuationCount: number;            // number of continuation rounds started (incremented before each round runs)
+}
+
+/**
+ * Extends an answer that was cut off at the output-token ceiling by asking the
+ * model for continuation rounds and merging each accepted round into
+ * `cleaned.visible`.
+ *
+ * Rounds run only when `config.autoContinueOnOutputLimit` is set and
+ * `config.maxAutoContinuations > 0`, and keep going while `shouldAutoContinue`
+ * approves, the round limit is not reached, the abort signal is not aborted and
+ * the run is not stale. A round ends the loop when it is aborted, yields no
+ * visible text, restarts the answer from the top, adds fewer than 20 merged
+ * characters, or throws (the error is logged, not rethrown).
+ *
+ * @param deps  Callbacks bound to the owning agent (streaming, staleness, abort signal, webview).
+ * @param input The first generation's cleaned answer plus the settings needed to stream more.
+ * @returns The (possibly extended) answer, the latest stop reason and the number of rounds started.
+ */
+export async function applyAutoContinuation(
+    deps: ApplyAutoContinuationDeps,
+    input: ApplyAutoContinuationInput,
+): Promise<ApplyAutoContinuationResult> {
+    const {
+        prompt,
+        chatHistory,
+        maxOutputTokens,
+        ctxLimits,
+        config,
+        runId,
+        useLmStudioSdk,
+        engine,
+        ollamaUrl,
+        actualModel,
+        temperature,
+        postLiveDeltas,
+    } = input;
+    // Only these two are reassigned by the loop.
+    let { cleaned, finishStopReason } = input;
+
+    // (c) Auto-continuation — the visible answer hit the output-token ceiling.
+    let continuationCount = 0;
+    if (config.autoContinueOnOutputLimit && config.maxAutoContinuations > 0) {
+        // NOTE(review): `find` returns the FIRST user message in the history; if the
+        // fallback is meant to be the prompt of the current turn, the last user
+        // message may be the intended one — confirm against the caller.
+        const originalUserPrompt = prompt || (chatHistory.find(m => m.role === 'user' && typeof m.content === 'string')?.content as string) || '';
+        let lastOutputTokens = estimateTokens(cleaned.visible);
+        let lastMaxOutputTokens = maxOutputTokens;   // budget the last round actually had (≠ first gen's after round 1)
+        while (
+            shouldAutoContinue(classifyStopReason(finishStopReason), cleaned.visible, lastOutputTokens, lastMaxOutputTokens)
+            && continuationCount < config.maxAutoContinuations
+            && !deps.getAbortSignal()?.aborted
+            && !deps.isStaleRun(runId)
+        ) {
+            continuationCount++;
+            const continuationStartMs = Date.now();
+            deps.getWebview()?.postMessage({ type: 'autoContinue', value: `답변이 길어 이어서 정리하는 중입니다... (${continuationCount}/${config.maxAutoContinuations})` });
+            try {
+                const contMsgs: ChatMessage[] = [
+                    { role: 'system', content: CONTINUATION_SYSTEM_PROMPT, internal: true },
+                    { role: 'user', content: buildContinuationUserPrompt(originalUserPrompt, cleaned.visible) },
+                ];
+                // Each round gets its own output budget, computed from the continuation messages.
+                lastMaxOutputTokens = computeOutputBudget(estimateMessagesTokens(contMsgs), ctxLimits).maxOutputTokens;
+                // Stream the continuation through the same channel as the main turn so
+                // the user sees the answer keep growing instead of freezing for 10–30s
+                // while we silently call non-streaming. The trailing streamReplace
+                // (after sanitize / merge) corrects any overlap the model re-emits.
+                const cr = await deps.streamChatOnce({
+                    runId, useLmStudioSdk, engine, ollamaUrl, modelName: actualModel,
+                    messages: contMsgs,
+                    temperature,
+                    maxTokens: lastMaxOutputTokens,
+                    contextLength: ctxLimits.contextLength,
+                    contextOverflowPolicy: config.contextOverflowPolicy,
+                    signal: deps.getAbortSignal(),
+                    postLiveDeltas,
+                });
+                if (cr.aborted) {
+                    logInfo('Auto-continuation aborted mid-stream.', { model: actualModel, round: continuationCount });
+                    break;
+                }
+                finishStopReason = cr.stopReason;
+                const ccl = extractVisibleFinal(cr.text);
+                if (!ccl.visible.trim()) {
+                    logInfo('Continuation produced no visible text — stopping.', { model: actualModel, round: continuationCount });
+                    break;
+                }
+                // A weak model often ignores "continue from here" and re-generates the
+                // whole answer from the top. Discard such a restart instead of merging
+                // it — otherwise the user gets the entire analysis twice.
+                if (isRestartedAnswer(cleaned.visible, ccl.visible)) {
+                    logInfo('Continuation restarted the answer instead of continuing — discarding it.', { model: actualModel, round: continuationCount });
+                    break;
+                }
+                const before = cleaned.visible;
+                cleaned = { ...cleaned, visible: mergeContinuationParts(cleaned.visible, ccl.visible), wasThoughtOnly: false };
+                lastOutputTokens = estimateTokens(ccl.visible);
+                logInfo('Auto-continued the answer.', { model: actualModel, round: continuationCount, addedChars: ccl.visible.length, totalChars: cleaned.visible.length, contStopReason: cr.stopReason, contMaxTokens: lastMaxOutputTokens });
+                recordTelemetry({
+                    kind: 'continuation',
+                    durationMs: Date.now() - continuationStartMs,
+                    model: actualModel, engine,
+                    outputTokens: lastOutputTokens,
+                    round: continuationCount,
+                    stopReason: cr.stopReason,
+                    note: `addedChars=${ccl.visible.length} mergedAdd=${cleaned.visible.length - before.length}`,
+                });
+                // Guard against a continuation that adds (almost) nothing new after dedup — stop instead of spinning.
+                if (cleaned.visible.length - before.length < 20) {
+                    logInfo('Continuation added negligible new text — stopping.', { model: actualModel, round: continuationCount });
+                    break;
+                }
+            } catch (e: any) {
+                logError('Auto-continuation failed.', { model: actualModel, round: continuationCount, error: e?.message ?? String(e) });
+                break;
+            }
+        }
+    }
+
+    // A stale run returns the same triple as a live one, so no separate early
+    // return is needed; callers decide what to do with a stale result.
+    return { cleaned, finishStopReason, continuationCount };
+}
@@ -0,0 +1,75 @@
+import { stripAstraFormattingForAgentMode } from '../../lib/contextBuilders/systemPromptShaping';
+import { estimateTokens } from '../../lib/contextManager';
+import { logInfo } from '../../utils';
+
+/** Inputs consumed by `buildAgentModeSystemPrompt` to assemble the Agent-mode system prompt. */
+export interface BuildAgentModeSystemPromptInput {
+    /** Base system prompt — `Astra: …` block etc. */
+    systemPrompt: string;
+    /** Agent skill content the user selected. */
+    agentSkillContext: string;
+    /** Pre-built mode-bridge context (or ''). */
+    modeBridgeCtx: string;
+    /** [PRIOR TURN CONCLUSION] block — the first sentence of the previous assistant answer (or ''). */
+    priorConclusionCtx: string;
+    /** Appended verbatim right after the stripped base prompt (no separator is added by the builder). */
+    designerCtx: string;
+    /** Appended verbatim right after `designerCtx` (no separator is added by the builder). */
+    secondBrainTraceCtx: string;
+    /** First entry inside the [CONTEXT] … [/CONTEXT] section. */
+    memoryCtx: string;
+    /** Second entry inside the [CONTEXT] … [/CONTEXT] section. */
+    knowledgeContextForPrompt: string;
+    /** Last entry inside the [CONTEXT] … [/CONTEXT] section. */
+    contextBlock: string;
+    /** Emitted after [/CONTEXT], just before the closing agent reminder. */
+    negativeCtx: string;
+    /** For token-cost logging. */
+    actualModel: string;
+    /** For token-cost logging — getConfig().contextLength. */
+    contextLength: number;
+}
+
+/**
+ * Assembles the system prompt used when the user has selected an Agent.
+ *
+ * Layout of the result: agent directive block, optional mode bridge, optional prior-turn
+ * conclusion, stripped Astra base prompt (+ designer / second-brain trace), the
+ * [CONTEXT] … [/CONTEXT] section, negative constraints, and a closing agent reminder.
+ *
+ * @param input - Prompt fragments plus the model name / context length used for logging.
+ * @returns The complete system prompt string.
+ */
+export function buildAgentModeSystemPrompt(input: BuildAgentModeSystemPromptInput): string {
+    // The Agent prompt leads because models anchor on the first persona they see (small
+    // models especially). The Astra base prompt is reduced to neutral scaffolding and
+    // follows it; a short reminder at the very end guards against drifting back to a
+    // generic assistant.
+    const scaffoldingPrompt = stripAstraFormattingForAgentMode(input.systemPrompt);
+    const agentInstructions = (input.agentSkillContext || '').trim();
+
+    // Purely diagnostic: flag agent prompts that eat more than half of the window.
+    const agentPromptTokens = estimateTokens(agentInstructions);
+    const halfWindow = Math.floor(input.contextLength * 0.5);
+    if (agentPromptTokens > halfWindow) {
+        logInfo('Agent prompt is unusually large relative to the context window.', {
+            model: input.actualModel, agentPromptTokens, contextLength: input.contextLength,
+        });
+    }
+
+    const directiveLines: string[] = [
+        '[AGENT MODE — PRIMARY DIRECTIVE]',
+        'A specialized Agent has been selected by the user. The Agent System Prompt below is your',
+        'PRIMARY directive: it defines your role, persona, tone, and output format. Follow it exactly.',
+        'Everything after the Agent block (action-tag reference, date, brain/project context) is technical',
+        'scaffolding — use it only as the Agent\'s task requires. Do NOT impose a generic assistant',
+        'format (e.g. ## 요약 / ## 상세 설명 / ## 제안) unless the Agent explicitly asks for one.',
+        '',
+        '--- AGENT SYSTEM PROMPT START ---',
+    ];
+    directiveLines.push(
+        agentInstructions || '(this agent has no instructions yet — fall back to being a concise, direct assistant)'
+    );
+    directiveLines.push('--- AGENT SYSTEM PROMPT END ---');
+    const agentBlock = directiveLines.join('\n');
+
+    const closingReminder = '\n\n[REMINDER] You are operating as the Agent defined above. Keep its role, persona, and output format. Do not fall back to a default assistant style or section format.';
+
+    // Only the span between [CONTEXT] and [/CONTEXT] is eligible for trimming when the
+    // context overflows — the agent block (front), the reminder (back) and the negative
+    // constraints are protected. memoryCtx (RAG / memory / lessons) sits inside [CONTEXT]
+    // too, so it is cut before conversation history when tokens are tight.
+    const bridgeSegment = input.modeBridgeCtx ? '\n\n' + input.modeBridgeCtx : '';
+    const priorConclusionSegment = input.priorConclusionCtx ? '\n\n' + input.priorConclusionCtx : '';
+
+    const leadSection = `${agentBlock}${bridgeSegment}${priorConclusionSegment}`;
+    const scaffoldingSection = `\n\n${scaffoldingPrompt}${input.designerCtx}${input.secondBrainTraceCtx}`;
+    const trimmableSection = `\n\n[CONTEXT]\n${input.memoryCtx}\n${input.knowledgeContextForPrompt}\n${input.contextBlock}\n[/CONTEXT]\n`;
+    const protectedTail = `${input.negativeCtx}${closingReminder}`;
+
+    return `${leadSection}${scaffoldingSection}${trimmableSection}${protectedTail}`;
+}
@@ -0,0 +1,82 @@
+import {
+    isProjectKnowledgeCreationRequest,
+    buildAstraStanceContext,
+} from '../../lib/contextBuilders/localProjectIntent';
+import { isThinkingPartnerRequest } from '../../lib/contextBuilders/promptDetection';
+import { buildKnowledgeMixPolicy } from '../../retrieval/knowledgeMix';
+
+/** Inputs consumed by `buildAstraModeSystemPrompt` (the mode used when no Agent is selected). */
+export interface BuildAstraModeSystemPromptInput {
+    /** Current user prompt; when null/empty the prompt-dependent directive blocks are skipped. */
+    prompt: string | null;
+    /** Base system prompt; it is the first segment of the assembled result. */
+    systemPrompt: string;
+    /** Pre-built mode-bridge context (or ''); inserted right after the base prompt. */
+    modeBridgeCtx: string;
+    /** [PRIOR TURN CONCLUSION] block — the first sentence of the previous assistant answer. Used to handle follow-up corrections. */
+    priorConclusionCtx: string;
+    /** Appended verbatim (no separator is added by the builder). */
+    designerCtx: string;
+    /** Appended verbatim (no separator is added by the builder). */
+    projectArchitectureCtx: string;
+    /** Appended verbatim (no separator is added by the builder). */
+    secondBrainTraceCtx: string;
+    /** First entry inside the [CONTEXT] … [/CONTEXT] section. */
+    memoryCtx: string;
+    /** Second entry inside the [CONTEXT] … [/CONTEXT] section. */
+    knowledgeContextForPrompt: string;
+    /** Last entry inside the [CONTEXT] … [/CONTEXT] section. */
+    contextBlock: string;
+    /** Emitted after [/CONTEXT] as the final segment. */
+    negativeCtx: string;
+    /** When true, the thinking-partner, stance, v4-policy and knowledge-mix blocks are suppressed and the casual-mode block is added. */
+    isCasualConversation: boolean;
+    /** Local project path context (or ''); gates the project-knowledge-creation override. */
+    localPathContext: string;
+    /** From this._turnCtx.knowledgeMix — pass null when absent. */
+    knowledgeMix: any;
+}
+
+/**
+ * Assembles the system prompt for plain Astra mode (no Agent selected).
+ *
+ * Order of the result: base prompt, optional mode bridge, optional prior-turn conclusion,
+ * designer / architecture context, turn-specific directive blocks, the
+ * [CONTEXT] … [/CONTEXT] section, and finally the negative constraints.
+ *
+ * @param input - Prompt fragments and per-turn flags.
+ * @returns The complete system prompt string.
+ */
+export function buildAstraModeSystemPrompt(input: BuildAstraModeSystemPromptInput): string {
+    const { prompt, isCasualConversation, localPathContext, knowledgeMix } = input;
+
+    // Override for "create project knowledge" requests that come with an accessible path.
+    let localProjectKnowledgeCtx = '';
+    if (prompt && localPathContext && isProjectKnowledgeCreationRequest(prompt)) {
+        localProjectKnowledgeCtx = `\n\n[LOCAL PROJECT KNOWLEDGE CREATION OVERRIDE]\nThe user gave an accessible local project path and asked to create project knowledge. Do not ask blocking scope questions. Use a sensible default MVP: create or propose a project overview note from the inspected tree and priority file previews. If writing is not explicitly safe, provide the concrete note draft and target path.`;
+    }
+
+    let thinkingPartnerCtx = '';
+    let astraStanceCtx = '';
+    let v4PolicyCtx = '';
+    let knowledgeMixCtx = '';
+    let casualCtx = '';
+
+    if (isCasualConversation) {
+        // Greetings / small talk: every knowledge-oriented directive is left out so it
+        // cannot dilute the casual-mode instruction.
+        casualCtx = '\n\n[CASUAL CONVERSATION MODE]\nThe user sent a greeting, acknowledgement, or light conversational message. Reply naturally and briefly to the message itself. Do not use Second Brain, memory, project records, reports, references, or analysis unless the user explicitly asks for them.';
+    } else {
+        if (prompt) {
+            if (isThinkingPartnerRequest(prompt)) {
+                thinkingPartnerCtx = `\n\n[JARVIS THINKING PARTNER MODE]\nThe user is using this tool to clarify project direction, not just to receive generic advice. Give a clear opinionated verdict first. Then separate confirmed facts, inferences, concerns, decision forks, and the next small action. Do not merely say the direction is good. If evidence is thin, say exactly what is missing and what file or record should be checked next.`;
+            }
+            astraStanceCtx = `\n\n${buildAstraStanceContext(prompt, localPathContext)}`;
+        }
+
+        // The v4 knowledge-management policy only matters when knowledge is actually in play.
+        v4PolicyCtx = [
+            "\n### 🏛️ 지식 관리 정책 v4.0 (Knowledge Management Policy Applied)",
+            "- [신뢰도] '의도적으로 작성된 글'은 Medium 이상의 신뢰도를 부여하여 최우선 근거로 활용할 것.",
+            "- [품질] 데이터의 양보다 '추론 기여 밀도'를 중시하여 핵심 위주로 깊이 있게 서술할 것.",
+            "- [충돌] 지식 간 충돌 발생 시 시스템이 독단적으로 판단하지 말고, 반드시 [CONFLICT WARNING] 플래그와 함께 상충되는 두 관점을 모두 명시하여 사용자에게 판단을 위임할 것."
+        ].join('\n');
+
+        // Knowledge Mix policy: how strongly to lean on Second Brain evidence versus the
+        // model's own general knowledge for this turn.
+        if (knowledgeMix) {
+            const mixPolicy = buildKnowledgeMixPolicy(knowledgeMix);
+            if (mixPolicy) {
+                knowledgeMixCtx = `\n\n${mixPolicy}`;
+            }
+        }
+    }
+
+    // priorConclusionCtx sits where modeBridgeCtx does (directly after the base prompt) so
+    // the model anchors on its own previous conclusion and reads the user's follow-up as a
+    // correction of that conclusion.
+    const bridgeSegment = input.modeBridgeCtx ? '\n\n' + input.modeBridgeCtx : '';
+    const priorConclusionSegment = input.priorConclusionCtx ? '\n\n' + input.priorConclusionCtx : '';
+
+    const preamble = `${input.systemPrompt}${bridgeSegment}${priorConclusionSegment}`;
+    const directives = `${input.designerCtx}${input.projectArchitectureCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${input.secondBrainTraceCtx}${v4PolicyCtx}${knowledgeMixCtx}${casualCtx}`;
+    // Only the span between [CONTEXT] and [/CONTEXT] is eligible for trimming on overflow;
+    // negative constraints are protected. memoryCtx (RAG / memory / lessons) lives inside
+    // [CONTEXT], so it is cut before conversation history when tokens are tight.
+    const trimmableSection = `\n\n[CONTEXT]\n${input.memoryCtx}\n${input.knowledgeContextForPrompt}\n${input.contextBlock}\n[/CONTEXT]\n`;
+
+    return `${preamble}${directives}${trimmableSection}${input.negativeCtx}`;
+}
@@ -0,0 +1,57 @@
+import type { ChatMessage } from '../../agent';
+import { computeModeSignature } from '../../lib/contextBuilders/systemPromptShaping';
+import { buildLastTopicLine } from '../../lib/contextBuilders/lastTopicLine';
+import { getActiveBrainProfile, logError } from '../../utils';
+
+/** Inputs consumed by `buildModeBridgeContext`. */
+export interface BuildModeBridgeContextInput {
+    /** Read for `agentSkillContext`, `companyMode` and `multiAgent` when computing the mode signature. */
+    options: any;  // the handlePrompt options object
+    /** Signature stored from the previous turn; null means there is nothing to compare against, so no bridge is produced. */
+    lastModeSignature: string | null;
+    /** Conversation so far; used only to derive the one-line "last topic" hint. */
+    chatHistory: ChatMessage[];
+}
+
+/** Result of `buildModeBridgeContext`. */
+export interface BuildModeBridgeContextResult {
+    /** "[MODE TRANSITION BRIDGE]\n…" string to embed in system prompt, or '' when no transition. */
+    modeBridgeCtx: string;
+    /**
+     * The newly-computed signature — caller should store this as the next-turn comparison baseline.
+     * It is null when the computation threw (the failure is logged and treated as non-fatal).
+     */
+    newSignature: string | null;
+}
+
+/**
+ * Detects a mode change between turns and, if one happened, produces a short bridge
+ * block for the system prompt.
+ *
+ * v2.2.69 — the current mode signature is compared with the previous one; when it
+ * differs, a "previous mode / current mode / last topic" note is generated. chatHistory
+ * itself is never touched, so the conversation looks continuous to the user while the
+ * model is told it is operating right after a mode switch.
+ *
+ * @param input - handlePrompt options, the previous signature and the chat history.
+ * @returns The bridge text ('' when there was no transition) and the new signature;
+ *          on any error both fall back to '' / null and the error is logged.
+ */
+export function buildModeBridgeContext(input: BuildModeBridgeContextInput): BuildModeBridgeContextResult {
+    const { options, lastModeSignature, chatHistory } = input;
+
+    try {
+        // Agent name = first line of the skill text, capped at 60 chars, heading marker removed.
+        let agentSkillName = '';
+        if (options.agentSkillContext) {
+            const headingLine = options.agentSkillContext.split('\n')[0] || '';
+            agentSkillName = headingLine.slice(0, 60).replace(/^#\s*/, '').trim();
+        }
+
+        const currentSig = computeModeSignature({
+            agentSkillName: agentSkillName || undefined,
+            companyMode: Boolean(options.companyMode),
+            multiAgent: Boolean(options.multiAgent),
+            brainName: getActiveBrainProfile()?.name,
+        });
+
+        // First turn (no baseline) or unchanged mode: nothing to bridge.
+        const modeChanged = lastModeSignature !== null && lastModeSignature !== currentSig;
+        if (!modeChanged) {
+            return { modeBridgeCtx: '', newSignature: currentSig };
+        }
+
+        const topic = buildLastTopicLine(chatHistory);
+        const bridgeLines: string[] = [
+            '',
+            '[MODE TRANSITION BRIDGE]',
+            `이전 모드: ${lastModeSignature}`,
+            `현재 모드: ${currentSig}`,
+            ...(topic ? [`직전 대화 주제(한 줄): ${topic}`] : []),
+            '대화 history 는 그대로 이어진다. 새 모드의 페르소나/포맷을 따르되, 직전까지 사용자가 다루던 맥락을 잊지 말 것.',
+        ];
+        return { modeBridgeCtx: bridgeLines.join('\n'), newSignature: currentSig };
+    } catch (e: any) {
+        logError('Mode-bridge computation failed (non-fatal).', { error: e?.message || String(e) });
+        return { modeBridgeCtx: '', newSignature: null };
+    }
+}
@@ -0,0 +1,117 @@
+import * as vscode from 'vscode';
+import * as path from 'path';
+import type { ChatMessage } from '../../agent';
+import type { BrainProfile } from '../../config';
+import { findBrainFiles } from '../../utils';
+import {
+    isExplicitSecondBrainRequest,
+    isSecondBrainInventoryRequest,
+} from '../../lib/contextBuilders/promptDetection';
+import { buildSecondBrainInventoryContext } from '../../lib/contextBuilders/secondBrainInventory';
+import { buildLocalProjectPathContext } from '../../lib/contextBuilders/localProjectPath';
+import { buildRecentProjectKnowledgeContext } from '../../lib/contextBuilders/recentProjectKnowledge';
+import { buildJarvisProjectBriefContext } from '../../lib/contextBuilders/jarvisProjectBrief';
+import { buildAstraModeArchitectureContext } from '../../lib/contextBuilders/astraModeArchitecture';
+import { buildSecondBrainTrace, SecondBrainTrace } from '../../features/secondBrainTrace';
+
+/** Inputs consumed by `buildTurnContextBlocks`. */
+export interface BuildTurnContextBlocksInput {
+    /** Current user prompt; when null/empty the prompt-dependent sections are skipped. */
+    prompt: string | null;
+    options: any;  // the handlePrompt options object (we read secondBrainTraceEnabled, brainProfileId)
+    /** When true, the trace, inventory, recent-knowledge, brief and mode-architecture sections are skipped. */
+    isCasualConversation: boolean;
+    /** The trace and the prompt-derived context sections are only built when this is 0. */
+    loopDepth: number;
+    config: any;  // getConfig() result — we read memoryLongTermFiles, maxContextSize, brainProfiles
+    /** Brain profile whose `localBrainPath` is scanned for knowledge files. */
+    activeBrain: BrainProfile;
+    /** Conversation so far; forwarded to the recent-project-knowledge builder. */
+    chatHistory: ChatMessage[];
+    /** Forwarded to the local-path and recent-project-knowledge builders. */
+    rootPath: string;
+}
+
+/** Result of `buildTurnContextBlocks`. */
+export interface BuildTurnContextBlocksResult {
+    /** Open-file excerpt followed by each non-empty local-path / recent-knowledge / project-brief / mode-architecture section ('' when none apply). */
+    contextBlock: string;
+    /** "[ACTIVE SECOND BRAIN]" summary: name, path, file count, optional description and file examples. */
+    brainContext: string;
+    /** Inventory block prefixed with a blank line, or '' when the prompt is not an inventory request. */
+    brainInventoryCtx: string;
+    /** Everything `findBrainFiles` returned for the active brain path. */
+    brainFiles: string[];
+    /** Up to 30 brain file paths, relative to the brain path, one per line. */
+    brainPreview: string;
+    /** Local project path context, or '' when not applicable. */
+    localPathContext: string;
+    /** Trace built for this turn, or null when tracing was disabled or skipped. */
+    secondBrainTrace: SecondBrainTrace | null;
+}
+
+/**
+ * Gathers the per-turn context fragments: Second Brain summary / inventory / trace, the
+ * currently open editor file, and the prompt-derived project context sections.
+ *
+ * @param input - Prompt, flags, config, active brain, chat history and workspace root.
+ * @returns The individual fragments; `contextBlock` is the concatenation of every
+ *          non-empty section, each preceded by a blank line.
+ */
+export function buildTurnContextBlocks(input: BuildTurnContextBlocksInput): BuildTurnContextBlocksResult {
+    const { prompt, options, isCasualConversation, loopDepth, config, activeBrain, chatHistory, rootPath } = input;
+
+    // Prompt narrowed by the two gates most sections share: outermost loop pass only,
+    // and (for the analytic sections) not a casual message.
+    const topLevelPrompt = prompt && loopDepth === 0 ? prompt : null;
+    const analyticPrompt = isCasualConversation ? null : topLevelPrompt;
+
+    const brainFiles = findBrainFiles(activeBrain.localBrainPath);
+
+    let secondBrainTrace: SecondBrainTrace | null = null;
+    if (options.secondBrainTraceEnabled && analyticPrompt) {
+        secondBrainTrace = buildSecondBrainTrace(analyticPrompt, activeBrain.localBrainPath, {
+            force: isExplicitSecondBrainRequest(analyticPrompt),
+            limit: Math.max(config.memoryLongTermFiles, 5)
+        });
+    }
+
+    // Preview: the first 30 files, shown relative to the brain directory.
+    const previewPaths: string[] = [];
+    for (const file of brainFiles.slice(0, 30)) {
+        previewPaths.push(path.relative(activeBrain.localBrainPath, file));
+    }
+    const brainPreview = previewPaths.join('\n');
+
+    const brainLines: string[] = [
+        `[ACTIVE SECOND BRAIN]`,
+        `Use this Local Brain only when it is relevant to the user's current question.`,
+        `Name: ${activeBrain.name}`,
+        `Path: ${activeBrain.localBrainPath}`,
+        `Knowledge files: ${brainFiles.length}`,
+    ];
+    if (activeBrain.description) {
+        brainLines.push(`Description: ${activeBrain.description}`);
+    }
+    brainLines.push(brainPreview ? `Available file examples:\n${brainPreview}` : 'Files: none found');
+    const brainContext = brainLines.join('\n');
+
+    // The inventory check is not limited to the outermost loop pass.
+    let brainInventoryCtx = '';
+    if (prompt && !isCasualConversation && isSecondBrainInventoryRequest(prompt)) {
+        brainInventoryCtx = `\n\n${buildSecondBrainInventoryContext(activeBrain, brainFiles)}`;
+    }
+
+    // Every entry pushed here ends up in contextBlock, each preceded by a blank line.
+    const sections: string[] = [];
+
+    const editor = vscode.window.activeTextEditor;
+    if (editor && editor.document.uri.scheme === 'file') {
+        const text = editor.document.getText();
+        const name = path.basename(editor.document.fileName);
+        // Skip blank files and anything at or above the configured size limit.
+        if (text.trim().length > 0 && text.length < config.maxContextSize) {
+            sections.push(`[Currently open file: ${name}]\n\`\`\`\n${text}\n\`\`\``);
+        }
+    }
+
+    const localPathContext = topLevelPrompt ? buildLocalProjectPathContext(topLevelPrompt, rootPath) : '';
+    if (localPathContext) {
+        sections.push(localPathContext);
+    }
+
+    // Recent project knowledge is only consulted when no explicit local path was resolved.
+    const recentProjectKnowledgeContext = analyticPrompt && !localPathContext
+        ? buildRecentProjectKnowledgeContext(analyticPrompt, rootPath, chatHistory)
+        : '';
+    if (recentProjectKnowledgeContext) {
+        sections.push(recentProjectKnowledgeContext);
+    }
+
+    const projectBriefContext = analyticPrompt
+        ? buildJarvisProjectBriefContext(analyticPrompt, localPathContext, recentProjectKnowledgeContext)
+        : '';
+    if (projectBriefContext) {
+        sections.push(projectBriefContext);
+    }
+
+    const modeArchitectureContext = analyticPrompt ? buildAstraModeArchitectureContext(analyticPrompt) : '';
+    if (modeArchitectureContext) {
+        sections.push(modeArchitectureContext);
+    }
+
+    const contextBlock = sections.map(section => `\n\n${section}`).join('');
+
+    return {
+        contextBlock,
+        brainContext,
+        brainInventoryCtx,
+        brainFiles,
+        brainPreview,
+        localPathContext,
+        secondBrainTrace,
+    };
+}
@@ -0,0 +1,161 @@
+import { logInfo, logError } from '../../utils';
+import type { ChatMessage } from '../../agent';
+import {
+    estimateTokens,
+    estimateMessagesTokens,
+    computeOutputBudget,
+    trimHistoryToBudget,
+    truncateSystemPromptContext,
+    estimateModelParamsB,
+    type ContextLimits,
+} from '../../lib/contextManager';
+import { buildDroppedHistorySummary } from '../../lib/contextBuilders/droppedHistorySummary';
+
+/** Inputs consumed by `computeBudgetedRequest`. */
+export interface ComputeBudgetedRequestInput {
+    /** Complete system prompt; its [CONTEXT] section may be truncated to fit the budget. */
+    fullSystemPrompt: string;
+    /** Caller is expected to have run `capChatHistory` on this already. */
+    reqMessages: ChatMessage[];
+    /** Model name; used for parameter-size estimation and logging. */
+    actualModel: string;
+    /** Result of `getConfig()` — reads contextLength, maxOutputTokens, contextSafetyMargin, smallModelContextCap, autoCompactHistory. */
+    config: any;
+    /** Number of attached images; 1024 tokens are reserved for each. */
+    imageCount: number;
+}
+
+/** Result of `computeBudgetedRequest`. */
+export interface ComputeBudgetedRequestResult {
+    /** Budgeted system prompt message followed by the (possibly compacted) history. */
+    messagesForRequest: ChatMessage[];
+    /** Limits used for budgeting; `contextLength` is the effective (possibly small-model-capped) window. */
+    ctxLimits: ContextLimits;
+    /** Estimated tokens of `messagesForRequest` plus the image reserve. */
+    inputTokens: number;
+    /** Dynamic output cap — same value as `outputBudget.maxOutputTokens`. */
+    maxOutputTokens: number;
+    /** Estimated tokens of the budgeted system prompt, plus 4. */
+    systemTokens: number;
+    /** True when the system prompt context had to be truncated. */
+    systemTruncated: boolean;
+    droppedHistoryCount: number;
+    /** Length of the history portion actually sent (excludes the system prompt message). */
+    budgetedHistoryLength: number;
+    /** Exact return shape of `computeOutputBudget`. */
+    outputBudget: { maxOutputTokens: number; available: number; tight: boolean };
+    /** Estimated parameter count (in billions) for the model, or null when it cannot be estimated. */
+    modelParamB: number | null;
+    /** True when the opt-in small-model cap replaced the nominal context length. */
+    cappedForSmallModel: boolean;
+}
+
+/**
+ * Fits the request input (system prompt + conversation history + images) into the
+ * context-window budget and returns the final message array plus the dynamic output cap.
+ *
+ * The caller is expected to have capped the message count with capChatHistory beforehand;
+ * static limits such as AgentExecutor.MAX_RETAINED_MESSAGES are not this function's concern.
+ *
+ * @param input - System prompt, pre-capped history, model name, config values and image count.
+ * @returns The budgeted messages together with the token accounting used to build them.
+ *          `droppedHistoryCount` is the count reported by the history trimmer, so the
+ *          summary message that stands in for the dropped turns cannot mask a dropped message.
+ */
+export function computeBudgetedRequest(input: ComputeBudgetedRequestInput): ComputeBudgetedRequestResult {
+    const { fullSystemPrompt, reqMessages, actualModel, config, imageCount } = input;
+
+    // ──────────────────────────────────────────────────────────────────
+    // [Context Limit Manager] The context length does NOT mean "the answer may be that
+    // long": system prompt + history + input + generated answer + margin ≤ context length.
+    // Before sending the request we estimate the input tokens and
+    //   (1) shrink the [CONTEXT] block as a last resort if the system prompt is too large,
+    //   (2) compact the history to the remaining budget (the UI-facing chatHistory is untouched),
+    //   (3) compute the output cap (maxOutputTokens) dynamically.
+    // ──────────────────────────────────────────────────────────────────
+    // Optional opt-in guard (g1nation.smallModelContextCap, OFF/0 by default): some very small
+    // models (≤3B) emit EOS as the first token when the prompt is near their context window
+    // even though it nominally fits. If the user opted in, budget ≤3B models against that
+    // smaller effective window. Never applied to 4B+ models, and never when the setting is 0 —
+    // capping squeezes the output-token budget, so it's a knob, not a default.
+    const modelParamB = estimateModelParamsB(actualModel);
+    const smallModelCap = config.smallModelContextCap; // 0 = disabled (default)
+    const cappedForSmallModel = smallModelCap > 0
+        && modelParamB !== null && modelParamB <= 3
+        && config.contextLength > smallModelCap;
+    const effectiveContextLength = cappedForSmallModel ? smallModelCap : config.contextLength;
+    if (cappedForSmallModel) {
+        logInfo('Small model detected — capping effective context window for budgeting.', {
+            model: actualModel, paramB: modelParamB,
+            nominalContext: config.contextLength, effectiveContext: effectiveContextLength,
+        });
+    }
+    const ctxLimits: ContextLimits = {
+        contextLength: effectiveContextLength,
+        maxOutputTokens: config.maxOutputTokens,
+        safetyMargin: config.contextSafetyMargin,
+        minOutputTokens: 512,
+    };
+    const imageTokenReserve = imageCount * 1024;
+
+    // Output budget we ACTUALLY reserve before trimming — not the bare
+    // minOutputTokens floor (512). If we only reserve 512, a long session
+    // is allowed to grow the prompt until ~512-1k tokens remain for the
+    // answer; small/MoE local models (e.g. gemma 4B-active) then emit EOS
+    // as the first token and return an empty response. Reserving ~10% of
+    // the window (>=2048) forces history/system trimming to keep a real
+    // answer-sized hole open. Capped at maxOutputTokens.
+    const preferredOutputReserve = Math.min(
+        ctxLimits.maxOutputTokens,
+        Math.max(2048, Math.floor(ctxLimits.contextLength * 0.1))
+    );
+
+    // (1) The system prompt may use at most ~65% of the budget — beyond that the
+    //     [CONTEXT] block is cut first.
+    const systemCapTokens = Math.max(
+        1024,
+        Math.floor((ctxLimits.contextLength - ctxLimits.safetyMargin - preferredOutputReserve - imageTokenReserve) * 0.65)
+    );
+    const { prompt: budgetedSystemPrompt, truncated: systemTruncated } =
+        truncateSystemPromptContext(fullSystemPrompt, systemCapTokens);
+    if (systemTruncated) {
+        logInfo('System prompt context truncated to fit the context window.', { model: actualModel, systemCapTokens });
+    }
+    const systemTokens = estimateTokens(budgetedSystemPrompt) + 4;
+
+    // (2) Compact the conversation history.
+    const historyBudget = Math.max(
+        256,
+        ctxLimits.contextLength - systemTokens - ctxLimits.safetyMargin - preferredOutputReserve - imageTokenReserve
+    );
+    let budgetedHistory: ChatMessage[] = reqMessages;
+    // Taken from the trimmer itself rather than from an array-length difference: the
+    // summary message inserted for the dropped turns also occupies a slot in the trimmed
+    // array, so a length difference would under-report (and read 0 when exactly one
+    // message was dropped).
+    let droppedHistoryCount = 0;
+    if (config.autoCompactHistory) {
+        // v2.2.69 — take the dropped messages, build a heuristic summary and prepend it as
+        // a single system message. A bare count marker said nothing about what had been
+        // discussed, which caused a regression where the model lost context on later turns.
+        // Now the U1/A1/U2/A2 gist survives, so the sliding window actually works.
+        const trim = trimHistoryToBudget<ChatMessage>(reqMessages, historyBudget, (_n, dropped) => ({
+            role: 'system',
+            content: buildDroppedHistorySummary(dropped),
+            internal: true,
+        }));
+        budgetedHistory = trim.messages;
+        droppedHistoryCount = trim.droppedCount;
+        if (trim.droppedCount > 0) {
+            logInfo('Conversation history compacted to fit the context window (with summary).', {
+                model: actualModel, droppedCount: trim.droppedCount, historyBudget,
+            });
+        }
+    }
+
+    const messagesForRequest: ChatMessage[] = [
+        { role: 'system', content: budgetedSystemPrompt, internal: true },
+        ...budgetedHistory
+    ];
+
+    // (3) Dynamic output cap.
+    const inputTokens = estimateMessagesTokens(messagesForRequest) + imageTokenReserve;
+    const outputBudget = computeOutputBudget(inputTokens, ctxLimits);
+    const maxOutputTokens = outputBudget.maxOutputTokens;
+    if (outputBudget.tight) {
+        logError('Prompt nearly fills the context window — output budget is at the minimum.', {
+            model: actualModel, contextLength: ctxLimits.contextLength, inputTokens, maxOutputTokens,
+        });
+    }
+    logInfo('Context budget computed.', {
+        model: actualModel, contextLength: ctxLimits.contextLength,
+        inputTokens, maxOutputTokens, droppedHistory: droppedHistoryCount,
+    });
+
+    return {
+        messagesForRequest,
+        ctxLimits,
+        inputTokens,
+        maxOutputTokens,
+        systemTokens,
+        systemTruncated,
+        droppedHistoryCount,
+        budgetedHistoryLength: budgetedHistory.length,
+        outputBudget,
+        modelParamB,
+        cappedForSmallModel,
+    };
+}
@@ -0,0 +1,147 @@
+import { logError } from '../../utils';
+import { getConfig, BrainProfile } from '../../config';
+import { stripMarkdownFormatting, looksCutOff } from '../../core/responseRecovery';
+import {
+    sanitizeAssistantContent,
+    parseRationale,
+} from '../../lib/contextBuilders/outputSanitization';
+import {
+    isSecondBrainInventoryRequest,
+    isNoBrainDataRefusal,
+} from '../../lib/contextBuilders/promptDetection';
+import { buildSecondBrainInventoryFallbackAnswer } from '../../lib/contextBuilders/secondBrainInventory';
+import { isProjectKnowledgeCreationRequest } from '../../lib/contextBuilders/localProjectIntent';
+import {
+    buildProjectKnowledgeFallbackAnswer,
+    writeProjectKnowledgeRecord,
+} from '../../lib/contextBuilders/projectKnowledge';
+import { enforceLocalPathReviewAnswer } from '../../lib/contextBuilders/localProjectPath';
+import { isBlockingProjectKnowledgeAnswer } from '../../lib/contextBuilders/recentProjectKnowledge';
+import {
+    enforceProjectClaimPolicyInAnswer,
+    SecondBrainTrace,
+} from '../../features/secondBrainTrace';
+import {
+    estimateTokens,
+    classifyStopReason,
+    truncationNotice,
+    shouldShowTruncationNotice,
+} from '../../lib/contextManager';
+
+/** Inputs consumed by `processFinalAnswer`. */
+export interface ProcessFinalAnswerInput {
+    /** Raw `cleaned.visible` from extractVisibleFinal(). */
+    visibleAnswer: string;
+    /** Current user prompt; when null/empty the inventory and project-knowledge fallbacks are skipped. */
+    prompt: string | null;
+    /** Trace for this turn (or null); forwarded to the project-claim enforcer and the inventory fallback. */
+    secondBrainTrace: SecondBrainTrace | null;
+    /** Local project path context (or ''); gates the project-knowledge record step. */
+    localPathContext: string;
+    /** Active brain profile; used when building the inventory fallback answer. */
+    activeBrain: BrainProfile;
+    /** Known brain files; the inventory fallback only applies when this is non-empty. */
+    brainFiles: string[];
+    /** Stop reason reported for the generation, classified via `classifyStopReason`. */
+    finishStopReason: string | undefined;
+    /** Output cap used for this request; feeds the truncation-notice decision and logging. */
+    maxOutputTokens: number;
+    /** From earlier phases — used in logError noise. */
+    actualModel: string;
+    engine: string;
+    inputTokens: number;
+}
+
+/** Result of `processFinalAnswer`. */
+export interface ProcessFinalAnswerResult {
+    /** After the first stripMarkdown pass — passed as-is to the `executeActions(cleanedVisible, …)` call in agent.ts. */
+    cleanedVisible: string;
+    /**
+     * post-enforcers, pre-final-stripMarkdown — used for executeActions and history.
+     * NOTE(review): the note on `cleanedVisible` also names executeActions as its consumer;
+     * confirm against the caller which of the two is actually passed.
+     */
+    assistantContent: string;
+    /** post-stripMarkdown-FINAL — emitted to webview. */
+    finalAssistantContent: string;
+    /** Parsed from `cleanedVisible` (before sanitizing and enforcers run). */
+    rationale: ReturnType<typeof parseRationale>;
+    /** Token estimate of `assistantContent`, taken before any truncation notice is appended. */
+    outputTokens: number;
+    /** Classification of `finishStopReason`. */
+    stopKind: ReturnType<typeof classifyStopReason>;
+}
+
+/**
+ * Post-processes the model's visible answer: optional markdown stripping, sanitizing and
+ * policy enforcers, Second Brain / project-knowledge fallbacks, truncation notice, and a
+ * final formatting pass.
+ *
+ * Side effect: for project-knowledge creation requests with a local path context,
+ * `writeProjectKnowledgeRecord` is invoked.
+ *
+ * @param input - The raw visible answer plus the turn state needed by the enforcers.
+ * @returns The answer at each processing stage together with rationale, token estimate
+ *          and classified stop reason.
+ */
+export function processFinalAnswer(input: ProcessFinalAnswerInput): ProcessFinalAnswerResult {
+    const { prompt, secondBrainTrace, localPathContext, activeBrain, brainFiles, finishStopReason, maxOutputTokens } = input;
+
+    // [Plain Text Output] With outputFormat='plain' (the default) any markdown markers the
+    // model emitted (`##`, `**`, `> `, `* ` …) are removed while label text is kept; in
+    // markdown mode the text passes through unchanged. The setting is read on every call.
+    const applyOutputFormat = (text: string): string => {
+        if (getConfig().outputFormat === 'plain') {
+            return stripMarkdownFormatting(text);
+        }
+        return text;
+    };
+
+    const cleanedVisible = applyOutputFormat(input.visibleAnswer);
+    const rationale = parseRationale(cleanedVisible);
+
+    // Sanitizer → project-claim policy → local-path review enforcer, in that order.
+    const sanitized = sanitizeAssistantContent(cleanedVisible);
+    const claimChecked = enforceProjectClaimPolicyInAnswer(sanitized, secondBrainTrace);
+    let assistantContent = enforceLocalPathReviewAnswer(claimChecked, localPathContext);
+
+    // The model claimed there is no brain data although files exist: answer with the inventory.
+    if (prompt && isSecondBrainInventoryRequest(prompt) && brainFiles.length > 0 && isNoBrainDataRefusal(assistantContent)) {
+        assistantContent = buildSecondBrainInventoryFallbackAnswer(activeBrain, brainFiles, secondBrainTrace);
+    }
+
+    // Note: a previous implementation replaced LLM review answers with a hardcoded Korean
+    // template whenever the answer didn't match enough keywords, which made every review
+    // feel canned and project-agnostic. The LLM's answer now stands — the review-evaluation
+    // system prompt (buildLocalProjectIntentGuidance / buildAstraStanceContext) is strong
+    // enough to keep the response concrete.
+    if (prompt && localPathContext && isProjectKnowledgeCreationRequest(prompt)) {
+        const record = writeProjectKnowledgeRecord(localPathContext);
+        if (isBlockingProjectKnowledgeAnswer(assistantContent)) {
+            assistantContent = buildProjectKnowledgeFallbackAnswer(localPathContext, record);
+        } else if (record && !assistantContent.includes(record.filePath)) {
+            // Mention the record that was written unless the answer already cites its path.
+            assistantContent = `${assistantContent}\n\n## 생성된 기록\n프로젝트 지식 기록을 생성했습니다: \`${record.filePath}\``;
+        }
+    }
+
+    // Surface truncated/abnormal generation so the user knows the answer is incomplete.
+    const stopKind = classifyStopReason(finishStopReason);
+    const stoppedAbnormally = stopKind === 'output-limit' || stopKind === 'context-overflow' || stopKind === 'error';
+    if (stoppedAbnormally) {
+        logError('Generation stopped abnormally.', {
+            model: input.actualModel, engine: input.engine, stopReason: finishStopReason, stopKind,
+            inputTokens: input.inputTokens, maxOutputTokens, answerChars: assistantContent.length,
+        });
+    }
+
+    const outputTokens = estimateTokens(assistantContent);
+
+    // The "incomplete" notice is shown when the engine reported an abnormal stop, or when
+    // (after all auto-continuation rounds) the answer still plainly ends mid-sentence.
+    let notice = '';
+    if (shouldShowTruncationNotice(stopKind, outputTokens, maxOutputTokens)) {
+        notice = truncationNotice(stopKind);
+    } else if (looksCutOff(assistantContent)) {
+        notice = truncationNotice('output-limit');
+    }
+    if (notice && assistantContent.trim()) {
+        assistantContent = assistantContent.trimEnd() + notice;
+    }
+
+    // [Plain Text Output — FINAL pass] Enforcers may prepend hardcoded headers such as
+    // `## 경로 확인 결과` again, so the string bound for the webview / chatHistory is
+    // sanitized once more. The first pass cleans the model output itself; this second
+    // pass also cleans whatever the enforcers added.
+    const finalAssistantContent = applyOutputFormat(assistantContent);
+
+    return {
+        cleanedVisible,
+        assistantContent,
+        finalAssistantContent,
+        rationale,
+        outputTokens,
+        stopKind,
+    };
+}
@@ -0,0 +1,101 @@
+import * as vscode from 'vscode';
+import { ChatMessage } from '../../agent';
+import { buildApiUrl, summarizeText } from '../../utils';
+import { buildEngineMessageVariants } from '../../lib/contextBuilders/engineMessages';
+import { samplingToRestBody } from '../../lmstudio/streamer';
+import { lmStudioSamplingFromConfig } from '../../lib/contextBuilders/lmStudioSampling';
+
+/** Dependencies injected into `callNonStreaming`. */
+export interface CallNonStreamingDeps {
+    /** Extension context; forwarded to `streamCloudCompletion` when the model id routes to a cloud provider. */
+    context: vscode.ExtensionContext;
+}
+
+/**
+ * Sends one chat-completion request and resolves with the complete answer text.
+ *
+ * Routing:
+ *  - When `parseModelPrefix(modelName)` matches a cloud provider, the request goes through
+ *    `streamCloudCompletion`; the SSE body is read to the end and every `delta.content`
+ *    fragment is concatenated. The last `finish_reason` seen in the stream is reported as the
+ *    stop reason (falling back to `'stop'` when the stream carries none).
+ *  - Otherwise a `stream: false` request is POSTed to the local engine selected by `engine`.
+ *
+ * @param deps Injected dependencies (extension context for the cloud adapters).
+ * @param params Request parameters. `maxTokens` defaults to 4096 and is floored at 256;
+ *   `contextLength` defaults to 32768, is floored at 2048 and is sent as Ollama's `num_ctx`.
+ * @returns The answer text and, when reported, the stop reason. A local response body that is
+ *   not valid JSON yields `{ text: '' }`.
+ * @throws Error when the cloud provider or the local engine answers with a non-OK status; also
+ *   rethrows whatever the cloud adapter or `fetch` reject with (aborts, network failures).
+ */
+export async function callNonStreaming(deps: CallNonStreamingDeps, params: {
+    baseUrl: string;
+    modelName: string;
+    engine: 'lmstudio' | 'ollama';
+    messages: ChatMessage[];
+    temperature: number;
+    maxTokens?: number;
+    contextLength?: number;
+    signal?: AbortSignal;
+}): Promise<{ text: string; stopReason?: string }> {
+    const { baseUrl, modelName, engine, messages, temperature, signal } = params;
+    const maxTokens = Math.max(256, params.maxTokens ?? 4096);
+
+    // Cloud routing: the streaming Response is read to the end and folded back into plain text,
+    // so no separate non-streaming cloud endpoint is needed.
+    // `cloudRouted` flips to true once the model id is known to belong to a cloud provider. From
+    // that point every failure is the provider's and must reach the caller; falling through would
+    // send a cloud-prefixed model id to the local engine and hide the real error.
+    let cloudRouted = false;
+    try {
+        const { parseModelPrefix, streamCloudCompletion } =
+            require('../../features/providers') as typeof import('../../features/providers');
+        const hit = parseModelPrefix(modelName);
+        if (hit) {
+            cloudRouted = true;
+            const response = await streamCloudCompletion(deps.context, hit, {
+                messages: messages.map((m) => ({ role: m.role as any, content: m.content })),
+                temperature,
+                maxTokens,
+                signal,
+            });
+            if (!response.ok) {
+                const errText = await response.text().catch(() => '');
+                throw new Error(`Cloud (${hit.provider}) ${response.status}: ${summarizeText(errText, 200)}`);
+            }
+            // Read the whole OpenAI-compatible SSE body and join the delta.content pieces.
+            const raw = await response.text();
+            let acc = '';
+            let finishReason: string | undefined;
+            for (const line of raw.split('\n')) {
+                const t = line.trim();
+                if (!t.startsWith('data:')) continue;
+                const payload = t.slice(5).trim();
+                if (!payload || payload === '[DONE]') continue;
+                try {
+                    const choice = JSON.parse(payload)?.choices?.[0];
+                    const delta = choice?.delta?.content;
+                    if (typeof delta === 'string') acc += delta;
+                    // Keep the provider's own stop reason so a truncated answer ('length') is
+                    // not reported as a clean 'stop'.
+                    const reason = choice?.finish_reason;
+                    if (typeof reason === 'string' && reason) finishReason = reason;
+                } catch { /* skip malformed */ }
+            }
+            return { text: acc, stopReason: finishReason ?? 'stop' };
+        }
+    } catch (e) {
+        if (cloudRouted) throw e;
+        // Providers module failed to load or the model id could not be parsed: use the local engine.
+    }
+
+    const numCtx = Math.max(2048, params.contextLength ?? 32768);
+    const apiUrl = buildApiUrl(baseUrl, engine, 'chat');
+    const variants = buildEngineMessageVariants(messages, engine);
+    const sampling = samplingToRestBody(lmStudioSamplingFromConfig());
+    const body = {
+        model: modelName,
+        messages: variants[0].messages,
+        stream: false,
+        // LM Studio takes OpenAI-style top-level fields; Ollama expects them nested under `options`.
+        ...(engine === 'lmstudio'
+            ? { max_tokens: maxTokens, temperature, ...sampling }
+            : { options: { num_ctx: numCtx, num_predict: maxTokens, temperature, ...sampling } }),
+    };
+    const response = await fetch(apiUrl, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify(body),
+        signal,
+    });
+    if (!response.ok) {
+        const errText = await response.text().catch(() => '');
+        throw new Error(`Non-streaming fallback returned ${response.status}: ${summarizeText(errText, 200)}`);
+    }
+    const text = await response.text();
+    try {
+        const json = JSON.parse(text);
+        if (engine === 'lmstudio') {
+            return {
+                text: json?.choices?.[0]?.message?.content ?? '',
+                stopReason: json?.choices?.[0]?.finish_reason,
+            };
+        }
+        return {
+            text: json?.message?.content ?? json?.response ?? '',
+            stopReason: json?.done_reason ?? (json?.done === true ? 'stop' : undefined),
+        };
+    } catch {
+        // Body was not JSON: report an empty answer rather than failing the caller.
+        return { text: '' };
+    }
+}
@@ -0,0 +1,169 @@
+import * as vscode from 'vscode';
+import {
+    buildApiUrl,
+    logError,
+    logInfo,
+    resolveEngine,
+    summarizeText,
+} from '../../utils';
+import { buildEngineMessageVariants } from '../../lib/contextBuilders/engineMessages';
+import { buildModelCandidates } from '../../lib/contextBuilders/modelCandidates';
+import { samplingToRestBody } from '../../lmstudio/streamer';
+import { lmStudioSamplingFromConfig } from '../../lib/contextBuilders/lmStudioSampling';
+import type { ChatMessage } from '../../agent';
+
+/** Dependencies injected into `createStreamingRequest`. */
+export interface CreateStreamingRequestDeps {
+    /** Extension context; forwarded to `streamCloudCompletion` for cloud-routed models. */
+    context: vscode.ExtensionContext;
+    /** Called once per outgoing request to obtain the abort signal to attach (may be undefined). */
+    getAbortSignal: () => AbortSignal | undefined;
+}
+
+/**
+ * Opens a streaming chat-completion request and resolves with the still-unread `Response`.
+ *
+ * Routing:
+ *  - When `parseModelPrefix(modelName)` matches a cloud provider, the request is delegated to
+ *    `streamCloudCompletion`. The result is labelled `engine: 'lmstudio'` (the body is expected
+ *    to be OpenAI-compatible SSE) with a synthetic `cloud://provider/model` URL.
+ *  - Otherwise the engine is resolved from `baseUrl`, and each model candidate / message variant
+ *    pair is tried against that single engine, with up to two retries per pair.
+ *
+ * Retry rules on the local path: 4xx responses are not retried, other non-OK responses are;
+ * thrown network errors are retried unless they are an abort (rethrown immediately) or indicate
+ * that the engine is not reachable at all (all remaining attempts are abandoned).
+ *
+ * @param deps Injected dependencies (extension context, abort-signal accessor).
+ * @param params Request parameters. `maxTokens` defaults to 4096 and is floored at 256;
+ *   `contextLength` defaults to 32768 and is floored at 2048.
+ * @returns The connected response together with the engine whose wire format it uses.
+ * @throws The cloud adapter's error for cloud-routed models, an `AbortError` on cancellation, or
+ *   a summary error naming the engine once every local attempt has failed.
+ */
+export async function createStreamingRequest(deps: CreateStreamingRequestDeps, params: {
+    baseUrl: string;
+    modelName: string;
+    reqMessages: ChatMessage[];
+    temperature: number;
+    /** Dynamic output-token cap computed from the remaining context budget. */
+    maxTokens?: number;
+    /** Model context window in tokens (used for Ollama's num_ctx). */
+    contextLength?: number;
+}): Promise<{ response: Response; engine: 'lmstudio' | 'ollama'; apiUrl: string }> {
+    const { baseUrl, modelName, reqMessages, temperature } = params;
+    const maxTokens = Math.max(256, params.maxTokens ?? 4096);
+
+    // Cloud provider routing: a model id starting with 'openrouter:' / 'anthropic:' / 'gemini:'
+    // is handed to the matching adapter. The adapter returns OpenAI-compatible SSE, so the
+    // consumer of the local-engine path can process it unchanged.
+    // `cloudRouted` flips to true once the model id is known to belong to a cloud provider. From
+    // that point every failure (missing key, network error, abort, non-OK status) must reach the
+    // caller instead of silently falling through to the local engine.
+    let cloudRouted = false;
+    try {
+        const { parseModelPrefix, streamCloudCompletion } =
+            require('../../features/providers') as typeof import('../../features/providers');
+        const hit = parseModelPrefix(modelName);
+        if (hit) {
+            cloudRouted = true;
+            logInfo('AI streaming request (cloud).', { provider: hit.provider, model: hit.model });
+            const response = await streamCloudCompletion(deps.context, hit, {
+                messages: reqMessages.map((m) => ({ role: m.role as any, content: m.content })),
+                temperature,
+                maxTokens,
+                signal: deps.getAbortSignal(),
+            });
+            if (!response.ok) {
+                const errText = await response.text();
+                throw new Error(`Cloud (${hit.provider}) ${response.status}: ${summarizeText(errText, 300)}`);
+            }
+            return { response, engine: 'lmstudio', apiUrl: `cloud://${hit.provider}/${hit.model}` };
+        }
+    } catch (e) {
+        if (cloudRouted) throw e;
+        // Providers module failed to load or the model id did not parse: fall through to the local path.
+    }
+
+    const numCtx = Math.max(2048, params.contextLength ?? 32768);
+    const engine = resolveEngine(baseUrl);  // only the engine the user configured is used
+    const apiUrl = buildApiUrl(baseUrl, engine, 'chat');
+    const messageVariants = buildEngineMessageVariants(reqMessages, engine);
+    const modelCandidates = buildModelCandidates(modelName, engine);
+    let lastError: Error | null = null;
+    // Set once a connection-level failure shows the engine is not listening. A single flag shared
+    // by all three loops, so the retry, variant and candidate loops stop on the same condition.
+    let serviceDown = false;
+    // Retries for transient network errors (at most 2, exponential backoff).
+    const MAX_RETRIES = 2;
+
+    // Model-candidate / message-variant retries stay within the same engine.
+    for (const candidateModel of modelCandidates) {
+        for (const variant of messageVariants) {
+            const sampling = samplingToRestBody(lmStudioSamplingFromConfig());
+            const streamBody = {
+                model: candidateModel,
+                messages: variant.messages,
+                stream: true,
+                ...(engine === 'lmstudio'
+                    // LM Studio's OpenAI-compatible REST extends the schema with top_k/min_p/
+                    // repeat_penalty (same names as Ollama). Spread the shared sampling block so
+                    // the REST fallback matches the SDK path — without it a fallback after a
+                    // dead handle quietly loses the glitch-suppression preset.
+                    ? { max_tokens: maxTokens, temperature, ...sampling }
+                    : { options: { num_ctx: numCtx, num_predict: maxTokens, temperature, ...sampling } }),
+            };
+
+            for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
+                try {
+                    if (attempt > 0) {
+                        const delay = 500 * Math.pow(2, attempt - 1);  // 500ms, 1000ms
+                        await new Promise(r => setTimeout(r, delay));
+                        logInfo('AI streaming request retry.', { engine, attempt, model: candidateModel });
+                    }
+                    logInfo('AI streaming request started.', {
+                        engine, apiUrl, model: candidateModel,
+                        variant: variant.name, messageCount: variant.messages.length,
+                        attempt
+                    });
+                    const response = await fetch(apiUrl, {
+                        method: 'POST',
+                        headers: {
+                            'Content-Type': 'application/json',
+                            'Accept': 'text/event-stream',
+                            'Cache-Control': 'no-cache',
+                            'Connection': 'keep-alive'
+                        },
+                        body: JSON.stringify(streamBody),
+                        signal: deps.getAbortSignal(),
+                        keepalive: true
+                    });
+
+                    if (!response.ok) {
+                        const errText = await response.text();
+                        lastError = new Error(`AI Engine error (${engine}/${variant.name}): ${response.status} - ${summarizeText(errText, 300)}`);
+                        logError('AI streaming request returned non-OK status.', {
+                            engine, variant: variant.name, apiUrl,
+                            status: response.status, body: summarizeText(errText, 500)
+                        });
+                        // Retrying a 4xx is pointless; only other failures (5xx) are retried.
+                        if (response.status >= 400 && response.status < 500) break;
+                        continue;
+                    }
+
+                    logInfo('AI streaming request connected.', { engine, variant: variant.name, apiUrl });
+                    return { response, engine, apiUrl };
+                } catch (error: any) {
+                    lastError = error instanceof Error ? error : new Error(String(error));
+                    // An AbortError means the user cancelled, so never retry it.
+                    if (lastError.name === 'AbortError') {
+                        throw lastError;
+                    }
+                    // ECONNREFUSED / DNS-level failures mean the engine process isn't even
+                    // listening — no amount of retries or message-variant juggling will help.
+                    // Abandon the candidate/variant loops now and surface the "is X running?"
+                    // error fast instead of burning 12 fetch attempts before giving up.
+                    const errCode = (error?.cause?.code ?? error?.code ?? '').toString();
+                    const errMsg = lastError.message;
+                    if (
+                        errCode === 'ECONNREFUSED' || errCode === 'ENOTFOUND' || errCode === 'EAI_AGAIN'
+                        || /ECONNREFUSED|ENOTFOUND|getaddrinfo|fetch failed/i.test(errMsg)
+                    ) {
+                        serviceDown = true;
+                        logError('AI streaming request: engine appears to be down.', {
+                            engine, apiUrl, code: errCode, error: errMsg,
+                        });
+                        break; // exit retry loop
+                    }
+                    logError('AI streaming request failed.', {
+                        engine, variant: variant.name, apiUrl, model: candidateModel,
+                        attempt, error: lastError.message
+                    });
+                }
+            }
+            if (serviceDown) break; // skip remaining variants
+        }
+        // serviceDown also short-circuits the model-candidate loop — there is no
+        // candidate / variant the engine can answer if it isn't listening at all.
+        if (serviceDown) break;
+    }
+
+    // Clear error message: tell the user which engine failed.
+    const engineLabel = engine === 'lmstudio' ? 'LM Studio' : 'Ollama';
+    throw new Error(
+        `${engineLabel} 엔진에 연결할 수 없습니다. ` +
+        `${engineLabel}가 실행 중이고 모델 '${modelName}'이 로드되어 있는지 확인하세요. ` +
+        `(원인: ${lastError?.message || 'unknown'})`
+    );
+}
@@ -0,0 +1,74 @@
+import * as vscode from 'vscode';
+import { logInfo } from '../../utils';
+import { ChatMessage } from '../../agent';
+
+/** Dependencies injected into `maybeEmitDevilRebuttal`. */
+export interface DevilRebuttalDeps {
+    /** Supplies the abort signal attached to the rebuttal's LLM call (may be undefined). */
+    getAbortSignal: () => AbortSignal | undefined;
+    /** Non-streaming completion call used to generate the rebuttal text. */
+    callNonStreaming: (params: {
+        baseUrl: string;
+        modelName: string;
+        engine: 'lmstudio' | 'ollama';
+        messages: ChatMessage[];
+        temperature: number;
+        maxTokens?: number;
+        contextLength?: number;
+        signal?: AbortSignal;
+    }) => Promise<{ text: string; stopReason?: string }>;
+    /** Returns the webview that receives the 'devilRebuttal' message, if one is available. */
+    getWebview: () => vscode.Webview | undefined;
+}
+
+/**
+ * Produces a Devil Agent rebuttal for a finished turn and posts it to the webview.
+ *
+ * Meant to be called fire-and-forget right after the main turn completes. Nothing happens when
+ * the feature is disabled, when the prompt or the answer is blank, or when no rebuttal text is
+ * generated. Otherwise a separate LLM call is made through `deps.callNonStreaming` and the
+ * result is posted as a 'devilRebuttal' message.
+ *
+ * Any failure is logged at info level and swallowed, so it never affects the main answer.
+ *
+ * @param deps Injected abort-signal accessor, completion call and webview accessor.
+ * @param opts The turn being criticised plus the model/engine settings for the extra call.
+ */
+export async function maybeEmitDevilRebuttal(deps: DevilRebuttalDeps, opts: {
+    userPrompt: string;
+    assistantAnswer: string;
+    baseUrl: string;
+    modelName: string;
+    contextLength: number;
+    engine: 'lmstudio' | 'ollama';
+}): Promise<void> {
+    try {
+        const { DEVIL_PERSONA_NAME, generateDevilRebuttal, isDevilAgentEnabled } =
+            await import('../../features/devilAgent');
+        if (!isDevilAgentEnabled()) {
+            return;
+        }
+        if (opts.userPrompt.trim().length === 0 || opts.assistantAnswer.trim().length === 0) {
+            return;
+        }
+
+        // Adapter handed to the devil agent: one system + one user message through
+        // callNonStreaming, returning only the text.
+        async function askModel(system: string, userMessage: string, maxTokens: number) {
+            const { text } = await deps.callNonStreaming({
+                baseUrl: opts.baseUrl,
+                modelName: opts.modelName,
+                engine: opts.engine,
+                messages: [
+                    { role: 'system', content: system },
+                    { role: 'user', content: userMessage },
+                ],
+                temperature: 0.7,
+                maxTokens,
+                contextLength: opts.contextLength,
+                signal: deps.getAbortSignal(),
+            });
+            return text;
+        }
+
+        const critique = await generateDevilRebuttal(askModel, {
+            userPrompt: opts.userPrompt,
+            assistantAnswer: opts.assistantAnswer,
+        });
+        if (!critique) {
+            return;
+        }
+
+        const card = {
+            persona: DEVIL_PERSONA_NAME,
+            text: critique,
+            // Stashed so the original context can be restored when the user asks for a counter-rebuttal.
+            userPrompt: opts.userPrompt,
+            assistantAnswer: opts.assistantAnswer,
+        };
+        deps.getWebview()?.postMessage({ type: 'devilRebuttal', value: card });
+    } catch (e: any) {
+        // A failed rebuttal must not disturb the main answer, so it is only logged.
+        logInfo('Devil rebuttal skipped.', { error: e?.message ?? String(e) });
+    }
+}
@@ -0,0 +1,136 @@
+import * as vscode from 'vscode';
+import { logError, summarizeText } from '../../utils';
+import { lmStudioSamplingFromConfig, lmStudioRespondExtrasFromConfig } from '../../lib/contextBuilders/lmStudioSampling';
+import type { AgentExecutorOptions, ChatMessage } from '../../agent';
+
+/** Dependencies injected into `streamChatOnce`. */
+export interface StreamChatOnceDeps {
+    /** Executor options; `lmStudioStreamer` is read from here on the SDK path. */
+    options: AgentExecutorOptions;
+    /** Returns the webview that receives live 'streamChunk' messages, if one is available. */
+    getWebview: () => vscode.Webview | undefined;
+    /** Reports whether the given run id has been superseded; a stale run stops consuming its stream. */
+    isStaleRun: (runId: number) => boolean;
+    /** Opens the REST streaming request used when the SDK path is not taken. */
+    createStreamingRequest: (params: {
+        baseUrl: string;
+        modelName: string;
+        reqMessages: ChatMessage[];
+        temperature: number;
+        /** Dynamic output-token cap computed from the remaining context budget. */
+        maxTokens?: number;
+        /** Model context window in tokens (used for Ollama's num_ctx). */
+        contextLength?: number;
+    }) => Promise<{ response: Response; engine: 'lmstudio' | 'ollama'; apiUrl: string }>;
+}
+
+/**
+ * Runs one streamed chat completion and resolves with the accumulated text.
+ *
+ * Two transports:
+ *  - SDK path (`useLmStudioSdk`): iterates `deps.options.lmStudioStreamer.stream(...)`.
+ *  - REST path: opens a request through `deps.createStreamingRequest` and parses the body line
+ *    by line, using the wire format of the engine that request reports.
+ *
+ * Each received token is appended to the result and, when `postLiveDeltas` is set, posted to the
+ * webview as a 'streamChunk' message.
+ *
+ * @param deps Injected executor options, webview accessor, stale-run check and request factory.
+ * @param params Run id, transport choice, model/engine settings and the abort signal.
+ * @returns The text gathered so far, the last stop reason seen, and `aborted: true` when the run
+ *   went stale or was cancelled (on the REST path only an `AbortError` counts as cancellation).
+ * @throws Any non-abort transport error, after logging it; also when the REST response has no
+ *   readable body.
+ */
+export async function streamChatOnce(deps: StreamChatOnceDeps, params: {
+    runId: number;
+    useLmStudioSdk: boolean;
+    engine: 'lmstudio' | 'ollama';
+    ollamaUrl: string;
+    modelName: string;
+    messages: ChatMessage[];
+    temperature: number;
+    maxTokens: number;
+    contextLength: number;
+    contextOverflowPolicy: 'stopAtLimit' | 'truncateMiddle' | 'rollingWindow';
+    signal: AbortSignal;
+    postLiveDeltas: boolean;
+}): Promise<{ text: string; stopReason?: string; aborted: boolean }> {
+    let accumulated = '';
+    let finishStopReason: string | undefined;
+    // Forwards a token to the webview only when live deltas were requested.
+    const post = (token: string) => {
+        if (params.postLiveDeltas && token) {
+            deps.getWebview()?.postMessage({ type: 'streamChunk', value: token });
+        }
+    };
+
+    if (params.useLmStudioSdk) {
+        try {
+            const stream = deps.options.lmStudioStreamer!.stream({
+                modelName: params.modelName,
+                messages: params.messages.map((m) => ({ role: m.role, content: m.content })),
+                temperature: params.temperature,
+                maxTokens: params.maxTokens,
+                contextOverflowPolicy: params.contextOverflowPolicy,
+                ...lmStudioSamplingFromConfig(),
+                ...lmStudioRespondExtrasFromConfig(),
+                signal: params.signal,
+            });
+            for await (const { token, stopReason } of stream) {
+                if (deps.isStaleRun(params.runId)) {
+                    return { text: accumulated, stopReason: finishStopReason, aborted: true };
+                }
+                if (token) {
+                    accumulated += token;
+                    post(token);
+                }
+                if (stopReason) finishStopReason = stopReason;
+            }
+        } catch (err: any) {
+            if (err?.name === 'AbortError' || params.signal.aborted) {
+                return { text: accumulated, stopReason: finishStopReason, aborted: true };
+            }
+            const msg = err?.message ?? String(err);
+            // Recorded for context-overflow errors; the error is rethrown below, so this value
+            // is not part of any returned result.
+            if (/context\s*length|contextlengthreached|exceed|too\s*long/i.test(msg)) {
+                finishStopReason = 'contextLengthReached';
+            }
+            logError('streamChatOnce SDK path failed.', { engine: params.engine, error: msg });
+            throw err;
+        }
+        return { text: accumulated, stopReason: finishStopReason, aborted: false };
+    }
+
+    const request = await deps.createStreamingRequest({
+        baseUrl: params.ollamaUrl,
+        modelName: params.modelName,
+        reqMessages: params.messages,
+        temperature: params.temperature,
+        maxTokens: params.maxTokens,
+        contextLength: params.contextLength,
+    });
+    // Parse with the engine the request actually used, not the caller's configured one. The two
+    // can differ (a cloud-routed request is labelled 'lmstudio' regardless of the local engine),
+    // and picking the wrong format drops every token.
+    const wireEngine = request.engine;
+    const reader = request.response.body?.getReader();
+    if (!reader) throw new Error('Response body is not readable.');
+    const decoder = new TextDecoder();
+    let buffer = '';
+    // Parses one complete line (raw JSON or an SSE `data: ` line) and folds it into the result.
+    const consumeJsonLine = (line: string) => {
+        const trimmed = line.trim();
+        if (!trimmed || trimmed === 'data: [DONE]') return;
+        try {
+            const raw = trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed;
+            const json = JSON.parse(raw);
+            const token = wireEngine === 'lmstudio'
+                ? json.choices?.[0]?.delta?.content || ''
+                : json.message?.content || json.response || '';
+            if (token) {
+                accumulated += token;
+                post(token);
+            }
+            const fr = wireEngine === 'lmstudio'
+                ? json.choices?.[0]?.finish_reason
+                : (json.done_reason ?? (json.done === true ? 'stop' : undefined));
+            if (fr) finishStopReason = fr;
+        } catch (e: any) {
+            logError('streamChatOnce: failed to parse chunk.', { engine: wireEngine, chunk: summarizeText(trimmed, 200), error: e?.message ?? String(e) });
+        }
+    };
+    try {
+        while (true) {
+            const { done, value } = await reader.read();
+            if (done) break;
+            if (deps.isStaleRun(params.runId)) {
+                return { text: accumulated, stopReason: finishStopReason, aborted: true };
+            }
+            // A network chunk can end mid-line, so keep the unfinished tail for the next read.
+            buffer += decoder.decode(value, { stream: true });
+            const lines = buffer.split('\n');
+            buffer = lines.pop() || '';
+            for (const line of lines) consumeJsonLine(line);
+        }
+        if (buffer.trim()) consumeJsonLine(buffer);
+    } catch (err: any) {
+        if (err?.name === 'AbortError') {
+            return { text: accumulated, stopReason: finishStopReason, aborted: true };
+        }
+        logError('streamChatOnce REST path failed.', { engine: wireEngine, error: err?.message ?? String(err) });
+        throw err;
+    } finally {
+        try { reader.releaseLock(); } catch { /* already released on abort */ }
+    }
+    return { text: accumulated, stopReason: finishStopReason, aborted: false };
+}
@@ -0,0 +1,56 @@
+import * as vscode from 'vscode';
+import { getActiveBrainProfile, logError, logInfo } from '../utils';
+import { BrainProfile, getConfig } from '../config';
+import { SessionManager } from '../core/session';
+import { ChatMessage } from '../agent';
+
+/** Dependencies injected into `restoreLastSession`. */
+export interface RestoreLastSessionDeps {
+    /** Source of the last active session (`loadLastActiveSession`). */
+    sessionManager: SessionManager;
+    /** Receives the restored session's history. */
+    setChatHistory: (history: ChatMessage[]) => void;
+    /** Receives the restored session's task id. */
+    setCurrentTaskId: (taskId: string) => void;
+}
+
+/**
+ * Loads the last active session and hands its history and task id to the injected setters.
+ *
+ * Does nothing when no previous session exists. Any error is logged and swallowed, so the
+ * caller simply continues with a fresh state.
+ *
+ * @param deps Session manager plus the two setters that receive the restored values.
+ */
+export async function restoreLastSession(deps: RestoreLastSessionDeps): Promise<void> {
+    try {
+        const previous = deps.sessionManager.loadLastActiveSession();
+        if (!previous) {
+            return;
+        }
+        deps.setChatHistory(previous.history);
+        deps.setCurrentTaskId(previous.taskId);
+        logInfo(`Restored last session: ${previous.taskId}`);
+    } catch (error) {
+        logError('Failed to restore last session. Starting fresh.', error);
+    }
+}
+
+/** Dependencies injected into `executeActionTagsOnText`. */
+export interface ExecuteActionTagsOnTextDeps {
+    /** Runs the actions found in `aiMessage`; its string array is returned to the caller unchanged. */
+    executeActions: (aiMessage: string, rootPath: string, activeBrain: BrainProfile) => Promise<string[]>;
+}
+
+/**
+ * Executes the action tags contained in a piece of text against the current workspace.
+ *
+ * The root path is the first workspace folder, else the configured local brain path, else the
+ * process working directory. The active brain profile is resolved at call time.
+ *
+ * @param deps Provides the `executeActions` implementation.
+ * @param aiMessage Text to run actions from.
+ * @returns Whatever `executeActions` reports; if it throws, a single-entry array holding the
+ *   error line (the failure is logged as well).
+ */
+export async function executeActionTagsOnText(
+    deps: ExecuteActionTagsOnTextDeps,
+    aiMessage: string
+): Promise<string[]> {
+    const { localBrainPath } = getConfig();
+    const workspaceRoot = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath;
+    const rootPath = workspaceRoot || localBrainPath || process.cwd();
+    const activeBrain = getActiveBrainProfile();
+
+    try {
+        const report = await deps.executeActions(aiMessage, rootPath, activeBrain);
+        return report;
+    } catch (e: any) {
+        const detail = e?.message;
+        logError('executeActionTagsOnText failed.', { error: detail ?? String(e) });
+        return [`❌ Action 실행 중 오류: ${detail ?? e}`];
+    }
+}
+
+/**
+ * Stages, commits and pushes the contents of the brain directory with git.
+ *
+ * When staging leaves nothing to commit the function returns without committing or pushing.
+ * `git commit` exits non-zero in that situation, which would otherwise be logged as a sync
+ * failure on every no-op call.
+ *
+ * Failures of any git command are logged and swallowed; nothing is thrown to the caller.
+ *
+ * @param brainDir Directory the git commands run in.
+ */
+export function syncBrain(brainDir: string): void {
+    try {
+        const { execSync } = require('child_process');
+        execSync(`git add .`, { cwd: brainDir });
+        // List what is staged; an empty list means there is nothing to commit.
+        const staged = String(execSync(`git diff --cached --name-only`, { cwd: brainDir })).trim();
+        if (!staged) {
+            return;
+        }
+        execSync(`git commit -m "[Astra] Knowledge Update"`, { cwd: brainDir });
+        execSync(`git push`, { cwd: brainDir });
+    } catch (err) {
+        logError('Second Brain sync failed.', err);
+    }
+}
@@ -0,0 +1,104 @@
+import { getConfig } from '../../config';
+import { logError, resolveEngine } from '../../utils';
+import { estimateMessagesTokens, computeOutputBudget } from '../../lib/contextManager';
+import { lmStudioSamplingFromConfig, lmStudioRespondExtrasFromConfig } from '../../lib/contextBuilders/lmStudioSampling';
+import { AGENT_PROMPTS, type AgentRole, type AgentExecutorOptions, type ChatMessage } from '../../agent';
+
+/** Dependencies injected into `callRoleAgent`. */
+export interface CallRoleAgentDeps {
+    /** Supplies the abort signal for the SDK stream (may be undefined). */
+    getAbortSignal: () => AbortSignal | undefined;
+    /** Opens the REST streaming request used when the LM Studio SDK path is not taken. */
+    createStreamingRequest: (params: {
+        baseUrl: string;
+        modelName: string;
+        reqMessages: ChatMessage[];
+        temperature: number;
+        maxTokens?: number;
+        contextLength?: number;
+    }) => Promise<{ response: Response; engine: 'lmstudio' | 'ollama'; apiUrl: string }>;
+    /** Executor options; a present `lmStudioStreamer` enables the SDK path for the LM Studio engine. */
+    options: AgentExecutorOptions;
+}
+
+/**
+ * Runs a single sub-agent turn (system persona for `role` + the user prompt) and resolves with
+ * the complete answer text.
+ *
+ * The output-token cap is computed from the estimated input size so that input plus output stay
+ * inside the configured context window. With the LM Studio engine and an SDK streamer available
+ * the SDK is used; otherwise the request goes through `deps.createStreamingRequest` and the
+ * streamed body is parsed line by line.
+ *
+ * @param deps Injected abort-signal accessor, request factory and executor options.
+ * @param role Selects the persona from `AGENT_PROMPTS`.
+ * @param prompt User message for the sub-agent.
+ * @param modelName Model to query.
+ * @param options Not read by this function; kept for signature compatibility.
+ * @returns The concatenated answer; on the SDK path an abort yields the text gathered so far.
+ * @throws Non-abort SDK errors (after logging), request errors from `createStreamingRequest`,
+ *   and an Error when the REST response has no readable body.
+ */
+export async function callRoleAgent(deps: CallRoleAgentDeps, role: AgentRole, prompt: string, modelName: string, options: any): Promise<string> {
+    const persona = AGENT_PROMPTS[role];
+    const { ollamaUrl, contextLength, maxOutputTokens, contextSafetyMargin, contextOverflowPolicy } = getConfig();
+
+    const messages: ChatMessage[] = [
+        { role: 'system', content: persona },
+        { role: 'user', content: prompt }
+    ];
+    // Dynamic output cap so input + output stays within the context window.
+    const inputTokens = estimateMessagesTokens(messages);
+    const { maxOutputTokens: subMaxTokens } = computeOutputBudget(inputTokens, {
+        contextLength, maxOutputTokens, safetyMargin: contextSafetyMargin, minOutputTokens: 512,
+    });
+
+    const engine = resolveEngine(ollamaUrl);
+    let responseText = '';
+
+    if (engine === 'lmstudio' && deps.options.lmStudioStreamer) {
+        try {
+            const stream = deps.options.lmStudioStreamer.stream({
+                modelName,
+                messages: messages.map((m) => ({ role: m.role, content: m.content })),
+                temperature: 0.3,
+                maxTokens: subMaxTokens,
+                contextOverflowPolicy,
+                ...lmStudioSamplingFromConfig(),
+                ...lmStudioRespondExtrasFromConfig(),
+                signal: deps.getAbortSignal(),
+            });
+            let subStopReason: string | undefined;
+            for await (const { token, stopReason } of stream) {
+                if (token) responseText += token;
+                if (stopReason) subStopReason = stopReason;
+            }
+            // Sub-agent answers that got cut mid-sentence corrupt the pipeline silently
+            // (Planner produces a half-step, Writer can't recover). Surface a warn log so
+            // the operator can raise subMaxTokens or pick a less aggressive output budget.
+            if (subStopReason && /maxPredicted|context|truncat/i.test(subStopReason)) {
+                logError('Sub-agent answer hit a generation limit.', {
+                    role, model: modelName, stopReason: subStopReason,
+                    chars: responseText.length, maxTokens: subMaxTokens,
+                });
+            }
+            return responseText;
+        } catch (err: any) {
+            if (err?.name === 'AbortError' || deps.getAbortSignal()?.aborted) return responseText;
+            logError('LM Studio SDK callAgent stream failed.', { role, error: err?.message ?? String(err) });
+            throw err;
+        }
+    }
+
+    const request = await deps.createStreamingRequest({
+        baseUrl: ollamaUrl,
+        modelName: modelName,
+        reqMessages: messages,
+        temperature: 0.3, // Use lower temperature for planning and research
+        maxTokens: subMaxTokens,
+        contextLength
+    });
+
+    const reader = request.response.body?.getReader();
+    if (!reader) throw new Error("Agent response body is not readable.");
+
+    const decoder = new TextDecoder();
+    // Holds the unfinished tail of the previous chunk. A network chunk may end in the middle of
+    // a JSON line; parsing chunks independently would drop that line and its tokens.
+    let pending = '';
+    // Parses one complete line (raw JSON or an SSE `data: ` line) and appends its content.
+    const consumeLine = (line: string): void => {
+        const trimmed = line.trim();
+        if (!trimmed || trimmed === 'data: [DONE]') return;
+        try {
+            const json = JSON.parse(trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed);
+            const content = json.choices?.[0]?.delta?.content || json.message?.content || '';
+            responseText += content;
+        } catch { /* not a JSON payload line: ignore */ }
+    };
+    try {
+        while (true) {
+            const { done, value } = await reader.read();
+            if (done) break;
+            pending += decoder.decode(value, { stream: true });
+            const lines = pending.split('\n');
+            pending = lines.pop() ?? '';
+            for (const line of lines) consumeLine(line);
+        }
+        // The stream may end without a trailing newline.
+        if (pending.trim()) consumeLine(pending);
+    } finally {
+        try { reader.releaseLock(); } catch { /* already released */ }
+    }
+    return responseText;
+}
@@ -0,0 +1,117 @@
+import * as vscode from 'vscode';
+import { findBrainFiles, getActiveBrainProfile, logError } from '../../utils';
+import { getConfig } from '../../config';
+import { AgentWorkflowManager } from '../../agents/AgentWorkflowManager';
+import { ErrorTranslator } from '../../core/errorHandler';
+import { StatusBarManager, AgentStatus } from '../../core/statusBar';
+import { stripMarkdownFormatting } from '../../core/responseRecovery';
+import type { AgentExecutorOptions, ChatMessage } from '../../agent';
+
+/** Dependencies injected into `executeMultiAgentWorkflow`. */
+export interface WorkflowDeps {
+    /** Called after the final report has been appended to `chatHistory`. */
+    emitHistoryChanged: () => void;
+    /** Conversation history; the final report is pushed onto it as an assistant message. */
+    chatHistory: ChatMessage[];
+    /** Executor options; `onStreamLifecycle` start/end hooks are invoked around the run. */
+    options: AgentExecutorOptions;
+    /** Status bar updated as the workflow starts, completes, is cancelled or fails. */
+    statusBarManager: StatusBarManager;
+    /** Returns the webview that receives stream/stage/error messages, if one is available. */
+    getWebview: () => vscode.Webview | undefined;
+    /** Returns the abort signal for this run; the workflow does not start without one. */
+    getAbortSignal: () => AbortSignal | undefined;
+}
+
+/**
+ * Runs the multi-agent workflow for one prompt and delivers the final report to the webview.
+ *
+ * Progress is reported only through 'workflowStage' messages; the chat body receives the final
+ * report as a single 'streamChunk'. The report is also appended to the chat history. When the
+ * configured output format is 'plain', markdown markers are stripped from the report first.
+ *
+ * Every exit path closes the stage indicator. Cancellation (an abort error, or an aborted signal
+ * / missing webview once the workflow has returned) resets the status bar to idle without
+ * posting a report.
+ *
+ * @param deps Injected history, status bar, webview and abort-signal accessors.
+ * @param prompt User prompt handed to the workflow.
+ * @param modelName Model used by the workflow.
+ * @param options Optional `brainProfileId`, `agentSkillContext` and `designerContext`.
+ */
+export async function executeMultiAgentWorkflow(
+    deps: WorkflowDeps,
+    prompt: string,
+    modelName: string,
+    options: any
+) {
+    const webview = deps.getWebview();
+    if (!webview) return;
+    // NOTE: the caller (AgentExecutor wrapper) must finish stop() + new AbortController()
+    // *before* calling this. Calling stop from inside this extracted function would immediately
+    // discard the controller the caller just created. getAbortSignal() must return the signal of
+    // that new controller.
+    const signal = deps.getAbortSignal();
+    if (!signal) return;
+
+    deps.statusBarManager.updateStatus(AgentStatus.Thinking, 'Multi-Agent Workflow Running');
+    webview.postMessage({ type: 'streamStart' });
+    deps.options.onStreamLifecycle?.start();
+
+    try {
+        let brainContext = 'No specific context available';
+        try {
+            const config = getConfig();
+            const activeBrain = options.brainProfileId
+                ? (config.brainProfiles.find((profile) => profile.id === options.brainProfileId) || getActiveBrainProfile())
+                : getActiveBrainProfile();
+            const brainFiles = findBrainFiles(activeBrain.localBrainPath);
+            brainContext = `Brain: ${activeBrain.name}, Files: ${brainFiles.length}`;
+        } catch (ctxErr) {
+            // Brain context is optional: keep the placeholder and continue.
+            logError('Failed to load brain context for agents', ctxErr);
+        }
+
+        const selectedAgentContext = options.agentSkillContext
+            ? `\nSelected Agent Reference:\n${options.agentSkillContext}`
+            : '';
+        const designerContext = options.designerContext
+            ? `\nProject Chronicle Guard:\n${options.designerContext}`
+            : '';
+
+        // Delegate the configured run to the workflow manager.
+        // [Clean Stream] Stage progress is shown only in the sidebar's workflowStage indicator,
+        // not in the chat body (streamChunk), so intermediate "thinking" steps never appear in
+        // the conversation. The chat bubble receives the final answer once.
+        const rawFinalReport = await AgentWorkflowManager.runStrictWorkflow(
+            prompt,
+            modelName,
+            `${brainContext}${selectedAgentContext}${designerContext}`,
+            signal,
+            (step, msg) => {
+                deps.getWebview()?.postMessage({
+                    type: 'workflowStage',
+                    value: { step, message: msg, done: step === '완료' || step === '오류' }
+                });
+            }
+        );
+
+        const wv2 = deps.getWebview();
+        if (signal.aborted || !wv2) {
+            // Same cleanup as the cancellation branch of the catch below: without it the stage
+            // indicator stays open and the status bar keeps showing the running state.
+            wv2?.postMessage({ type: 'workflowStage', value: { step: '완료', message: '', done: true } });
+            deps.statusBarManager.updateStatus(AgentStatus.Idle, 'Workflow Cancelled');
+            return;
+        }
+
+        // [Plain Text Output] Even when the synthesizer follows instructions, small models
+        // sometimes leak `##` / `**`, so the markers are stripped once more here. Only the
+        // cleaned result is stored in the chat history, so the markers do not come back through
+        // the next turn's context.
+        const finalReport = getConfig().outputFormat === 'plain'
+            ? stripMarkdownFormatting(rawFinalReport)
+            : rawFinalReport;
+
+        wv2.postMessage({ type: 'streamChunk', value: finalReport });
+        wv2.postMessage({ type: 'workflowStage', value: { step: '완료', message: '', done: true } });
+        wv2.postMessage({ type: 'streamEnd' });
+
+        deps.chatHistory.push({ role: 'assistant', content: finalReport });
+        deps.emitHistoryChanged();
+
+        deps.statusBarManager.updateStatus(AgentStatus.Success, 'Workflow Complete');
+        wv2.postMessage({ type: 'autoContinue', value: '✅ 모든 분석이 성공적으로 완료되었습니다.' });
+
+    } catch (error: any) {
+        // The stage indicator must be closed on every exit path; otherwise the sidebar keeps
+        // showing the last stage forever.
+        deps.getWebview()?.postMessage({ type: 'workflowStage', value: { step: '완료', message: '', done: true } });
+        // Optional chaining: a thrown null/undefined must not raise a second error in here.
+        if (error?.name === 'AbortError' || error?.message?.includes('cancelled')) {
+            deps.statusBarManager.updateStatus(AgentStatus.Idle, 'Workflow Cancelled');
+            return;
+        }
+        const friendly = ErrorTranslator.translate(error);
+        logError('Workflow failed', error);
+
+        const wvErr = deps.getWebview();
+        wvErr?.postMessage({ type: 'autoContinue', value: '' });
+        wvErr?.postMessage({
+            type: 'error',
+            value: `### ${friendly.title}\n\n**상태:** ${friendly.message}\n\n**해결 방법:** ${friendly.action}`
+        });
+        deps.statusBarManager.updateStatus(AgentStatus.Idle, 'Error occurred');
+    } finally {
+        deps.options.onStreamLifecycle?.end();
+    }
+}
@@ -0,0 +1,61 @@
+import * as vscode from 'vscode';
+import { logError, logInfo, resolveEngine } from '../../utils';
+import { getConfig } from '../../config';
+import type { ChatMessage } from '../../agent';
+
+/** Arguments handed to the injected non-streaming LLM call. */
+type NonStreamingCallParams = {
+    baseUrl: string;
+    modelName: string;
+    engine: 'lmstudio' | 'ollama';
+    messages: ChatMessage[];
+    temperature: number;
+    maxTokens?: number;
+    contextLength?: number;
+    signal?: AbortSignal;
+};
+
+/** Value the non-streaming LLM call resolves with: generated text plus an optional stop reason. */
+type NonStreamingCallResult = { text: string; stopReason?: string };
+
+/** Collaborators that `compressSessionSummary` receives from its caller. */
+export interface CompressSummaryDeps {
+    /** Extension context; its `globalState` is where the `chat_sessions` list is read and written. */
+    context: vscode.ExtensionContext;
+    /** Injected LLM call; `compressSessionSummary` awaits it once and reads `text` from the result. */
+    callNonStreaming: (params: NonStreamingCallParams) => Promise<NonStreamingCallResult>;
+}
+
+/** Upper bound, in UTF-16 code units, on how much of each message goes into the transcript. */
+const MAX_TRANSCRIPT_MESSAGE_CHARS = 400;
+
+/** Model replies shorter than this are treated as unusable and are not stored. */
+const MIN_SUMMARY_CHARS = 12;
+
+/**
+ * Truncates `text` to at most `max` UTF-16 code units without cutting a surrogate pair in half.
+ * A plain `slice(0, max)` can end on a high surrogate (first half of e.g. an emoji), which
+ * leaves malformed text in the prompt; in that case the dangling half is dropped as well.
+ */
+function clipWithoutSplittingSurrogates(text: string, max: number): string {
+    if (text.length <= max) return text;
+    const lastKept = text.charCodeAt(max - 1);
+    const endsOnHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
+    return text.slice(0, endsOnHighSurrogate ? max - 1 : max);
+}
+
+/**
+ * Asks the model for a 2-3 sentence summary of a chat session and stores it as `summary`
+ * on the matching entry of the `chat_sessions` list in the extension's global state.
+ *
+ * Nothing is stored when the session has fewer than three visible messages, when the model
+ * reply is shorter than {@link MIN_SUMMARY_CHARS}, or when no stored session matches `taskId`.
+ * Failures of the model call or of the state update are logged and not rethrown.
+ *
+ * @param deps    Extension context (for `globalState`) and the non-streaming LLM call.
+ * @param taskId  Id of the session to annotate; compared as a string against each stored `id`.
+ * @param history Session messages; internal messages and roles other than user/assistant are ignored.
+ */
+export async function compressSessionSummary(deps: CompressSummaryDeps, taskId: string, history: ChatMessage[]): Promise<void> {
+    const visible = history.filter((m) => !m.internal && (m.role === 'user' || m.role === 'assistant'));
+    if (visible.length < 3) return;
+    const cfg = getConfig();
+    // One line per message: collapse whitespace runs, then cap the length per message.
+    const transcript = visible
+        .map((m) => {
+            const flattened = String(m.content).replace(/\s+/g, ' ');
+            return `${m.role.toUpperCase()}: ${clipWithoutSplittingSurrogates(flattened, MAX_TRANSCRIPT_MESSAGE_CHARS)}`;
+        })
+        .join('\n\n');
+    const messages: ChatMessage[] = [
+        {
+            role: 'system',
+            content: [
+                'You compress chat transcripts into a 2-3 sentence summary.',
+                'Capture: (1) the user\'s topic or task, (2) the main decision or answer reached, (3) any open issue.',
+                'Reply in the user\'s primary language (mirror Korean ↔ English exactly as in the transcript).',
+                'Reply with ONLY the summary text. No headers, no quotes, no preamble.',
+            ].join(' '),
+            internal: true,
+        },
+        { role: 'user', content: `[TRANSCRIPT]\n${transcript}\n[END]` },
+    ];
+    try {
+        const result = await deps.callNonStreaming({
+            baseUrl: cfg.ollamaUrl,
+            modelName: cfg.defaultModel,
+            engine: resolveEngine(cfg.ollamaUrl),
+            messages,
+            temperature: 0.3,
+            maxTokens: 256,
+            contextLength: cfg.contextLength,
+        });
+        // Strip wrapping quotes/backticks the model may add despite the instructions, then trim
+        // again so whitespace that sat inside those quotes is not persisted.
+        const summary = (result.text || '')
+            .trim()
+            .replace(/^["'`]+|["'`]+$/g, '')
+            .trim();
+        if (summary.length < MIN_SUMMARY_CHARS) return;
+        // Read the session list only after the model call so the write-back uses the latest state.
+        const sessions = deps.context.globalState.get<any[]>('chat_sessions', []) || [];
+        const idx = sessions.findIndex((s) => String(s?.id) === String(taskId));
+        if (idx < 0) return;
+        sessions[idx].summary = summary;
+        await deps.context.globalState.update('chat_sessions', sessions);
+        logInfo('Session summary stored for medium-term recall.', { taskId, length: summary.length });
+    } catch (e: any) {
+        // Best-effort feature: record the failure and leave the stored sessions untouched.
+        logError('Session summary compression failed.', { taskId, error: e?.message ?? String(e) });
+    }
+}