Update project files

2026-05-22 15:00:14 +09:00
parent 132d130ff1
commit 8016ef18fa
29 changed files with 1353 additions and 804 deletions
@@ -183,6 +183,25 @@ export class AgentExecutor {
    static readonly ABS_PATH_RE = new RegExp(POSIX_ABS_PATH_SRC, 'i');
    static readonly WIN_ABS_PATH_RE = new RegExp(WIN_ABS_PATH_SRC, 'i');

+    /**
+     * Hard cap on retained in-memory chat messages. Once exceeded, the oldest
+     * messages are dropped; a leading message with role 'system' is kept in
+     * place, any other first message is dropped like the rest. The per-request
+     * context budgeter (`trimHistoryToBudget`) still does the real fitting; this
+     * only bounds the array itself. NOTE(review): tool-result entries count toward
+     * the cap too, so tool-heavy sessions may hit 40 quickly — confirm the value.
+     */
+    private static readonly MAX_RETAINED_MESSAGES = 40;
+    /**
+     * Older internal tool-result messages (read_file / list_files / list_brain /
+     * read_brain dumps) are the bulkiest part of history and add little once the
+     * conversation has moved on. Such a message outside the most recent
+     * `RECENT_FULL_MESSAGES` entries has its content cut to `OLD_TOOL_RESULT_CAP`
+     * characters. Recent messages are kept full for conversation continuity.
+     */
+    private static readonly RECENT_FULL_MESSAGES = 16;
+    private static readonly OLD_TOOL_RESULT_CAP = 600;
+
    private chatHistory: ChatMessage[] = [];
    private abortController: AbortController | null = null;
    private webview: vscode.Webview | undefined;
@@ -225,9 +244,10 @@ export class AgentExecutor {

        // Initialize 5-Layer Cognitive Memory System
        const activeBrain = getActiveBrainProfile();
+        const initConfig = getConfig();
        this.memoryManager = new MemoryManager(activeBrain.localBrainPath, {
-            enabled: getConfig().memoryEnabled,
-            shortTermLimit: getConfig().memoryShortTermMessages,
+            enabled: initConfig.memoryEnabled,
+            shortTermLimit: initConfig.memoryShortTermMessages,
        });

        // Initialize RAG Pipeline Orchestrator
@@ -495,6 +515,9 @@ export class AgentExecutor {

            // 3. API Request Setup (라인 229에서 이미 추출한 ollamaUrl, configDefaultModel 재사용)
            const actualModel = (modelName && modelName.trim()) || configDefaultModel;
+            // Bound the in-memory history before building the request — shrinks bulky
+            // older tool-result bodies and drops the oldest messages past the cap.
+            this.capChatHistory();
            const reqMessages = this.buildRequestHistory(this.chatHistory);

            // Handle Vision Content Injection
@@ -666,10 +689,22 @@ export class AgentExecutor {
                .reduce((n, m) => n + (Array.isArray(m?.images) ? m.images.length : 0), 0);
            const imageTokenReserve = imageCount * 1024;

+            // Output budget we ACTUALLY reserve before trimming — not the bare
+            // minOutputTokens floor (512). If we only reserve 512, a long session
+            // is allowed to grow the prompt until ~512-1k tokens remain for the
+            // answer; small/MoE local models (e.g. gemma 4B-active) then emit EOS
+            // as the first token and return an empty response. Reserving ~10% of
+            // the window (>=2048) forces history/system trimming to keep a real
+            // answer-sized hole open. Capped at maxOutputTokens.
+            const preferredOutputReserve = Math.min(
+                ctxLimits.maxOutputTokens,
+                Math.max(2048, Math.floor(ctxLimits.contextLength * 0.1))
+            );
+
            // (1) 시스템 프롬프트는 예산의 ~65%까지만 허용 — 그 이상이면 [CONTEXT] 블록부터 잘라낸다.
            const systemCapTokens = Math.max(
                1024,
-                Math.floor((ctxLimits.contextLength - ctxLimits.safetyMargin - ctxLimits.minOutputTokens - imageTokenReserve) * 0.65)
+                Math.floor((ctxLimits.contextLength - ctxLimits.safetyMargin - preferredOutputReserve - imageTokenReserve) * 0.65)
            );
            const { prompt: budgetedSystemPrompt, truncated: systemTruncated } =
                truncateSystemPromptContext(fullSystemPrompt, systemCapTokens);
@@ -681,7 +716,7 @@ export class AgentExecutor {
            // (2) 대화 기록 압축.
            const historyBudget = Math.max(
                256,
-                ctxLimits.contextLength - systemTokens - ctxLimits.safetyMargin - ctxLimits.minOutputTokens - imageTokenReserve
+                ctxLimits.contextLength - systemTokens - ctxLimits.safetyMargin - preferredOutputReserve - imageTokenReserve
            );
            let budgetedHistory: ChatMessage[] = reqMessages;
            if (config.autoCompactHistory) {
@@ -1977,6 +2012,50 @@ export class AgentExecutor {
        ].join('\n');
    }

+    /**
+     * Bound the in-memory `chatHistory` so a very long-running session does not
+     * grow it without limit:
+     *   1. Internal tool-result dumps (read_file / list_files / list_brain /
+     *      read_brain) older than the most recent `RECENT_FULL_MESSAGES` entries
+     *      are cut to `OLD_TOOL_RESULT_CAP` characters plus a marker line. The
+     *      cut never lands inside a surrogate pair, and a message that already
+     *      carries the marker is skipped, so repeated calls are idempotent.
+     *   2. If the array still exceeds `MAX_RETAINED_MESSAGES`, the oldest messages
+     *      are dropped. Index 0 is kept only when its role is 'system'; any other
+     *      first message is dropped together with the rest of the overflow.
+     * Step 1 rewrites only `internal: true` system messages, so visible
+     * user/assistant text inside the retained window is never altered.
+     */
+    private capChatHistory(): void {
+        const history = this.chatHistory;
+        if (history.length === 0) return;
+
+        // (1) Shrink bulky tool-result bodies of older internal messages.
+        const cap = AgentExecutor.OLD_TOOL_RESULT_CAP;
+        const marker = '\n…[이전 도구 결과는 컨텍스트 절약을 위해 축약되었습니다]';
+        const recentStart = Math.max(0, history.length - AgentExecutor.RECENT_FULL_MESSAGES);
+        for (let i = 0; i < recentStart; i++) {
+            const msg = history[i];
+            if (msg.role !== 'system' || !msg.internal || typeof msg.content !== 'string') continue;
+            // Only the bulky tool-result dumps — leave compaction notices etc. alone.
+            if (!/^\[Result of (read_file|list_files|list_brain|read_brain)\b/.test(msg.content)) continue;
+            if (msg.content.length <= cap) continue;
+            // Already shrunk on an earlier call — re-slicing would only redo the work.
+            if (msg.content.length <= cap + marker.length && msg.content.endsWith(marker)) continue;
+            // Back off one code unit rather than leave a lone high surrogate behind.
+            const lastUnit = msg.content.charCodeAt(cap - 1);
+            const cut = lastUnit >= 0xd800 && lastUnit <= 0xdbff ? cap - 1 : cap;
+            msg.content = msg.content.slice(0, cut) + marker;
+        }
+
+        // (2) Drop the oldest messages once over the hard cap, keeping a leading
+        //     system message in place when there is one.
+        const overflow = history.length - AgentExecutor.MAX_RETAINED_MESSAGES;
+        if (overflow > 0) {
+            history.splice(history[0].role === 'system' ? 1 : 0, overflow);
+        }
+    }
+
    private buildRequestHistory(history: ChatMessage[]): ChatMessage[] {
        return history.map((message) => {
            if (message.role !== 'assistant' || typeof message.content !== 'string') {
@@ -111,15 +111,30 @@ export class BridgeServer {
        });
    }

+    /**
+     * Cached `/ping` payload. `/ping` is a liveness probe that external tools may
+     * hit frequently; the previous implementation did a full brain-corpus walk
+     * plus a config serialize on *every* hit. We now compute that body at most once
+     * per `PING_CACHE_TTL_MS` and reuse it — the response shape (status / config /
+     * brain) is unchanged, but every field can lag live state by up to one TTL.
+     */
+    private _pingCache: { body: string; expiresAt: number } | null = null;
+    private static readonly PING_CACHE_TTL_MS = 5000;
+
    private handlePing(res: http.ServerResponse) {
-        const brainDir = _getBrainDir();
-        const brainCount = fs.existsSync(brainDir) ? findBrainFiles(brainDir).length : 0;
+        const now = Date.now();
+        if (!this._pingCache || this._pingCache.expiresAt <= now) {
+            const brainDir = _getBrainDir();
+            const brainCount = fs.existsSync(brainDir) ? findBrainFiles(brainDir).length : 0;
+            const body = JSON.stringify({
+                status: 'ok',
+                config: getConfig(),
+                brain: { fileCount: brainCount, enabled: this.provider.brainEnabled }
+            });
+            this._pingCache = { body, expiresAt: now + BridgeServer.PING_CACHE_TTL_MS };
+        }
        res.writeHead(200, { 'Content-Type': 'application/json' });
-        res.end(JSON.stringify({
-            status: 'ok',
-            config: getConfig(),
-            brain: { fileCount: brainCount, enabled: this.provider.brainEnabled }
-        }));
+        res.end(this._pingCache.body);
    }

    private handlePost(req: http.IncomingMessage, res: http.ServerResponse, processor: (data: any, res: http.ServerResponse) => Promise<void>) {
@@ -36,7 +36,7 @@ import { AIService } from './core/services';
 import type { CompanyState } from './features/company';
 import { SettingsPanelProvider } from './features/settings/settingsPanelProvider';
 import { resolveScopeForAgent, openKnowledgeMapEditor } from './skills/agentKnowledgeMap';
-import { getBrainTokenIndex } from './retrieval';
+import { getBrainTokenIndex, clearBrainTokenIndex } from './retrieval';
 import { lessonTemplate, lessonSlug, parseLessonFrontmatter, normalizeLessonTitle, bumpLessonOccurrences } from './retrieval/lessonHelpers';
 import { retrieveScoped, buildContextBlock } from './skills/scopedBrainRetriever';

@@ -835,6 +835,16 @@ export async function activate(context: vscode.ExtensionContext) {
        vscode.workspace.onDidChangeConfiguration((e) => {
            if (e.affectsConfiguration('g1nation')) void settingsPanel.refresh();
        }),
+        // Drop the in-memory brain token index whenever the brain profiles or the
+        // active brain change — a profile edit can repoint `localBrainPath`, so a
+        // stale index keyed by the old path must not linger. The persisted on-disk
+        // index is untouched and reloads lazily on the next query.
+        vscode.workspace.onDidChangeConfiguration((e) => {
+            if (e.affectsConfiguration('g1nation.brainProfiles')
+                || e.affectsConfiguration('g1nation.activeBrainId')) {
+                clearBrainTokenIndex();
+            }
+        }),
        // Same for SecretStorage updates (token saved/cleared from elsewhere).
        context.secrets.onDidChange((e) => {
            if (e.key === TELEGRAM_TOKEN_SECRET_KEY) void settingsPanel.refresh();
@@ -915,6 +925,10 @@ export async function activate(context: vscode.ExtensionContext) {

 export async function deactivate() {
    HealthCheckMonitor.dispose();
+    // Release the in-memory brain token index (and any pending debounced disk
+    // write timer) — the `_states` Map is otherwise never cleared for the
+    // process lifetime.
+    clearBrainTokenIndex();
    if (_telegramBot) {
        try { await _telegramBot.stop(); } catch (e) { logError('Telegram bot stop during deactivate failed.', e); }
        _telegramBot = undefined;
@@ -408,8 +408,31 @@ function move(role,x,y){
  ch.style.left=x+'px';
  ch.style.top=y+'px';
 }
-setInterval(()=>{Object.keys(chars).forEach(k=>{const a=anim[k];if(a.mode==='walk'){a.frame=(a.frame+1)%5;setSprite(k,'walk',a.frame,a.dir)}else if(a.mode==='work'){a.frame=(a.frame+1)%4;setSprite(k,'work',a.frame)} });},286)
-setInterval(()=>{Object.keys(chars).forEach(k=>{const a=anim[k];if(a.mode==='sit'){a.frame=(a.frame+1)%2;setSprite(k,'sit',a.frame)} });},700)
+// ── Managed intervals (pause while the office view is hidden) ──
+// The pixel-office runs several animation/roam/banter intervals. While the
+// webview tab is not visible they do invisible work and keep timers hot —
+// wasteful. _managedInterval registers each one; a visibilitychange handler
+// pauses them all when the document is hidden and resumes them when shown.
+// Behavior while visible is unchanged; timers registered while hidden start paused.
+const _managedIntervals=[];
+function _managedInterval(fn,ms){
+  // id===null means "paused"; the visibilitychange handler starts it on show.
+  const rec={fn:fn,ms:ms,id:document.hidden?null:setInterval(fn,ms)};
+  _managedIntervals.push(rec);
+  return rec;
+}
+function _pauseManagedIntervals(){
+  for(const rec of _managedIntervals){ if(rec.id!==null){ clearInterval(rec.id); rec.id=null; } }
+}
+function _resumeManagedIntervals(){
+  for(const rec of _managedIntervals){ if(rec.id===null){ rec.id=setInterval(rec.fn,rec.ms); } }
+}
+document.addEventListener('visibilitychange',()=>{
+  if(document.hidden) _pauseManagedIntervals();
+  else _resumeManagedIntervals();
+});
+_managedInterval(()=>{Object.keys(chars).forEach(k=>{const a=anim[k];if(a.mode==='walk'){a.frame=(a.frame+1)%5;setSprite(k,'walk',a.frame,a.dir)}else if(a.mode==='work'){a.frame=(a.frame+1)%4;setSprite(k,'work',a.frame)} });},286)
+_managedInterval(()=>{Object.keys(chars).forEach(k=>{const a=anim[k];if(a.mode==='sit'){a.frame=(a.frame+1)%2;setSprite(k,'sit',a.frame)} });},700)
 // ── 책상 회피 path planner ──
 // walkPath의 각 leg를 직선이 아닌 *책상을 우회하는* L자 또는 corridor 경로로
 // 펴서 캐릭터가 책상을 가로지르지 않게. 책상이 회전됐을 때를 대비해 padding
@@ -498,7 +521,7 @@ function sendHome(role,mode='sit'){
  if(Math.abs(cx-hx)<1&&Math.abs(cy-hy)<1){setSprite(role,mode);return;}
  walkPath(role,[st.dock,[hx,hy]],()=>setSprite(role,mode));
 }
-setInterval(()=>{
+_managedInterval(()=>{
  if(!['idle','done'].includes(_prevStatus || 'idle')) return;
  const free=Object.keys(chars).filter(k=>anim[k]?.mode==='sit'&&!chars[k].classList.contains('active'));
  if(!free.length)return;
@@ -725,7 +748,7 @@ function _innerThoughtTick(){
  const text = _innerThoughtFor(st.agentKey, anim[role].mode);
  if(text) _bubbleFromLog(role, text);
 }
-setInterval(_innerThoughtTick, 7500);
+_managedInterval(_innerThoughtTick, 7500);

 // ── Webtoon-style 티키타카 banter (refactor: pipeline-aware) ──
 // 각 phase 에 *시퀀스화된 대화 script* 가 있어 phase 진입 시 한 줄씩 시간 차로 emit.
@@ -584,7 +584,35 @@ interface FileScan {
    documentProject: string | undefined;
 }

+/**
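+ * Scan cache keyed by file path and validated by mtime + size. The previous
+ * implementation re-read (and re-classified) every brain file from disk on every
+ * chat message. We now reuse a parsed `FileScan` while the file's mtime and size
+ * are unchanged — re-reading only when the file actually changes. This mirrors
+ * the mtime-keyed caching style of `retrieval/brainIndex.ts` (whose
+ * `getBrainTokenIndex` caches tokens the same way). A hit returns the stored scan
+ * as-is — including `relative`, derived from the `brainRoot` of the call that
+ * filled the entry — so output is identical only while a file is always scanned
+ * under the same `brainRoot`. NOTE(review): no eviction is visible here — confirm
+ * the map cannot grow without bound.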
+ */
+interface ScanCacheEntry {
+    mtimeMs: number;
+    size: number;
+    scan: FileScan;
+}
+const _scanCache = new Map<string, ScanCacheEntry>();
+
 function scanFile(file: string, brainRoot: string): FileScan {
+    let mtimeMs = 0;
+    let size = 0;
+    try {
+        const stat = fs.statSync(file);
+        mtimeMs = stat.mtimeMs;
+        size = stat.size;
+        const cached = _scanCache.get(file);
+        if (cached && cached.mtimeMs === mtimeMs && cached.size === size) {
+            return cached.scan;
+        }
+    } catch {
+        // stat failed — fall through and attempt a fresh read (which will also fail safely)
+    }
    const relative = path.relative(brainRoot, file);
    const title = path.basename(file, path.extname(file));
    let content = '';
@@ -598,7 +626,11 @@ function scanFile(file: string, brainRoot: string): FileScan {
    const lower = content.toLowerCase();
    const documentProject = inferDocumentProject(relative, lower);
    const titleWithPath = `${relative.replace(/[\\/]/g, ' ')} ${title}`;
-    return { file, relative, title, titleWithPath, content, lower, sourceType, knowledgeRole, documentProject };
+    const scan: FileScan = { file, relative, title, titleWithPath, content, lower, sourceType, knowledgeRole, documentProject };
+    if (mtimeMs > 0) {
+        _scanCache.set(file, { mtimeMs, size, scan });
+    }
+    return scan;
 }

 function scoreScan(scan: FileScan, terms: string[], intent: SecondBrainQueryIntent, targetProject?: string): SecondBrainTraceDocument {
@@ -17,6 +17,14 @@ import { EpisodicEntry, MemoryContextResult } from './types';
 export class EpisodicMemory {
    private episodeDir: string;
    private maxEpisodes: number;
+    /**
+     * mtime-keyed cache of the parsed episode list. The previous implementation
+     * re-read and re-parsed every episode JSON from disk on every message. We now
+     * cache the parsed result and re-read only when the episode directory's mtime
+     * changes. Adding, removing or renaming an episode file bumps that mtime;
+     * overwriting an existing file in place typically does not, so such a rewrite
+     * stays invisible until the directory changes again. This mirrors the
+     * mtime-keyed caching style of `retrieval/brainIndex.ts`.
+     */
+    private _episodeCache: { dirMtimeMs: number; episodes: EpisodicEntry[] } | null = null;

    constructor(brainPath: string, maxEpisodes = 50) {
        this.episodeDir = path.join(brainPath, 'memory', 'episodes');
@@ -85,9 +93,19 @@ export class EpisodicMemory {

    /**
     * 저장된 모든 에피소드를 최신순으로 로드합니다.
+     *
+     * Result is cached and re-read only when the episode directory's mtime
+     * changes — creating, deleting, or renaming an episode file bumps it, but an
+     * in-place overwrite of an existing file typically does not and is served stale.
     */
    public loadAllEpisodes(): EpisodicEntry[] {
        try {
+            const dirMtimeMs = fs.statSync(this.episodeDir).mtimeMs;
+            const cached = this._episodeCache;
+            if (cached && cached.dirMtimeMs === dirMtimeMs) {
+                return cached.episodes.slice();
+            }
+
            const files = fs.readdirSync(this.episodeDir)
                .filter((f) => f.endsWith('.json'))
                .sort()
@@ -101,7 +119,8 @@ export class EpisodicMemory {
                } catch { /* skip corrupted */ }
            }

-            return episodes;
+            this._episodeCache = { dirMtimeMs, episodes };
+            return episodes.slice();
        } catch {
            return [];
        }
@@ -17,13 +17,16 @@ export class LongTermMemory {
    private store: LongTermStore;
    private filePath: string;
    private dirty = false;
+    /** Hard cap on retained entries — oldest are trimmed when exceeded. Default 100 (matches MemoryConfig.longTermMaxEntries). */
+    private maxEntries: number;

-    constructor(brainPath: string) {
+    constructor(brainPath: string, maxEntries = 100) {
        const memoryDir = path.join(brainPath, 'memory');
        if (!fs.existsSync(memoryDir)) {
            fs.mkdirSync(memoryDir, { recursive: true });
        }
        this.filePath = path.join(memoryDir, 'long_term.json');
+        this.maxEntries = maxEntries > 0 ? maxEntries : 100;
        this.store = this.load();
    }

@@ -62,6 +65,12 @@ export class LongTermMemory {
            referenceCount: 0
        };
        this.store.entries.push(entry);
+        // Enforce the retention cap — drop the oldest entries (by createdAt) once
+        // over the limit. The store array is append-ordered, so the oldest are at
+        // the front; we trim from there.
+        if (this.store.entries.length > this.maxEntries) {
+            this.store.entries.splice(0, this.store.entries.length - this.maxEntries);
+        }
        this.dirty = true;
        this.save();
        return entry;
@@ -54,7 +54,7 @@ export class MemoryManager {
        };

        this.shortTerm = new ShortTermMemory();
-        this.longTerm = new LongTermMemory(brainPath);
+        this.longTerm = new LongTermMemory(brainPath, this.config.longTermMaxEntries);
        this.procedural = new ProceduralMemory(brainPath);
        this.episodic = new EpisodicMemory(brainPath, this.config.episodicMaxEpisodes);
        this.extractor = new MemoryExtractor();
@@ -129,11 +129,24 @@ export function expandQuery(tokens: string[]): string[] {

 /**
 * TF (Term Frequency): 문서 내 용어 빈도
+ *
+ * Takes a precomputed term-count `Map` (built once per document by
+ * `buildTermCounts`) instead of re-scanning the token array per term — the
+ * value is numerically identical to `count / documentTokens.length`.
 */
-function termFrequency(term: string, documentTokens: string[]): number {
-    if (documentTokens.length === 0) return 0;
-    const count = documentTokens.filter((t) => t === term).length;
-    return count / documentTokens.length;
+function termFrequency(term: string, termCounts: Map<string, number>, totalTokens: number): number {
+    // An empty document contributes no frequency (and must not divide by zero).
+    if (totalTokens === 0) { return 0; }
+    return (termCounts.get(term) || 0) / totalTokens;
+}
+
+/** Tally how often each token occurs in one document; built once, then reused for every query term. */
+function buildTermCounts(documentTokens: string[]): Map<string, number> {
+    const tally = new Map<string, number>();
+    documentTokens.forEach((token) => {
+        tally.set(token, (tally.get(token) || 0) + 1);
+    });
+    return tally;
 }

 /**
@@ -231,7 +244,11 @@ export function scoreTfIdfPreTokenized(
    if (documents.length === 0 || queryTokens.length === 0) return [];

    const docTokenArrays = documents.map((doc) => doc.tokens);
-    const docTokenSets = docTokenArrays.map((tokens) => new Set(tokens));
+    // Precompute, once per document: a term -> count map (used for TF) and the
+    // derived token Set (used for IDF). Both were previously recomputed inside
+    // nested loops — building them once and reusing them is numerically identical.
+    const docTermCounts = docTokenArrays.map((tokens) => buildTermCounts(tokens));
+    const docTokenSets = docTermCounts.map((counts) => new Set(counts.keys()));

    // Expand query with synonyms
    const expandedQuery = expandQuery(queryTokens);
@@ -248,6 +265,7 @@ export function scoreTfIdfPreTokenized(

    return documents.map((doc, index) => {
        const docTokens = docTokenArrays[index];
+        const termCounts = docTermCounts[index];
        const titleTokens = new Set(doc.titleTokens);
        let score = 0;
        const matchedTerms: string[] = [];
@@ -262,7 +280,7 @@ export function scoreTfIdfPreTokenized(
        else if (conflictCount >= SCORING_CONFIG.CONFLICT_THRESHOLDS.LOW) conflictSeverity = 'LOW';

        for (const term of expandedQuery) {
-            const tf = termFrequency(term, docTokens);
+            const tf = termFrequency(term, termCounts, docTokens.length);
            const idf = idfCache.get(term) || 1;
            const tfidf = tf * idf;

@@ -22,6 +22,15 @@ function getTrustedRoots(workspaceRoot: string): string[] {
            roots.push(path.normalize(f.uri.fsPath).toLowerCase());
        }
    }
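+    // Also trust the immediate parent of each root, so sibling projects under a
+    // shared parent (e.g. E:\Wiki\connectai + E:\Wiki\Datacollect) are reachable.
+    // Guard: never widen to a drive/filesystem root. NOTE(review): the parent
+    // joins the same `roots` list as the workspace itself, so nothing here limits
+    // it to read/list — confirm write/delete paths do not also inherit it.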
+    for (const r of [...roots]) {
+        const parent = path.normalize(path.dirname(r)).toLowerCase();
+        if (parent && parent !== r && path.dirname(parent) !== parent) {
+            roots.push(parent);
+        }
+    }
    _trustedRoots = [...new Set(roots)];
    return _trustedRoots;
 }
@@ -48,6 +57,59 @@ export function validatePath(workspaceRoot: string, targetPath: string): string
    return absolutePath;
 }

+/**
+ * Splits a command on top-level `&&`, ignoring `&&` inside single- or double-quoted
+ * strings (e.g. a commit message). Escapes are not interpreted. Returns trimmed, non-empty parts.
+ */
+function splitTopLevelAnd(command: string): string[] {
+    const parts: string[] = [];
+    let buf = '';
+    let quote: string | null = null;
+    for (let i = 0; i < command.length; i++) {
+        const c = command[i];
+        if (quote) {
+            buf += c;
+            if (c === quote) { quote = null; }
+            continue;
+        }
+        if (c === "'" || c === '"') { quote = c; buf += c; continue; }
+        if (c === '&' && command[i + 1] === '&') {
+            parts.push(buf);
+            buf = '';
+            i++; // skip the second '&'
+            continue;
+        }
+        buf += c;
+    }
+    parts.push(buf);
+    return parts.map(p => p.trim()).filter(p => p.length > 0);
+}
+
+/**
+ * Windows PowerShell 5.1 — the default VS Code integrated terminal on Windows —
+ * does not support the `&&` chaining operator (it is a hard parser error, so the
+ * WHOLE command fails to run). Rewrite `A && B && C` into a PowerShell-native
+ * conditional chain that preserves short-circuit semantics:
+ *
+ *   A && B && C  ->  A; if ($?) { B; if ($?) { C } }
+ *
+ * `$?` reflects the success of the previous command, so a failed step (e.g. a
+ * failed `cd`) still short-circuits the rest. The rewritten form is not valid
+ * in POSIX shells, so the command is returned untouched unless `platform` is
+ * 'win32' (defaults to the host's `process.platform`).
+ */
+function rewriteForPowerShell(command: string, platform: string = process.platform): string {
+    // `A; if ($?) { B }` is PowerShell-only syntax — other platforms keep `&&` as written.
+    if (platform !== 'win32' || !command.includes('&&')) { return command; }
+    const parts = splitTopLevelAnd(command);
+    if (parts.length <= 1) { return command; }
+    let chain = parts[parts.length - 1];
+    for (let i = parts.length - 2; i >= 0; i--) {
+        chain = `${parts[i]}; if ($?) { ${chain} }`;
+    }
+    return chain;
+}
+
 /**
 * Sanitizes terminal commands to prevent destructive actions.
 * Uses a combination of blocklist for dangerous patterns and recommendation for allowed tools.
@@ -86,5 +148,7 @@ export function sanitizeCommand(command: string): string {
        console.warn(`[Security] Warning: Running uncommon command '${baseCmd}'. Ensure this is intended.`);
    }

-    return trimmedCmd;
+    // Rewrite `&&` chains for PowerShell (the Windows default terminal) so the
+    // command actually runs instead of failing with a parser error.
+    return rewriteForPowerShell(trimmedCmd);
 }
@@ -25,6 +25,7 @@ import { handleBrainMessage } from './sidebar/brainHandlers';
 import { handleChronicleMessage } from './sidebar/chronicleHandlers';
 import { handleAgentMessage } from './sidebar/agentHandlers';
 import { getOrCreateAgentEntry, resolveScopeForAgent } from './skills/agentKnowledgeMap';
+import { clearBrainTokenIndex } from './retrieval/brainIndex';
 import { estimateModelParamsB } from './lib/contextManager';
 import { loadExternalSkills, formatSkillsAsPromptBlock } from './skills/externalSkillLoader';
 import {
@@ -836,9 +837,15 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
            localResourceRoots: [this._extensionUri]
        };

+        // Webview event listeners must be disposed — otherwise each re-init of the
+        // view leaks a listener (and its captured `this`). We collect every
+        // listener disposable here, dispose them when the view itself is disposed,
+        // and also register them with the extension subscriptions as a backstop.
+        const viewDisposables: vscode.Disposable[] = [];
+
        // [State Persistence Fix] 사이드바가 다시 보여질 때 세팅값 자동 복원
        let _lastVisibilityRefresh = 0;
-        webviewView.onDidChangeVisibility(() => {
+        viewDisposables.push(webviewView.onDidChangeVisibility(() => {
            if (!webviewView.visible) return;
            const now = Date.now();
            // 5초 이내에 이미 갱신했으면 건너뜀
@@ -850,7 +857,7 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
            void this._sendBrainProfiles();
            void this._sendAgentsList();
            void this._sendReadyStatus();
-        });
+        }));

        webviewView.webview.html = this._getHtml(webviewView.webview);
        this._agent.setWebview(webviewView.webview);
@@ -858,7 +865,7 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
        void this._restoreActiveSessionIntoView();
        void this._sendReadyStatus();

-        webviewView.webview.onDidReceiveMessage(async (data) => {
+        viewDisposables.push(webviewView.webview.onDidReceiveMessage(async (data) => {
            // dispatch root 진입 trace — "/benchmark 입력했는데 아무 응답 없음" 같은
            // 보고가 들어왔을 때 webview message가 정말 도착했는지부터 즉시 판별.
            const valuePreview = typeof data?.value === 'string'
@@ -870,7 +877,14 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
            if (await handleChronicleMessage(this, data)) return;
            if (await handleAgentMessage(this, data)) return;
            logInfo(`Unhandled sidebar message: ${data?.type}`);
+        }));
+
+        webviewView.onDidDispose(() => {
+            for (const d of viewDisposables.splice(0)) {
+                try { d.dispose(); } catch { /* already disposed */ }
+            }
        });
+        this._context.subscriptions.push(...viewDisposables);
    }

    _currentSessionId: string | null = null;
@@ -1260,6 +1274,11 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn

        await vscode.workspace.getConfiguration('g1nation').update('activeBrainId', nextProfile.id, vscode.ConfigurationTarget.Global);
        this._currentSessionBrainId = nextProfile.id;
+        // Drop the in-memory brain token index — the active brain (and its path)
+        // may now differ, and the index's `_states` Map is otherwise never cleared.
+        // The persisted on-disk index is left intact and reloads lazily on the
+        // next query for whichever brain is now active.
+        clearBrainTokenIndex();
        await this._sendBrainProfiles();
        await this._sendBrainStatus();

@@ -2,7 +2,8 @@ import * as fs from 'fs';
 import * as path from 'path';
 import { findBrainFiles, summarizeText } from '../utils';
 import { isInside } from '../lib/paths';
-import { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from '../retrieval/scoring';
+import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt } from '../retrieval/scoring';
+import { getBrainTokenIndex } from '../retrieval/brainIndex';
 import { estimateTokens } from '../retrieval/contextBudget';

 /**
@@ -91,33 +92,35 @@ export function retrieveScoped(
    });
    if (candidates.length === 0) return { ...empty, candidateCount: 0 };

-    const documents = candidates.map((file) => {
-        let content = '';
-        let lastModified = 0;
-        try {
-            content = fs.readFileSync(file, 'utf8');
-            lastModified = fs.statSync(file).mtimeMs;
-        } catch { /* skip unreadable file */ }
-        return {
-            title: path.basename(file, '.md'),
-            content,
-            lastModified,
-            filePath: file,
-            relativePath: path.relative(brainRoot, file),
-        };
-    });
+    // Tokenized docs from the persistent mtime-keyed brain index — unchanged files
+    // are not re-read or re-tokenized. The index tokenizes `${basename} ${content}`
+    // (titleTokens = tokenize(basename)), which is exactly what the previous
+    // `scoreTfIdf` call computed here, so scoring stays byte-identical.
+    const indexed = getBrainTokenIndex(brainRoot, candidates);
+    if (indexed.length === 0) return { ...empty, candidateCount: candidates.length };

    const queryTokens = tokenize(query);
    const expanded = expandQuery(queryTokens);
-    const scored = scoreTfIdf(expanded, documents);
+    const scored = scoreTfIdfPreTokenized(
+        expanded,
+        indexed.map((d) => ({
+            tokens: d.tokens,
+            titleTokens: d.titleTokens,
+            lastModified: d.mtimeMs,
+            conflictCount: d.conflictCount,
+        }))
+    );

    const chunks = scored
        .filter((s) => s.score > 0)
        .sort((a, b) => b.score - a.score)
        .slice(0, maxResults)
        .map<ScopedRetrievalChunk>((s) => {
-            const doc = documents[s.index];
-            const excerpt = extractBestExcerpt(doc.content, expanded, excerptLength);
+            const doc = indexed[s.index];
+            // Only the chosen top-`maxResults` files are read off disk (for excerpt extraction).
+            let content = '';
+            try { content = fs.readFileSync(doc.filePath, 'utf8'); } catch { /* deleted just now — empty excerpt */ }
+            const excerpt = extractBestExcerpt(content, expanded, excerptLength);
            const summary = summarizeText(excerpt, excerptLength);
            return {
                relativePath: doc.relativePath,
@@ -150,21 +150,50 @@ export function invalidateBrainFilesCache(dir?: string): void {
 }

 function _walkBrainFiles(dir: string): string[] {
-    let results: string[] = [];
-    if (!fs.existsSync(dir)) return results;
-    const list = fs.readdirSync(dir);
-    list.forEach((file) => {
-        const filePath = path.join(dir, file);
-        const stat = fs.statSync(filePath);
-        if (stat && stat.isDirectory()) {
-            if (!EXCLUDED_DIRS.has(file)) {
-                results = results.concat(_walkBrainFiles(filePath));
+    const results: string[] = []; // one accumulator shared by the entire recursive walk
+    _walkBrainFilesInto(dir, results); // appends every `.md` path found under `dir` (excluded dirs skipped)
+    return results;
+}
+
+/**
+ * Recursively collects `.md` file paths under `dir` by appending them to
+ * `results`, the single accumulator shared by every level of the walk.
+ *
+ * Uses `readdirSync(dir, { withFileTypes: true })` so each entry's type comes
+ * from the directory read itself — no extra `fs.statSync` per entry — and pushes
+ * into one array instead of allocating a new array per directory via `.concat`.
+ * Only symbolic links cost a `statSync`, so that they are classified by their
+ * target, as the previous statSync-based walk did.
+ *
+ * Directories whose name is in `EXCLUDED_DIRS` are not descended into.
+ * A directory that cannot be read, and a symlink whose target cannot be
+ * stat'ed, are skipped silently instead of throwing.
+ *
+ * NOTE(review): symlinked directories are followed with no cycle detection —
+ * confirm brain folders cannot contain link loops.
+ *
+ * @param dir - Directory to scan.
+ * @param results - Output array, mutated in place; matching paths are pushed
+ *   as they are encountered.
+ */
+function _walkBrainFilesInto(dir: string, results: string[]): void {
+    let entries: fs.Dirent[];
+    try {
+        entries = fs.readdirSync(dir, { withFileTypes: true });
+    } catch {
+        // Missing or unreadable directory: contribute nothing. The old walk guarded only the missing case (existsSync).
+        return;
+    }
+    for (const entry of entries) {
+        const name = entry.name;
+        const filePath = path.join(dir, name);
+        let isDir = entry.isDirectory();
+        let isFile = entry.isFile();
+        // Symlinks: Dirent type flags describe the link itself, whereas the previous
+        // statSync-based walk followed links — stat the target so links are classified the same way.
+        if (entry.isSymbolicLink()) {
+            try {
+                const stat = fs.statSync(filePath);
+                isDir = stat.isDirectory();
+                isFile = stat.isFile();
+            } catch {
+                continue; // dangling symlink — skip it (the old unguarded statSync would have thrown here)
            }
-        } else if (file.endsWith('.md')) {
+        }
+        if (isDir) {
+            if (!EXCLUDED_DIRS.has(name)) {
+                _walkBrainFilesInto(filePath, results);
+            }
+        } else if (isFile && name.endsWith('.md')) {
            results.push(filePath);
        }
-    });
-    return results;
+    }
 }

 const BASE_SYSTEM_PROMPT = `You are Astra, a Jarvis-style local project operating assistant.
@@ -184,31 +213,53 @@ If the provided initial scan preview is not enough, DO NOT complain that you can
 Never say "upload the source code", "provide the files", "파일 내용을 보여주세요", or "먼저 분석할까요?" before attempting access.
 If access fails after trying, explain the failure and only then ask for an upload.

+[EXECUTION RULE]
+When the user asks to run, start, launch, boot, or serve something (실행/구동/시작/켜줘/띄워줘/돌려줘/run/start/launch/serve), ACT — never advise.
+- FORBIDDEN: writing a how-to, a numbered tutorial, "먼저 ~를 확인해야 합니다", "~하시기 바랍니다", or telling the user to run a command themselves. The user asked YOU to run it.
+- NEVER invent a script name, port number, or environment variable. If you have not seen it in a file THIS session, do not state it as fact.
+- If you do not know the exact start command, FIRST read the project's package.json with <read_file>, then emit <run_command> with the real script name.
+- <run_command> runs in a real terminal. If the target folder differs from the workspace, cd into its absolute path first.
+- The terminal is Windows PowerShell. Chain steps with ";" — NEVER "&&" (it is a syntax error in PowerShell 5.1). Example: cd 'C:\proj'; git add .; git commit -m 'msg'; git push
+- After acting, reply with ONE short line: what you started and where. No tutorial, no follow-up checklist.
+
+Worked example — user says: "E:\Wiki\Datacollect 서버 실행해줘"
+Step 1 (only when the start script is unknown):
+<read_file path="E:\Wiki\Datacollect\package.json"/>
+Step 2 (after the real scripts are known — pick the actual one, never a guessed name):
+<run_command>cd 'E:\Wiki\Datacollect'; npm run start-full</run_command>
+Then reply: "Datacollect 서버를 start-full 스크립트로 터미널에서 실행했습니다."
+
 [STRICT GLOBAL RULES]
 1. [NO EMOJIS - ABSOLUTE RULE] NEVER use ANY emojis, emoticons, Unicode pictorial symbols (including but not limited to emoji, kaomoji, Unicode icons), or decorative symbols anywhere in your response. NO EXCEPTIONS. Use plain text dashes (-) or asterisks (*) for bullets. Use plain markdown ## for headers. This rule overrides ALL other formatting instructions.
-2. [UNIQUE HEADINGS] Every markdown heading must be unique and appear exactly once.
+2. [HEADINGS] Every markdown heading must be unique, appear exactly once, and start with exactly one "## " — never "## ##", never "### ###". One space after the hashes.
 3. [NO INTERNAL LOGS] Never output <details>, "2nd Brain Trace", or "Debug JSON" blocks.
 4. [NO SECTION LEAKAGE] Never output sections named "요청 요약", "사용자 의도 추론", "프로젝트 기록 대상 확인", "핵심 확인 질문", or "근거 파일 경로".

 [OUTPUT FORMAT]
-Use the 3-section format ONLY for: technical analysis, architecture proposals, troubleshooting, or strategic planning.
-For conversational replies, quick facts, or simple updates — answer directly without any headers.
+LENGTH decides structure — not topic. Count how long your answer will be:

-  ## 요약
-  Core conclusion in 2-3 sentences.
+- If the answer is longer than ~4 sentences (analysis, advice, planning, troubleshooting, or any multi-part answer), you MUST lead with a summary block, then the detail:
+
+  ## 핵심 요약
+  - 2 to 4 bullet points. Each bullet is one scannable, self-contained takeaway that captures the WHOLE answer — a reader who stops here still gets the gist.
+  - This block is ALWAYS the very first thing in the response. NEVER place a summary at the bottom. NEVER write an intro paragraph before it — the summary block IS the opening.

  ## 상세 설명
-  - Root cause of the problem.
-  - Concrete step-by-step instructions: what to change, which files to edit, which commands to run.
+  Free-form depth. You MAY use your own sub-headers here (e.g. "### 1. ...", "### 2. ..."). This is where the full reasoning and steps go.

  ## 제안  ← Optional. Only include if a meaningfully better alternative exists. Omit otherwise.

+- If the answer is ~4 sentences or fewer (quick fact, simple update, casual or emotional reply) — answer directly, no headers, no summary block.
+
+The summary block is named exactly "## 핵심 요약" and goes at the TOP. A section literally named "요약" placed at the end is a bug — never do that.
+
 [FOLLOW-UP QUESTION RULES]
 A follow-up question is a precision tool, not a ritual.
 Ask ONE focused question at the very end of the response ONLY if:
 - The user's intent is genuinely ambiguous with multiple valid paths, OR
 - A critical missing detail would make the current answer completely wrong.
 If neither condition is met, give a definitive answer and stop.
+When you do ask: it is ONE plain sentence on its own line. NEVER put it under a heading, NEVER label the section ("핵심 확인 질문", "확인 질문" etc.), NEVER attach a "질문 의도" explanation, NEVER ask two or more questions.

 [ENGINEERING STANCE]
 - Be a direct engineering partner. Technical precision over polite filler.