chore: version up to 2.80.34 and package

2026-05-12 22:54:21 +09:00
parent 148bfb070b
commit 065e598cca
26 changed files with 2023 additions and 139 deletions
@@ -1,5 +1,5 @@
 {
  "result": "Final report with inconsistencies. This should be long enough to pass validation.",
-  "createdAt": 1778473456759,
+  "createdAt": 1778593954576,
  "modelVersion": "unknown"
 }
@@ -1,5 +1,5 @@
 {
  "result": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.",
-  "createdAt": 1778473456758,
+  "createdAt": 1778593954567,
  "modelVersion": "unknown"
 }
@@ -1,5 +1,5 @@
 {
  "result": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.",
-  "createdAt": 1778473456756,
+  "createdAt": 1778593954561,
  "modelVersion": "unknown"
 }
@@ -1,5 +1,5 @@
 {
-  "result": "---\nid: stress_conflict_1778473456740\ndate: 2026-05-11T04:24:16.759Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (11ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (5ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (3ms)\n",
+  "result": "---\nid: stress_conflict_1778593954545\ndate: 2026-05-12T13:52:34.580Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (11ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (5ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (10ms)\n",
-  "createdAt": 1778473456760,
+  "createdAt": 1778593954580,
  "modelVersion": "unknown"
 }
@@ -1,8 +1,8 @@
 {
-  "missionId": "stress_conflict_1778473456740",
+  "missionId": "stress_conflict_1778593954545",
  "status": "completed",
-  "startTime": "2026-05-11T04:24:16.740Z",
+  "startTime": "2026-05-12T13:52:34.545Z",
-  "totalElapsedMs": 20,
+  "totalElapsedMs": 35,
  "results": {
    "planner": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.",
    "researcher": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.",
@@ -18,28 +18,28 @@
      "to": "planner",
      "durationMs": 11,
      "message": "전략 수립 중...",
-      "ts": "2026-05-11T04:24:16.751Z"
+      "ts": "2026-05-12T13:52:34.556Z"
    },
    {
      "from": "planner",
      "to": "researcher",
      "durationMs": 5,
      "message": "핵심 정보 수집 및 분석 중...",
-      "ts": "2026-05-11T04:24:16.756Z"
+      "ts": "2026-05-12T13:52:34.561Z"
    },
    {
      "from": "researcher",
      "to": "writer",
-      "durationMs": 3,
+      "durationMs": 10,
      "message": "최종 리포트 작성 및 편집 중...",
-      "ts": "2026-05-11T04:24:16.759Z"
+      "ts": "2026-05-12T13:52:34.571Z"
    },
    {
      "from": "writer",
      "to": "completed",
-      "durationMs": 1,
+      "durationMs": 9,
      "message": "미션 완료",
-      "ts": "2026-05-11T04:24:16.760Z"
+      "ts": "2026-05-12T13:52:34.580Z"
    }
  ],
  "resilienceMetrics": {
@@ -1,5 +1,17 @@
 # Astra Patch Notes
 ## v2.80.34 (2026-05-12)
 ### 🧠 Advanced Context Management & Brain Indexing
 - **신규 컨텍스트 매니저 도입:** `contextManager.ts`를 통해 대규모 파일 및 대화 내역의 우선순위를 지능적으로 관리하고 토큰 예산을 최적화하는 기능을 추가했습니다.
 - **브레인 인덱싱 고도화:** `brainIndex.ts` 및 관련 테스트 코드를 도입하여 지식 베이스 검색 속도와 정확도를 향상시켰습니다.
 - **LM Studio 스트리밍 안정화:** `streamer.ts` 내의 응답 처리 로직을 개선하여 긴 응답 생성 시의 연결 안정성을 확보했습니다.
 - **사이드바 UI/UX 정밀 튜닝:** 사이드바의 인터랙션 로직(HTML/JS/CSS)을 개선하여 사용자 경험을 한층 더 강화했습니다.
 - **텔레그램 원격 실행 설계:** `TELEGRAM_REMOTE_EXECUTION_PLAN.md`를 통해 향후 텔레그램을 통한 원격 에이전트 실행 및 모니터링을 위한 청사진을 수립했습니다.
 - **신규 패키징:** `astra-2.80.34.vsix` 패키지를 생성하여 최신 기능과 안정성 개선 사항을 통합했습니다.
 ---
 ## v2.80.33 (2026-05-11)
 ### 🏛️ Agent Repository Reorganization & Sync
 - **에이전트 저장소 구조 개편 대응:** `Agent` 저장소의 핵심 스킬 및 지식 베이스가 `.agent/`에서 `_agent/` 폴더로 대폭 재배치됨에 따라, 이를 익스텐션의 스킬 로딩 엔진 및 경로 탐색 로직에 동기화했습니다.
@@ -0,0 +1,452 @@
 # Telegram Remote Execution 기획서
 ## 1. 목적
 현재 Astra의 Telegram 연동은 사용자의 메시지를 받아 Second Brain RAG 컨텍스트를 붙이고 AI 답변을 Telegram으로 돌려주는 "원격 질의응답" 기능이다. 사용자가 원하는 다음 단계는 Telegram에서 지시한 업무를 실제 로컬 컴퓨터의 VS Code 워크스페이스에서 진행하게 만드는 것이다.
 이 기능의 목표는 Telegram을 안전한 원격 작업 지시 채널로 확장하는 것이다. 단, 보안상 Telegram 메시지가 곧바로 임의의 파일 변경이나 터미널 명령으로 이어지면 안 된다. 기본 정책은 "읽기 작업은 제한적으로 자동 실행, 쓰기/삭제/명령 실행은 승인 기반"으로 한다.
 ## 2. 현재 구조 요약
 - `src/extension.ts`
  - Telegram bot을 생성하고 `handle(text, chatId)`에서 메시지를 처리한다.
  - 현재는 `AIService.chat()`만 호출한다.
  - `AgentExecutor.handlePrompt()`나 `executeActions()` 경로를 사용하지 않는다.
 - `src/integrations/telegram/telegramBot.ts`
  - Telegram long polling, allowlist, enrollment, send retry를 담당한다.
  - handler가 반환한 문자열을 Telegram으로 보낸다.
 - `src/agent.ts`
  - 사이드바 채팅 요청을 처리한다.
  - AI 응답 안의 action tag를 파싱해 파일 생성/수정/삭제, 파일 읽기, 명령 실행, URL 읽기 등을 수행한다.
  - `dryRun`이 켜진 경우 `TransactionManager`와 `ApprovalQueue`를 통해 승인/롤백 흐름을 제공한다.
 - `src/features/approval/approvalQueue.ts`
  - 현재 승인 대기 작업을 0..1개 관리한다.
  - approve/reject callback을 실행한다.
 ## 3. 핵심 제품 방향
 Telegram 원격 실행은 기존 사이드바 Agent 실행 기능을 그대로 노출하는 것이 아니라 별도 게이트웨이를 둔다.
 추천 명칭:
 - `TelegramTaskGateway`
 - 위치: `src/integrations/telegram/telegramTaskGateway.ts`
 역할:
 - Telegram 메시지를 "단순 답변"과 "로컬 작업 지시"로 분류한다.
 - 로컬 작업 지시일 경우 작업 계획을 생성한다.
 - 위험도를 평가한다.
 - 읽기 전용 작업은 바로 실행할 수 있다.
 - 파일 변경, 삭제, 터미널 명령은 승인 토큰을 발급하고 대기시킨다.
 - 승인 후에만 실제 로컬 작업을 실행한다.
 ## 4. 사용자 경험
 ### 4.1 기본 명령
 Telegram에서 다음 명령을 지원한다.
 - `/ask 질문`
  - 현재처럼 Second Brain 기반 답변만 생성한다.
 - `/task 업무지시`
  - 로컬 워크스페이스에서 수행할 작업으로 해석한다.
  - 예: `/task README를 읽고 설치 가이드가 부족한 부분을 보완해줘`
 - `/status`
  - 현재 실행 중이거나 승인 대기 중인 작업 상태를 보여준다.
 - `/approve 작업ID`
  - 승인 대기 중인 작업을 실행한다.
 - `/reject 작업ID`
  - 승인 대기 중인 작업을 폐기한다.
 - `/cancel`
  - 현재 실행 중인 Telegram-origin 작업을 중단한다.
 ### 4.2 예시 흐름
 사용자:
 ```text
 /task package.json이랑 README 보고 설치 방법 문서 보완해줘
 ```
 Astra:
 ```text
 작업 계획을 만들었습니다.
 작업 ID: tg-20260512-184233
 위험도: 파일 수정 필요
 예상 작업:
 1. package.json 읽기
 2. README.md 읽기
 3. README.md의 설치 섹션 수정
 변경 대상:
 - README.md
 실행하려면:
 /approve tg-20260512-184233
 취소하려면:
 /reject tg-20260512-184233
 ```
 사용자:
 ```text
 /approve tg-20260512-184233
 ```
 Astra:
 ```text
 승인되었습니다. 작업을 실행합니다.
 ```
 완료 후:
 ```text
 작업 완료.
 변경 파일:
 - README.md
 요약:
 - npm install 단계 추가
 - VSIX 설치 방법 보완
 - LM Studio/Ollama 설정 안내 추가
 ```
 ## 5. 보안 정책
 ### 5.1 Telegram allowlist 필수
 원격 실행 기능은 `g1nation.telegram.allowedChatIds`가 비어 있으면 비활성화한다.
 이유:
 - 현재 설정은 allowedChatIds가 비어 있으면 모든 chat을 허용한다.
 - 질의응답에는 괜찮을 수 있지만 원격 실행에는 위험하다.
 정책:
 - `/task`, `/approve`, `/reject`, `/cancel`, `/status`는 allowlist 등록 chat에서만 동작한다.
 - allowlist가 비어 있으면 다음 메시지를 반환한다.
 ```text
 원격 작업 기능은 허용된 Telegram chat에서만 사용할 수 있습니다.
 Settings에서 내 채널 자동 등록을 먼저 완료해주세요.
 ```
 ### 5.2 작업 위험도
 작업을 4단계로 분류한다.
 - `read_only`
  - 파일 목록 조회, 파일 읽기, Second Brain 검색, 코드 분석.
  - 자동 실행 가능.
 - `file_write`
  - 파일 생성/수정.
  - 승인 필요.
 - `destructive`
  - 파일 삭제, 대량 변경, git clean/reset, dependency reinstall.
  - 명시적 승인 필요. 기본적으로 2단계 확인을 권장한다.
 - `command`
  - 터미널 명령 실행.
  - 승인 필요.
  - `sanitizeCommand()` 통과 필요.
  - 장기적으로 allowlist 기반 command policy가 필요하다.
 ### 5.3 경로 제한
 - 모든 파일 작업은 기존 `validatePath(workspaceRoot, targetPath)`를 통과해야 한다.
 - 워크스페이스 밖 파일은 기본 차단한다.
 - Second Brain 경로는 읽기/쓰기 정책을 별도로 둔다.
 ### 5.4 기본값
 새 설정을 추가한다.
 ```json
 {
  "g1nation.telegram.remoteExecutionEnabled": false,
  "g1nation.telegram.remoteReadOnlyAutoRun": true,
  "g1nation.telegram.remoteRequireApproval": true,
  "g1nation.telegram.remoteAllowCommands": false,
  "g1nation.telegram.remoteMaxTaskSteps": 8
 }
 ```
 권장 기본값:
 - remoteExecutionEnabled: false
 - remoteReadOnlyAutoRun: true
 - remoteRequireApproval: true
 - remoteAllowCommands: false
 - remoteMaxTaskSteps: 8
 ## 6. 목표 아키텍처
 ```text
 TelegramBot
  -> TelegramTaskGateway.handle(text, chatId)
      -> command parser
      -> permission check
      -> task planner
      -> risk classifier
      -> approval store
      -> TelegramTaskExecutor
          -> AgentExecutor or shared ActionExecutor
          -> TransactionManager
          -> ApprovalQueue
      -> Telegram reply
 ```
 ## 7. 구현 전략
 ### Phase 1: 명령 라우팅과 승인 대기
 목표:
 - `/ask`, `/task`, `/status`, `/approve`, `/reject` 명령을 Telegram에서 인식한다.
 - 실제 파일 변경은 아직 하지 않아도 된다.
 - `/task`는 작업 계획과 승인 요청 메시지를 반환한다.
 작업:
 - `telegramTaskGateway.ts` 추가
 - `TelegramCommand` 타입 정의
 - `PendingTelegramTask` 타입 정의
 - in-memory pending task store 추가
 - extension.ts의 Telegram handler에서 gateway 호출
 성공 기준:
 - `/ask`는 기존 답변 경로 유지
 - `/task`는 작업 ID와 계획을 반환
 - `/status`는 pending task를 보여줌
 - `/approve`와 `/reject`는 pending task 상태를 변경
 ### Phase 2: 읽기 전용 작업 실행
 목표:
 - Telegram에서 요청한 코드 분석, 파일 읽기, 파일 목록 조회를 자동으로 수행한다.
 작업:
 - `TelegramReadOnlyTool` 추가
 - 허용 action:
  - list files
  - read file
  - search text
  - Second Brain retrieval
 - 결과를 요약해 Telegram으로 반환
 성공 기준:
 - `/task src 구조 분석해줘`가 실제 파일 목록과 주요 파일 내용을 읽고 답변한다.
 - 워크스페이스 밖 경로 요청은 차단된다.
 ### Phase 3: 파일 변경 작업 승인 실행
 목표:
 - 파일 생성/수정 요청을 Telegram에서 승인 후 실행한다.
 권장 구현:
 - `AgentExecutor.executeActions()`는 현재 private이므로 바로 재사용하기 어렵다.
 - 장기적으로는 action 실행부를 `src/core/actionExecutor.ts`로 추출한다.
 - 단기 MVP는 `AgentExecutor.handlePrompt()`를 Telegram origin으로 호출하는 방식도 가능하다.
 추천 구조:
 ```text
 ActionExecutor
  - parseActionTags(aiMessage)
  - previewActions(actions)
  - execute(actions, options)
  - rollback(transactionId)
 ```
 성공 기준:
 - `/task docs에 사용법 문서 만들어줘`
 - Astra가 계획과 변경 예정 파일을 보여줌
 - `/approve id` 후 파일 생성
 - `/reject id` 후 아무 변경 없음
 ### Phase 4: 명령 실행 지원
 목표:
 - 제한된 터미널 명령을 승인 후 실행한다.
 정책:
 - 기본 비활성화.
 - `remoteAllowCommands`가 true일 때만 동작.
 - `sanitizeCommand()` 통과 필요.
 - command allowlist를 설정으로 관리하는 것을 권장한다.
 추가 설정:
 ```json
 {
  "g1nation.telegram.remoteCommandAllowlist": [
    "npm test",
    "npm run test",
    "npm run compile",
    "git status",
    "git diff"
  ]
 }
 ```
 성공 기준:
 - `/task 테스트 실행해줘`는 승인 요청을 만든다.
 - `/approve id` 후 허용된 명령만 실행한다.
 - 차단된 명령은 실행하지 않고 이유를 반환한다.
 ## 8. AI 프롬프트 정책
 Telegram task planner용 system prompt를 별도로 둔다.
 ```text
 You are Astra Telegram Task Planner.
 Your job is to convert a Telegram message into a safe local workspace task plan.
 Rules:
 - Never execute actions directly.
 - Produce a concise task plan.
 - Classify risk as read_only, file_write, destructive, or command.
 - List expected files or directories when possible.
 - If the request is ambiguous, ask one short clarifying question.
 - If the user requests computer-wide access outside the workspace, refuse and explain the workspace boundary.
 - Prefer read-only inspection before proposing writes.
 Output JSON only:
 {
  "intent": "ask" | "task" | "approve" | "reject" | "status" | "cancel",
  "risk": "read_only" | "file_write" | "destructive" | "command",
  "summary": "...",
  "steps": ["..."],
  "targets": ["..."],
  "requiresApproval": true,
  "clarifyingQuestion": ""
 }
 ```
 ## 9. 데이터 모델
 ```ts
 export type TelegramTaskRisk = 'read_only' | 'file_write' | 'destructive' | 'command';
 export type TelegramTaskStatus = 'planned' | 'pending_approval' | 'running' | 'completed' | 'rejected' | 'failed' | 'cancelled';
 export interface PendingTelegramTask {
  id: string;
  chatId: number;
  createdAt: number;
  status: TelegramTaskStatus;
  risk: TelegramTaskRisk;
  originalText: string;
  summary: string;
  steps: string[];
  targets: string[];
  requiresApproval: boolean;
  approvalToken?: string;
 }
 ```
 ## 10. Telegram 응답 포맷
 Telegram 메시지는 한 건당 길이 제한(Bot API `sendMessage` 기준 4096자)이 있으므로 짧고 명령 중심으로 응답한다.
 승인 대기:
 ```text
 작업 대기 중
 ID: tg-xxxx
 위험도: file_write
 대상: README.md
 실행: /approve tg-xxxx
 취소: /reject tg-xxxx
 ```
 완료:
 ```text
 작업 완료
 변경:
 - README.md
 요약:
 - 설치 섹션 보완
 - 로컬 모델 설정 설명 추가
 ```
 차단:
 ```text
 차단됨
 이 작업은 워크스페이스 밖 파일에 접근하려고 합니다.
 Telegram 원격 실행은 현재 열린 VS Code 워크스페이스 내부에서만 허용됩니다.
 ```
 ## 11. 테스트 계획
 단위 테스트:
 - command parser
 - allowlist 필수 정책
 - risk classifier
 - pending task store
 - approve/reject 상태 전환
 - workspace 밖 path 차단
 - command allowlist 차단
 통합 테스트:
 - `/ask` 기존 응답 유지
 - `/task` 계획 생성
 - `/task` read_only 자동 실행
 - `/task` file_write 승인 대기
 - `/approve` 후 실행
 - `/reject` 후 실행 안 됨
 - allowedChatIds 비어 있을 때 원격 실행 차단
 회귀 테스트:
 - 기존 Telegram RAG 답변이 깨지지 않아야 한다.
 - Telegram token 저장/삭제/연결 테스트가 기존처럼 동작해야 한다.
 - 사이드바 Approval Panel과 충돌하지 않아야 한다.
 ## 12. 최종 권장 MVP
 가장 안전하고 빠른 MVP 범위:
 1. `/ask`, `/task`, `/status`, `/approve`, `/reject` 명령 추가
 2. allowlist가 비어 있으면 `/task` 차단
 3. read-only 작업만 자동 실행
 4. file write와 command는 승인 대기까지만 구현
 5. action 실행부는 다음 단계에서 `ActionExecutor`로 추출
 이 MVP만으로도 사용자는 Telegram에서 "프로젝트 분석해줘", "이 파일 읽고 요약해줘", "어떤 파일을 고쳐야 하는지 계획 세워줘"를 원격으로 시킬 수 있다. 이후 승인 실행까지 붙이면 Telegram이 완전한 원격 작업 지시 인터페이스가 된다.
@@ -726,3 +726,69 @@
            from { opacity: 0; transform: translateX(-10px); }
            to { opacity: 1; transform: translateX(0); }
        }
        /* ── Ready-status bar ───────────────────────────────────────────── */
        /* Single-line strip: content never wraps; overflow scrolls horizontally with the scrollbar hidden. */
        .ready-bar {
            display: flex;
            align-items: center;
            gap: 6px;
            padding: 4px 12px;
            font-size: 10.5px;
            line-height: 1.4;
            color: var(--text-dim);
            background: var(--bg-secondary);
            border-bottom: 1px solid var(--border);
            white-space: nowrap;
            overflow-x: auto;
            scrollbar-width: none;
        }
        /* WebKit counterpart of `scrollbar-width: none` above. */
        .ready-bar::-webkit-scrollbar { display: none; }
        /* Status dot: dim by default; the .ok / .bad / .warn modifiers recolor it. */
        .ready-bar .rb-dot {
            width: 6px; height: 6px; border-radius: 50%;
            background: var(--text-dim); flex-shrink: 0;
        }
        .ready-bar .rb-dot.ok { background: var(--success); }
        .ready-bar .rb-dot.bad { background: var(--error); }
        .ready-bar .rb-dot.warn { background: var(--warning); }
        /* Segment container and the individual text segments inside it. */
        .ready-bar .rb-content { display: flex; align-items: center; gap: 6px; flex-wrap: nowrap; }
        .ready-bar .rb-seg { color: var(--text-dim); }
        .ready-bar .rb-seg.ok { color: var(--success); }
        .ready-bar .rb-seg.bad { color: var(--error); }
        /* De-emphasized text, usable on a segment itself or on a nested span. */
        .ready-bar .rb-seg.rb-dim, .ready-bar .rb-dim { color: var(--border-bright); }
        /* Separator glyph placed between segments. */
        .ready-bar .rb-sep { color: var(--border); margin: 0 1px; }
        /* Clickable part of a segment. */
        .ready-bar .rb-link { color: var(--accent); cursor: pointer; }
        .ready-bar .rb-link:hover { text-decoration: underline; }
        /* ── Context-budget badge (input footer) ────────────────────────── */
        /* Pill-shaped label; text longer than 60vw is cut off with an ellipsis. */
        .ctx-badge {
            font-size: 10px;
            color: var(--text-dim);
            padding: 1px 6px;
            border-radius: 9px;
            border: 1px solid var(--border);
            background: transparent;
            white-space: nowrap;
            max-width: 60vw;
            overflow: hidden;
            text-overflow: ellipsis;
        }
        /* Hide the pill entirely while it has no content. */
        .ctx-badge:empty { display: none; }
        /* State modifiers: .ok keeps the neutral border, .warn switches text and border to the warning color. */
        .ctx-badge.ok { border-color: var(--border); }
        .ctx-badge.warn { color: var(--warning); border-color: var(--warning); }
        /* ── Per-answer "scope used" footer ─────────────────────────────── */
        /* Small footer separated from the content above it by a dashed rule; long text may break mid-word. */
        .msg-scope-footer {
            margin-top: 6px;
            padding-top: 6px;
            border-top: 1px dashed var(--border);
            font-size: 10px;
            color: var(--text-dim);
            line-height: 1.5;
            word-break: break-word;
        }
        /* Clickable label inside the footer. */
        .msg-scope-footer .scope-link {
            color: var(--accent);
            cursor: pointer;
        }
        .msg-scope-footer .scope-link:hover { text-decoration: underline; }
        /* De-emphasized trailing details. */
        .msg-scope-footer .scope-dim { color: var(--border-bright); }
@@ -74,6 +74,11 @@
        </div>
    </div>
    <div id="readyBar" class="ready-bar" title="현재 준비 상태 — 엔진 / 모델 / Brain / Agent 범위 / 메모리 / 컨텍스트 창">
        <span class="rb-dot" id="rbDot"></span>
        <span class="rb-content" id="rbContent">준비 상태 확인 중…</span>
    </div>
    <div id="historyOverlay" class="history-overlay">
        <div style="display:flex; justify-content:space-between; align-items:center; margin-bottom:20px;">
            <h2 style="color:var(--text-bright);">Chat History</h2>
@@ -169,6 +174,7 @@
                <div class="footer-left">
                    <button class="icon-btn" id="attachBtn" title="Attach Files">📎</button>
                    <span id="statusLabel" style="font-size:10px; color:var(--text-dim);">Ready</span>
                    <span id="ctxBadge" class="ctx-badge" title="직전 요청에 실제로 들어간 컨텍스트 추정치"></span>
                </div>
                <div class="footer-right">
                    <button id="cancelBtn" class="cancel-btn" title="Clear draft" style="display:none;">✕ Clear</button>
@@ -169,6 +169,140 @@
        const skillFolderList = document.getElementById('skillFolderList');
        const agentMapAgentName = document.getElementById('agentMapAgentName');
        const agentMapStatus = document.getElementById('agentMapStatus');
        // Elements of the ready-status bar and the context-budget badge.
        // Any of these may be null if the markup is missing, so the render functions null-check them.
        const readyBar = document.getElementById('readyBar');
        const rbDot = document.getElementById('rbDot');
        const rbContent = document.getElementById('rbContent');
        const ctxBadge = document.getElementById('ctxBadge');
        // ── Ready-status bar ─────────────────────────────────────────────────
        // Last known readiness snapshot; the 'engineStatus' and 'readyStatus' message
        // handlers merge into it and renderReadyBar() reads from it.
        let readyState = {};
        /**
         * Formats a token count compactly for display.
         * Returns '?' for anything that is not a finite number, the plain number below 1000,
         * one decimal with a 'k' suffix from 1000 up to 9999 (a trailing ".0" is dropped),
         * and a whole number of 'k' from 10000 upwards.
         */
        function fmtK(n) {
            const usable = typeof n === 'number' && isFinite(n);
            if (!usable) {
                return '?';
            }
            if (n < 1000) {
                return String(n);
            }
            const decimals = n >= 10000 ? 0 : 1;
            const scaled = (n / 1000).toFixed(decimals);
            return scaled.replace(/\.0$/, '') + 'k';
        }
        /**
         * Rebuilds the ready-status bar from the current `readyState` snapshot.
         *
         * Emits one segment each for engine, model, brain, agent (with its scope), memory,
         * multi-agent (only when enabled) and context window, joined by a separator, then
         * recolors the status dot from the engine's online flag. Does nothing when the bar
         * or its content element is missing.
         *
         * The markup is assigned through innerHTML, so every free-form string taken from
         * `readyState` is passed through escAttr() first.
         */
        function renderReadyBar() {
            if (!readyBar || !rbContent) return;
            const s = readyState;
            const segs = [];
            // Engine: online === true / false / anything else map to online / offline / "checking".
            if (s.engine) {
                const on = s.engine.online;
                const tag = on === true ? '온라인' : on === false ? '오프라인' : '확인 중';
                // The label is escaped like the other dynamic strings below; it was previously injected raw.
                segs.push(`<span class="rb-seg ${on === false ? 'bad' : on === true ? 'ok' : ''}">${escAttr(s.engine.label || 'Engine')}: ${tag}</span>`);
            }
            // Model: a filled / hollow dot prefix shows whether the model is loaded; no prefix when unknown.
            if (s.model && s.model.name) {
                const loaded = s.model.loaded;
                const dot = loaded === true ? '● ' : loaded === false ? '○ ' : '';
                segs.push(`<span class="rb-seg" title="${loaded === true ? '메모리에 로드됨' : loaded === false ? '아직 로드되지 않음' : ''}">${dot}${escAttr(s.model.name)}</span>`);
            }
            // Brain: file count ('?' when it is not a number) followed by the dimmed brain name.
            if (s.brain) {
                segs.push(`<span class="rb-seg">Brain ${typeof s.brain.files === 'number' ? s.brain.files : '?'}<span class="rb-dim"> ${escAttr(s.brain.name || '')}</span></span>`);
            }
            // Agent + scope: the scope count is a clickable link (data-act="map") only when folders are configured.
            if (s.agent && s.agent.name) {
                const scope = s.agent.scopeFolders > 0
                    ? ` <span class="rb-link" data-act="map">(범위 ${s.agent.scopeFolders})</span>`
                    : ` <span class="rb-dim">(범위 미설정)</span>`;
                segs.push(`<span class="rb-seg">Agent: ${escAttr(s.agent.name)}${scope}</span>`);
            } else {
                segs.push(`<span class="rb-seg rb-dim">Agent 없음</span>`);
            }
            // Memory: always shown; dimmed when off.
            segs.push(`<span class="rb-seg ${s.memory ? '' : 'rb-dim'}">메모리 ${s.memory ? '켜짐' : '꺼짐'}</span>`);
            // Multi-agent (only when on)
            if (s.multiAgent) segs.push(`<span class="rb-seg">멀티에이전트</span>`);
            // Context window (capped for small models gets a ↓ marker)
            if (typeof s.contextLength === 'number') {
                if (s.cappedForSmallModel) {
                    segs.push(`<span class="rb-seg" title="작은 모델(≤4B) 감지 — 예산을 ${fmtK(s.contextLength)} tokens 로 축소 (설정 g1nation.contextLength = ${fmtK(s.nominalContextLength)}). g1nation.smallModelContextCap 로 조절.">ctx ${fmtK(s.contextLength)}<span class="rb-dim"> ↓작은모델</span></span>`);
                } else {
                    segs.push(`<span class="rb-seg" title="모델 context window (g1nation.contextLength). 실제 로드된 값과 맞춰주세요.">ctx ${fmtK(s.contextLength)}</span>`);
                }
            }
            rbContent.innerHTML = segs.join('<span class="rb-sep">·</span>');
            // Status dot follows the engine only: ok when online, bad when offline, warn while unknown.
            if (rbDot) {
                const on = s.engine && s.engine.online;
                rbDot.className = 'rb-dot ' + (on === true ? 'ok' : on === false ? 'bad' : 'warn');
            }
        }
        /**
         * Escapes &, <, > and " so a value can be placed in HTML text or a double-quoted attribute.
         * null and undefined become the empty string; other values are stringified first.
         */
        function escAttr(t) {
            const text = t == null ? '' : String(t);
            const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;' };
            return text.replace(/[&<>"]/g, ch => entities[ch]);
        }
        // ── Context-budget badge (직전 요청 기준) ────────────────────────────
        /**
         * Renders the context-budget badge from a budget report `b`.
         *
         * When `b` is missing or has no numeric `inputTokens`, the badge is cleared (text, class
         * and tooltip reset). Otherwise the badge text lists the input/output estimate plus any
         * applicable flags, the class becomes "warn" when the budget is tight or the system
         * prompt was truncated ("ok" otherwise), and the tooltip carries the detailed numbers.
         */
        function renderCtxBadge(b) {
            if (!ctxBadge) {
                return;
            }
            const hasBudget = Boolean(b) && typeof b.inputTokens === 'number';
            if (!hasBudget) {
                ctxBadge.textContent = '';
                ctxBadge.className = 'ctx-badge';
                ctxBadge.title = '';
                return;
            }

            // Visible text: always the in/out estimate, then optional markers in a fixed order.
            const parts = [`≈${fmtK(b.inputTokens)} in / ${fmtK(b.maxOutputTokens)} out`];
            if (typeof b.contextLength === 'number') {
                parts.push(`ctx ${fmtK(b.contextLength)}` + (b.cappedForSmallModel ? '↓' : ''));
            }
            if (typeof b.brainFiles === 'number' && b.brainFiles > 0) {
                parts.push(`Brain ${b.brainFiles}`);
            }
            if (b.includesOpenFile) {
                parts.push('📄 열린 파일');
            }
            if (b.imageCount > 0) {
                parts.push(`🖼 ${b.imageCount}`);
            }
            if (b.droppedHistory > 0) {
                parts.push(`기록 −${b.droppedHistory}`);
            }
            if (b.systemTruncated) {
                parts.push('컨텍스트 일부 생략');
            }
            if (b.cappedForSmallModel) {
                parts.push('🔻 작은 모델 모드');
            }
            if (b.tight) {
                parts.push('⚠ 컨텍스트 거의 가득');
            }
            ctxBadge.textContent = parts.join(' · ');

            const level = (b.tight || b.systemTruncated) ? 'warn' : 'ok';
            ctxBadge.className = 'ctx-badge ' + level;

            // Tooltip: three lines — model, input breakdown, output cap and effective window.
            const modelLine = `model: ${b.model || ''}${b.paramB != null ? ' (~' + b.paramB + 'B)' : ''}`;
            const inputLine = `입력 ≈ ${b.inputTokens} tokens (시스템 ${b.systemTokens}, 기록 ${b.historyKept}개)`;
            const outputLine = `출력 상한 ${b.maxOutputTokens} tokens / 유효 context window ${b.contextLength} tokens${b.cappedForSmallModel ? ' (작은 모델용 축소; 설정값 ' + b.nominalContextLength + ')' : ''}`;
            ctxBadge.title = [modelLine, inputLine, outputLine].join('\n');
        }
        // Delegated click handler: any element inside the bar marked data-act="map"
        // asks the extension host to open the knowledge-map editor.
        if (readyBar) {
            readyBar.addEventListener('click', e => {
                const clicked = e.target;
                const act = clicked && clicked.dataset && clicked.dataset.act;
                if (act === 'map') {
                    vscode.postMessage({ type: 'editKnowledgeMap' });
                }
            });
        }
        // ── Per-answer "scope used" footer ──────────────────────────────────
        // Display labels (Korean) for the memory-layer ids reported in `usedMemoryLayers`.
        // renderScopeFooter() falls back to the raw id for any layer not listed here.
        const MEMORY_LAYER_LABELS = {
            'long-term-memory': '장기기억',
            'project-memory': '프로젝트기억',
            'procedural-memory': '절차기억',
            'episodic-memory': '에피소드기억',
            'project-scan': '프로젝트스캔',
            'recent-knowledge': '최근지식',
        };
        /**
         * Returns the directory part of a relative path, accepting both '/' and '\' separators.
         * Paths with no separator, or whose only separator is the first character, yield '(루트)'.
         */
        function dirOf(rel) {
            const lastSlash = rel.lastIndexOf('/');
            const lastBackslash = rel.lastIndexOf('\\');
            const cut = lastSlash > lastBackslash ? lastSlash : lastBackslash;
            if (cut <= 0) {
                return '(루트)';
            }
            return rel.slice(0, cut);
        }
        /**
         * Attaches a "scope used" footer to an answer element.
         *
         * Any existing footer inside `target` is removed first. When no brain files and no
         * memory layers were used, the footer says the answer came from the model's own
         * knowledge. Otherwise it shows the scope label (configured folders when the retrieval
         * was scoped, else up to four directories of the used files, else "whole brain"), the
         * agent name, the file count and the memory layers. Clicking the scope link asks the
         * extension host to open the knowledge-map editor. The footer is inserted before the
         * element's `.msg-actions` child when there is one, otherwise appended.
         *
         * @param target answer element to decorate; nothing happens when it is falsy.
         * @param v retrieval report (usedBrainFiles, usedMemoryLayers, scoped, configuredFolders, agentName).
         */
        function renderScopeFooter(target, v) {
            if (!target) {
                return;
            }
            const previous = target.querySelector('.msg-scope-footer');
            if (previous) {
                previous.remove();
            }
            const footer = document.createElement('div');
            footer.className = 'msg-scope-footer';

            const files = Array.isArray(v.usedBrainFiles) ? v.usedBrainFiles : [];
            const rawLayers = Array.isArray(v.usedMemoryLayers) ? v.usedMemoryLayers : [];
            const layers = rawLayers.map(id => MEMORY_LAYER_LABELS[id] || id);

            if (files.length > 0 || layers.length > 0) {
                const dirs = [...new Set(files.map(dirOf))];

                let scopeLabel = '전체 브레인';
                if (v.scoped && Array.isArray(v.configuredFolders) && v.configuredFolders.length) {
                    scopeLabel = v.configuredFolders.join(', ');
                } else if (dirs.length) {
                    const shown = dirs.slice(0, 4).join(', ');
                    const overflow = dirs.length > 4 ? ` 외 ${dirs.length - 4}` : '';
                    scopeLabel = shown + overflow;
                }

                const agentTag = v.agentName ? `[${escAttr(v.agentName)}] ` : '';
                const fileTag = files.length ? ` <span class="scope-dim">· 파일 ${files.length}</span>` : '';
                const layerTag = layers.length ? ` <span class="scope-dim">· 메모리 ${escAttr(layers.join('·'))}</span>` : '';
                // Tooltip lists the used files one per line, or falls back to the edit hint.
                const titleAttr = files.length ? `사용된 브레인 파일:\n${files.join('\n')}` : '에이전트↔지식 매핑 편집';
                footer.innerHTML = `<span class="scope-link" data-act="map" title="${escAttr(titleAttr)}">🔎 참조: ${agentTag}${escAttr(scopeLabel)}</span>${fileTag}${layerTag}`;
            } else {
                footer.innerHTML = `<span class="scope-link" data-act="map" title="에이전트↔지식 매핑 편집">🔎 참조 지식 없음</span> <span class="scope-dim">— 모델 자체 지식으로 답변</span>`;
            }

            footer.addEventListener('click', e => {
                const clicked = e.target;
                const act = clicked && clicked.dataset && clicked.dataset.act;
                if (act === 'map') {
                    vscode.postMessage({ type: 'editKnowledgeMap' });
                }
            });

            const actions = target.querySelector('.msg-actions');
            if (actions) {
                target.insertBefore(footer, actions);
            } else {
                target.appendChild(footer);
            }
        }
        let agentMapDraft = { agentPath: '', name: '', knowledgeFolders: [], skillFolders: [] };
@@ -348,6 +482,7 @@
                    setGenerating(false);
                    resetStepper();
                    Sound.success();
                    vscode.postMessage({ type: 'getReadyStatus' });
                    break;
                case 'restoreHistory':
                case 'sessionLoaded':
@@ -433,7 +568,25 @@
                case 'engineStatus':
                    statusDot.style.background = msg.value.online ? 'var(--success)' : 'var(--error)';
                    engineStatusText.innerText = msg.value.online ? 'Online' : 'Offline';
                    readyState.engine = Object.assign({}, readyState.engine, { online: !!msg.value.online });
                    renderReadyBar();
                    break;
                case 'readyStatus':
                    readyState = Object.assign({}, readyState, msg.value || {});
                    renderReadyBar();
                    break;
                case 'contextBudget':
                    renderCtxBadge(msg.value);
                    break;
                case 'usedScope': {
                    let target = streamBody && streamBody._parent;
                    if (!target) {
                        const all = chat.querySelectorAll('.msg.msg-ai');
                        target = all[all.length - 1];
                    }
                    renderScopeFooter(target, msg.value || {});
                    break;
                }
                case 'autoContinue':
                    statusLabel.innerText = msg.value; thinkingBar.classList.add('active');
                    if (msg.value.includes('Analyzing')) setStep('analyze');
@@ -2,7 +2,7 @@
  "name": "astra",
  "displayName": "Astra",
  "description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.",
-  "version": "2.80.33",
+  "version": "2.80.34",
  "publisher": "g1nation",
  "license": "MIT",
  "icon": "assets/icon.png",
@@ -178,6 +178,45 @@
          "default": 300,
          "description": "Request timeout in seconds. Default: 300"
        },
        "g1nation.contextLength": {
          "type": "number",
          "default": 32768,
          "minimum": 2048,
          "description": "Model context window in tokens (prompt + generation combined). Set this to the value your loaded model is actually running with in LM Studio / Ollama. Astra budgets prompt and output against this so it never overflows. Default: 32768"
        },
        "g1nation.maxOutputTokens": {
          "type": "number",
          "default": 4096,
          "minimum": 256,
          "description": "Upper bound on tokens generated per response. The effective limit is reduced automatically when the prompt is large so input + output stays within g1nation.contextLength. Default: 4096"
        },
        "g1nation.contextSafetyMargin": {
          "type": "number",
          "default": 2048,
          "minimum": 0,
          "description": "Tokens kept free as a safety buffer for token-count estimation error. Default: 2048"
        },
        "g1nation.contextOverflowPolicy": {
          "type": "string",
          "enum": [
            "stopAtLimit",
            "truncateMiddle",
            "rollingWindow"
          ],
          "default": "stopAtLimit",
          "description": "Fallback behavior (LM Studio) if the prompt still exceeds the context window after Astra's own budgeting. 'stopAtLimit' fails clearly so you notice; 'truncateMiddle'/'rollingWindow' drop content silently. Default: stopAtLimit"
        },
        "g1nation.autoCompactHistory": {
          "type": "boolean",
          "default": true,
          "description": "Automatically drop the oldest conversation messages from the request when the prompt would exceed the context budget (the on-screen chat history is unaffected). Default: true"
        },
        "g1nation.smallModelContextCap": {
          "type": "number",
          "default": 8192,
          "minimum": 0,
          "description": "When a small model (≤4B parameters, detected from the model name) is selected, budget the prompt against this smaller effective context window instead of g1nation.contextLength — small models often emit an empty/EOS response on prompts that nominally fit but exceed their real capability. Set 0 to disable. Default: 8192"
        },
        "g1nation.lmStudio.idleTimeoutMs": {
          "type": "number",
          "default": 300000,
@@ -292,6 +331,11 @@
          "type": "object",
          "default": {},
          "description": "Inline fallback for the agent ↔ knowledge mapping. Used only when the JSON file is missing. Shape: { defaultAgent?, agents: [{ name, knowledgeFolders, model?, description? }] }. Folder paths can be absolute, ~-prefixed, or relative to the active brain root."
        },
        "g1nation.agentSkillsPath": {
          "type": "string",
          "default": "",
          "description": "Absolute path to the agent skills folder (`.agent/skills/*.md`). When empty, defaults to '<workspace>/.agent/skills'. Use this on Windows or when your skills live outside the workspace."
        }
      }
    }
@@ -40,6 +40,17 @@ import {
 import { MemoryManager } from './memory';
 import { RetrievalOrchestrator } from './retrieval';
 import { resolveScopeForAgent } from './skills/agentKnowledgeMap';
 import {
    estimateTokens,
    estimateMessagesTokens,
    computeOutputBudget,
    trimHistoryToBudget,
    truncateSystemPromptContext,
    classifyStopReason,
    truncationNotice,
    estimateModelParamsB,
    type ContextLimits,
 } from './lib/contextManager';
 export interface ChatMessage {
    role: 'user' | 'assistant' | 'system';
@@ -94,7 +105,17 @@ const AGENT_PROMPTS: Record<AgentRole, string> = {
 3. Deliver a logical, consistent, and polished response.`
 };
 // Local-path detectors used to decide whether a user prompt refers to a file/dir on disk.
 // Both constants are regex *sources* (plain strings) so each caller can compile them with the
 // flags it needs: a non-global instance for `.test()`, a global one for `.match()`.
 //
 // POSIX: /Volumes/, /Users/, /home/, /opt/, ... or ~/  — backtick excluded (markdown code spans).
 const POSIX_ABS_PATH_SRC = "(?:\\/(?:Volumes|Users|home|opt|srv|mnt|data|workspace)\\/|~\\/)[^\\s`\"'<>|*?]+";
 // Windows: drive letter (C:\ or C:/) or UNC (\\server\share). Backslash IS allowed as a separator here.
 // The drive letter must not be the tail of a longer alphanumeric token: without the lookbehind the
 // "s:/" inside "https://host/..." (or "p:/" inside "http://...") is mistaken for a drive root, so
 // every URL in a prompt would be reported as a local Windows path.
 const WIN_ABS_PATH_SRC = "(?:(?<![A-Za-z0-9_])[A-Za-z]:[\\\\/]|\\\\\\\\[^\\s\\\\/]+\\\\[^\\s\\\\/]+)[^\\s`\"'<>|*?]*";
 export class AgentExecutor {
    /**
     * Case-insensitive detectors compiled once from POSIX_ABS_PATH_SRC and WIN_ABS_PATH_SRC.
     * Non-global instances — safe for repeated `.test()` (a shared /g/ regex's lastIndex would corrupt results).
     * Code that needs every match compiles its own global copy from the same source strings.
     */
    static readonly ABS_PATH_RE = new RegExp(POSIX_ABS_PATH_SRC, 'i');
    static readonly WIN_ABS_PATH_RE = new RegExp(WIN_ABS_PATH_SRC, 'i');
    private chatHistory: ChatMessage[] = [];
    private abortController: AbortController | null = null;
    private webview: vscode.Webview | undefined;
@@ -107,6 +128,17 @@ export class AgentExecutor {
    private memoryManager: MemoryManager;
    private retrievalOrchestrator: RetrievalOrchestrator;
    private currentTaskId: string = 'default_session';
    /**
     * Set by buildMemoryContext after each retrieval — fed to the webview's per-answer "scope used" footer.
     * Reset to null at the start of buildMemoryContext, so it stays null when memory is disabled;
     * when non-null it is posted to the webview as a `usedScope` message after a successful turn.
     */
    private _lastRetrievalInfo: {
        // Name of the agent the retrieval scope was resolved for, or null when the scope has no agent.
        agentName: string | null;
        // True when the resolved scope has at least one folder.
        scoped: boolean;
        // Stringified `source` of the resolved scope ('' when the scope carries none).
        source: string;
        configuredFolders: string[];   // relative to brain root
        // De-duplicated file paths of the selected 'brain-memory' chunks.
        usedBrainFiles: string[];      // relative to brain root
        // Distinct sources of the selected chunks, excluding 'brain-memory' and 'brain-trace'.
        usedMemoryLayers: string[];    // raw RetrievalSource ids
        // Chunk total reported by the retrieval result.
        totalChunks: number;
        // Number of chunks actually selected for this turn.
        selectedChunks: number;
    } | null = null;
    private readonly options: AgentExecutorOptions;
@@ -445,7 +477,8 @@ export class AgentExecutor {
                ].join('\n');
                // 3. 조립: 기본(축소) → 유틸리티 컨텍스트 → 에이전트 프롬프트(최후단)
-                fullSystemPrompt = `${strippedSystemPrompt}${internetCtx}${memoryCtx}${designerCtx}${secondBrainTraceCtx}\n\n[CONTEXT]\n${brainContext}${brainInventoryCtx}\n${contextBlock}${negativeCtx}${agentDirective}`;
+                //    [CONTEXT] … [/CONTEXT] 사이만 컨텍스트 초과 시 trim 대상 — agentDirective/negative 는 보호.
                fullSystemPrompt = `${strippedSystemPrompt}${internetCtx}${memoryCtx}${designerCtx}${secondBrainTraceCtx}\n\n[CONTEXT]\n${brainContext}${brainInventoryCtx}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}${agentDirective}`;
            } else {
                // 기존 Astra 모드 (에이전트 미선택)
                const localProjectKnowledgeCtx = prompt && localPathContext && this.isProjectKnowledgeCreationRequest(prompt)
@@ -464,13 +497,95 @@ export class AgentExecutor {
                    "- [충돌] 지식 간 충돌 발생 시 시스템이 독단적으로 판단하지 말고, 반드시 [CONFLICT WARNING] 플래그와 함께 상충되는 두 관점을 모두 명시하여 사용자에게 판단을 위임할 것."
                ].join('\n');
-                fullSystemPrompt = `${systemPrompt}${internetCtx}${memoryCtx}${designerCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}\n\n[CONTEXT]\n${brainContext}${brainInventoryCtx}\n${contextBlock}${negativeCtx}`;
+                // [CONTEXT] … [/CONTEXT] 사이만 컨텍스트 초과 시 trim 대상 — negative constraints 는 보호.
                fullSystemPrompt = `${systemPrompt}${internetCtx}${memoryCtx}${designerCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}\n\n[CONTEXT]\n${brainContext}${brainInventoryCtx}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}`;
            }
            // ──────────────────────────────────────────────────────────────────
            // [Context Limit Manager] context length 는 "답변을 그만큼 길게 써도 된다"
            // 는 뜻이 아니다: 시스템 프롬프트 + 대화 기록 + 입력 + 생성될 답변 + 여유분 ≤ context length.
            // 요청을 보내기 전에 입력 토큰을 추정해서
            //   (1) 시스템 프롬프트가 과하면 [CONTEXT] 블록을 마지막 수단으로 줄이고
            //   (2) 대화 기록을 남은 예산에 맞게 압축하고 (UI 표시용 chatHistory 는 건드리지 않음)
            //   (3) 동적으로 출력 상한(maxOutputTokens)을 계산한다.
            // ──────────────────────────────────────────────────────────────────
            // Small models (≤4B) routinely fail on prompts that fit their *nominal* context but
            // exceed their *effective* capability (server log shows truncated=0 yet eval time≈0ms —
            // the model emitted EOS as the first token). When detected, budget against a smaller
            // effective window so the system prompt / RAG / history get shrunk proactively.
            const modelParamB = estimateModelParamsB(actualModel);
            const smallModelCap = config.smallModelContextCap; // 0 disables this guard
            const cappedForSmallModel = smallModelCap > 0
                && modelParamB !== null && modelParamB <= 4
                && config.contextLength > smallModelCap;
            const effectiveContextLength = cappedForSmallModel ? smallModelCap : config.contextLength;
            if (cappedForSmallModel) {
                logInfo('Small model detected — capping effective context window for budgeting.', {
                    model: actualModel, paramB: modelParamB,
                    nominalContext: config.contextLength, effectiveContext: effectiveContextLength,
                });
            }
            const ctxLimits: ContextLimits = {
                contextLength: effectiveContextLength,
                maxOutputTokens: config.maxOutputTokens,
                safetyMargin: config.contextSafetyMargin,
                minOutputTokens: 512,
            };
            const imageCount = (reqMessages as any[])
                .reduce((n, m) => n + (Array.isArray(m?.images) ? m.images.length : 0), 0);
            const imageTokenReserve = imageCount * 1024;
            // (1) 시스템 프롬프트는 예산의 ~65%까지만 허용 — 그 이상이면 [CONTEXT] 블록부터 잘라낸다.
            const systemCapTokens = Math.max(
                1024,
                Math.floor((ctxLimits.contextLength - ctxLimits.safetyMargin - ctxLimits.minOutputTokens - imageTokenReserve) * 0.65)
            );
            const { prompt: budgetedSystemPrompt, truncated: systemTruncated } =
                truncateSystemPromptContext(fullSystemPrompt, systemCapTokens);
            if (systemTruncated) {
                logInfo('System prompt context truncated to fit the context window.', { model: actualModel, systemCapTokens });
            }
            const systemTokens = estimateTokens(budgetedSystemPrompt) + 4;
            // (2) 대화 기록 압축.
            const historyBudget = Math.max(
                256,
                ctxLimits.contextLength - systemTokens - ctxLimits.safetyMargin - ctxLimits.minOutputTokens - imageTokenReserve
            );
            let budgetedHistory: ChatMessage[] = reqMessages;
            if (config.autoCompactHistory) {
                const trim = trimHistoryToBudget<ChatMessage>(reqMessages, historyBudget, (n) => ({
                    role: 'system',
                    content: `[이전 대화 ${n}개 메시지는 컨텍스트 한계 때문에 이번 요청에서 생략되었습니다. 필요하면 사용자에게 다시 확인하세요.]`,
                    internal: true,
                }));
                budgetedHistory = trim.messages;
                if (trim.droppedCount > 0) {
                    logInfo('Conversation history compacted to fit the context window.', {
                        model: actualModel, droppedCount: trim.droppedCount, historyBudget,
                    });
                }
            }
            const messagesForRequest: ChatMessage[] = [
-                { role: 'system', content: fullSystemPrompt, internal: true },
+                { role: 'system', content: budgetedSystemPrompt, internal: true },
-                ...reqMessages
+                ...budgetedHistory
            ];
            // (3) 동적 출력 상한.
            const inputTokens = estimateMessagesTokens(messagesForRequest) + imageTokenReserve;
            const outputBudget = computeOutputBudget(inputTokens, ctxLimits);
            const maxOutputTokens = outputBudget.maxOutputTokens;
            if (outputBudget.tight) {
                logError('Prompt nearly fills the context window — output budget is at the minimum.', {
                    model: actualModel, contextLength: ctxLimits.contextLength, inputTokens, maxOutputTokens,
                });
            }
            logInfo('Context budget computed.', {
                model: actualModel, contextLength: ctxLimits.contextLength,
                inputTokens, maxOutputTokens, droppedHistory: reqMessages.length - budgetedHistory.length,
            });
            let finishStopReason: string | undefined;
            // 4. Call AI Engine
            this.abortController = new AbortController();
            requestTimeoutHandle = setTimeout(() => {
@@ -485,6 +600,30 @@ export class AgentExecutor {
            let buffer = '';
            if (loopDepth === 0) {
                // Context-budget preview so the UI can show what actually went into this turn
                // (≈N tokens, Brain N files, open file included?, history compacted?, small-model warning).
                this.webview.postMessage({
                    type: 'contextBudget',
                    value: {
                        model: actualModel,
                        engine,
                        paramB: modelParamB,
                        contextLength: ctxLimits.contextLength,
                        nominalContextLength: config.contextLength,
                        cappedForSmallModel,
                        inputTokens,
                        maxOutputTokens,
                        systemTokens,
                        historyKept: budgetedHistory.length,
                        droppedHistory: reqMessages.length - budgetedHistory.length,
                        systemTruncated,
                        includesOpenFile: !!contextBlock && contextBlock.includes('[Currently open file:'),
                        brainFiles: brainFiles.length,
                        imageCount,
                        tight: outputBudget.tight,
                        smallModel: cappedForSmallModel || (modelParamB !== null && modelParamB <= 3 && inputTokens > 8000),
                    },
                });
                this.webview.postMessage({ type: 'streamStart' });
                this.options.onStreamLifecycle?.start();
            }
@@ -497,18 +636,25 @@ export class AgentExecutor {
                        modelName: actualModel,
                        messages: messagesForRequest.map((m) => ({ role: m.role, content: m.content })),
                        temperature,
                        maxTokens: maxOutputTokens,
                        contextOverflowPolicy: config.contextOverflowPolicy,
                        signal: this.abortController.signal,
                    });
-                    for await (const { token } of stream) {
+                    for await (const { token, stopReason } of stream) {
                        if (this.isStaleRun(runId)) return;
                        if (token) aiResponseText += token;
                        if (stopReason) finishStopReason = stopReason;
                    }
                } catch (err: any) {
                    if (err?.name === 'AbortError' || this.abortController.signal.aborted) {
                        logInfo('Generation aborted by user.');
                    } else {
-                        logError('LM Studio SDK chat failed.', { engine, error: err?.message ?? String(err) });
+                        const msg = err?.message ?? String(err);
-                        this.webview?.postMessage({ type: 'error', value: `LM Studio: ${err?.message ?? err}` });
+                        if (/context\s*length|contextlengthreached|exceed|too\s*long/i.test(msg)) {
                            finishStopReason = 'contextLengthReached';
                        }
                        logError('LM Studio SDK chat failed.', { engine, error: msg });
                        this.webview?.postMessage({ type: 'error', value: `LM Studio: ${msg}` });
                    }
                }
            } else {
@@ -516,7 +662,9 @@ export class AgentExecutor {
                    baseUrl: ollamaUrl,
                    modelName: actualModel,
                    reqMessages: messagesForRequest,
-                    temperature
+                    temperature,
                    maxTokens: maxOutputTokens,
                    contextLength: ctxLimits.contextLength
                });
                const { response, apiUrl: restApiUrl } = request;
                apiUrl = restApiUrl;
@@ -551,6 +699,10 @@ export class AgentExecutor {
                                if (token) {
                                    aiResponseText += token;
                                }
                                const fr = engine === 'lmstudio'
                                    ? json.choices?.[0]?.finish_reason
                                    : (json.done_reason ?? (json.done === true ? 'stop' : undefined));
                                if (fr) finishStopReason = fr;
                            } catch (e: any) {
                                logError('Failed to parse streaming chunk.', { engine, apiUrl, chunk: summarizeText(trimmed, 300), error: e?.message || String(e) });
                            }
@@ -578,6 +730,10 @@ export class AgentExecutor {
                    if (token) {
                        aiResponseText += token;
                    }
                    const fr = engine === 'lmstudio'
                        ? json.choices?.[0]?.finish_reason
                        : (json.done_reason ?? (json.done === true ? 'stop' : undefined));
                    if (fr) finishStopReason = fr;
                } catch (e: any) {
                    logError('Failed to parse final streaming buffer.', { engine, apiUrl, buffer: summarizeText(buffer, 300), error: e?.message || String(e) });
                }
@@ -617,12 +773,15 @@ export class AgentExecutor {
                            modelName: actualModel,
                            messages: messagesForRequest.map((m) => ({ role: m.role, content: m.content })),
                            temperature,
                            maxTokens: maxOutputTokens,
                            contextOverflowPolicy: config.contextOverflowPolicy,
                            signal: this.abortController.signal,
                        });
                        let retryText = '';
-                        for await (const { token } of retryStream) {
+                        for await (const { token, stopReason } of retryStream) {
                            if (this.isStaleRun(runId)) return;
                            if (token) retryText += token;
                            if (stopReason) finishStopReason = stopReason;
                        }
                        if (retryText.trim()) {
                            aiResponseText = retryText;
@@ -642,11 +801,14 @@ export class AgentExecutor {
                            engine,
                            messages: messagesForRequest,
                            temperature,
                            maxTokens: maxOutputTokens,
                            contextLength: ctxLimits.contextLength,
                            signal: this.abortController?.signal,
                        });
-                        if (fallback && fallback.trim()) {
+                        if (fallback.stopReason) finishStopReason = fallback.stopReason;
-                            aiResponseText = fallback;
+                        if (fallback.text && fallback.text.trim()) {
-                            logInfo('Non-streaming fallback recovered the answer.', { engine, model: actualModel, length: fallback.length });
+                            aiResponseText = fallback.text;
                            logInfo('Non-streaming fallback recovered the answer.', { engine, model: actualModel, length: fallback.text.length });
                        }
                    } catch (recoverErr: any) {
                        logError('Non-streaming fallback also failed.', {
@@ -688,6 +850,18 @@ export class AgentExecutor {
                    ].join('\n');
                }
            }
            // Surface truncated/abnormal generation so the user knows the answer is incomplete.
            const stopKind = classifyStopReason(finishStopReason);
            if (stopKind === 'output-limit' || stopKind === 'context-overflow' || stopKind === 'error') {
                logError('Generation stopped abnormally.', {
                    model: actualModel, engine, stopReason: finishStopReason, stopKind,
                    inputTokens, maxOutputTokens, answerChars: assistantContent.length,
                });
            }
            const notice = truncationNotice(stopKind);
            if (notice && assistantContent.trim()) {
                assistantContent = assistantContent.trimEnd() + notice;
            }
            const finalAssistantContent = assistantContent;
            const assistantMessage: ChatMessage = { role: 'assistant', content: finalAssistantContent, internal: false, rationale };
@@ -700,7 +874,9 @@ export class AgentExecutor {
                const promptCharCount = messagesForRequest.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
                logError('Model returned an empty response without actions.', {
                    model: actualModel, engine, apiUrl, loopDepth,
-                    promptCharCount, messageCount: messagesForRequest.length,
+                    promptCharCount, inputTokens, maxOutputTokens, contextLength: ctxLimits.contextLength,
                    estimatedOverflow: outputBudget.tight, stopReason: finishStopReason,
                    messageCount: messagesForRequest.length,
                    fallbackTried: loopDepth === 0 ? 'yes' : 'no',
                });
                // Cheap heuristic: parse a parameter-count hint out of the
@@ -717,20 +893,23 @@ export class AgentExecutor {
                const contextLimitHint =
                    'LM Studio 로그에 `n_tokens = N, truncated = 0` 인데 `eval time` 이 0ms 라면 모델이 첫 토큰부터 EOS 를 뱉은 것입니다. 보통 컨텍스트 한계 초과 또는 모델 용량 부족입니다. 더 큰 모델(7B+)로 교체하거나 컨텍스트를 줄여 보세요.';
                const looksOverflow = outputBudget.tight || inputTokens > ctxLimits.contextLength - ctxLimits.safetyMargin;
                this.webview.postMessage({
                    type: 'error',
                    value: [
                        'AI 엔진이 빈 응답을 반환했습니다 (스트리밍 + non-streaming 폴백 모두 실패).',
                        `Engine: ${engine}`,
                        `Model: ${actualModel}`,
-                        `Prompt size: ${promptCharCount.toLocaleString()} chars across ${messagesForRequest.length} message(s)`,
+                        `Prompt: ~${inputTokens.toLocaleString()} tokens (${promptCharCount.toLocaleString()} chars, ${messagesForRequest.length} messages) / context window ${ctxLimits.contextLength.toLocaleString()} tokens`,
                        `Output budget: ${maxOutputTokens.toLocaleString()} tokens`,
                        ...(finishStopReason ? [`Stop reason: ${finishStopReason}`] : []),
                        '',
                        '다음을 시도해보세요:',
                        '  • LM Studio에서 모델이 실제로 로드되어 있는지 확인',
-                        promptCharCount > 16000
+                        looksOverflow
-                            ? '  • 프롬프트가 너무 큽니다 (16k chars 초과). Skill/Brain 컨텍스트를 좁혀 보세요.'
+                            ? '  • 입력이 모델 context window 에 가깝습니다. `/newChat` 으로 대화를 새로 시작하거나, Skill/Brain 컨텍스트를 줄이거나, Settings 의 `g1nation.contextLength` 를 모델 실제 값으로 맞추세요.'
                            : '  • 다른 모델로 전환하거나 LM Studio 서버를 재시작',
-                        '  • Settings에서 maxContextSize 또는 memoryLongTermFiles 줄이기',
+                        '  • Settings에서 maxContextSize / memoryLongTermFiles 줄이기',
                        ...(looksSmall || promptIsLarge ? ['  • ' + contextLimitHint] : []),
                    ].join('\n')
                });
@@ -765,6 +944,12 @@ export class AgentExecutor {
            }
            this.statusBarManager.updateStatus(AgentStatus.Success);
            if (this._lastRetrievalInfo) {
                this.webview.postMessage({
                    type: 'usedScope',
                    value: { ...this._lastRetrievalInfo, hasAgentSelected: !!options.agentSkillFile },
                });
            }
            this.webview.postMessage({ type: 'streamChunk', value: finalAssistantContent });
        } catch (error: any) {
@@ -863,12 +1048,17 @@ export class AgentExecutor {
    private async callAgent(role: AgentRole, prompt: string, modelName: string, options: any): Promise<string> {
        const persona = AGENT_PROMPTS[role];
-        const { ollamaUrl } = getConfig();
+        const { ollamaUrl, contextLength, maxOutputTokens, contextSafetyMargin, contextOverflowPolicy } = getConfig();
        const messages: ChatMessage[] = [
            { role: 'system', content: persona },
            { role: 'user', content: prompt }
        ];
        // Dynamic output cap so input + output stays within the context window.
        const inputTokens = estimateMessagesTokens(messages);
        const { maxOutputTokens: subMaxTokens } = computeOutputBudget(inputTokens, {
            contextLength, maxOutputTokens, safetyMargin: contextSafetyMargin, minOutputTokens: 512,
        });
        const engine = resolveEngine(ollamaUrl);
        let responseText = '';
@@ -879,6 +1069,8 @@ export class AgentExecutor {
                    modelName,
                    messages: messages.map((m) => ({ role: m.role, content: m.content })),
                    temperature: 0.3,
                    maxTokens: subMaxTokens,
                    contextOverflowPolicy,
                    signal: this.abortController?.signal,
                });
                for await (const { token } of stream) {
@@ -896,7 +1088,9 @@ export class AgentExecutor {
            baseUrl: ollamaUrl,
            modelName: modelName,
            reqMessages: messages,
-            temperature: 0.3 // Use lower temperature for planning and research
+            temperature: 0.3, // Use lower temperature for planning and research
            maxTokens: subMaxTokens,
            contextLength
        });
        const reader = request.response.body?.getReader();
@@ -1218,16 +1412,17 @@ export class AgentExecutor {
    /**
     * 프롬프트에 로컬 파일/디렉토리 경로가 포함되어 있는지 감지합니다.
-     * 절대 경로: /Volumes/, /Users/, /home/, ~/
+     * POSIX 절대 경로: /Volumes/, /Users/, /home/, /opt/, ~/
     * Windows 절대 경로: C:\..., D:/..., \\server\share\...
     * 상대 경로: src/..., lib/..., components/..., tests/... 등 + 파일 확장자
     */
    private containsLocalFilePath(prompt: string): boolean {
-        // 절대 경로
+        // 절대 경로 (POSIX + Windows 드라이브 문자 + UNC)
-        if (/(?:\/Volumes\/|\/Users\/|\/home\/|~\/)[^\s`"'<>]+/i.test(prompt)) {
+        if (AgentExecutor.ABS_PATH_RE.test(prompt) || AgentExecutor.WIN_ABS_PATH_RE.test(prompt)) {
            return true;
        }
-        // 상대 경로 패턴: 디렉토리/파일명.확장자 형태 (src/lib/engine.ts, components/App.tsx 등)
+        // 상대 경로 패턴: 디렉토리/파일명.확장자 형태 (src/lib/engine.ts, components\App.tsx 등)
-        if (/(?:^|[\s,])(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)\//i.test(prompt)
+        if (/(?:^|[\s,])(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)[\\/]/i.test(prompt)
            && /\.[a-z]{1,6}(?:[\s,;)\]]|$)/i.test(prompt)) {
            return true;
        }
@@ -1551,15 +1746,21 @@ export class AgentExecutor {
    private extractLocalProjectPaths(prompt: string, rootPath?: string): string[] {
        const results: string[] = [];
        const stripTrailingPunct = (s: string) => s.replace(/[),.;\]]+$/g, '');
-        // 1. 절대 경로 감지: /Volumes/, /Users/, /home/, ~/
+        // 1a. POSIX 절대 경로: /Volumes/, /Users/, /home/, /opt/, ~/
-        const absMatches = prompt.match(/(?:\/Volumes\/|\/Users\/|\/home\/|~\/)[^\s`"'<>]+/gi) || [];
+        const absMatches = prompt.match(new RegExp(POSIX_ABS_PATH_SRC, 'gi')) || [];
        for (const m of absMatches) {
-            results.push(m.replace(/[),.;\]]+$/g, ''));
+            results.push(stripTrailingPunct(m));
        }
        // 1b. Windows 절대 경로: C:\..., D:/..., \\server\share\...
        const winMatches = prompt.match(new RegExp(WIN_ABS_PATH_SRC, 'gi')) || [];
        for (const m of winMatches) {
            results.push(stripTrailingPunct(m));
        }
-        // 2. 상대 경로 감지: src/lib/engine.ts, components/App.tsx 등
+        // 2. 상대 경로 감지: src/lib/engine.ts, components/App.tsx, src\lib\engine.ts 등
-        const relMatches = prompt.match(/(?:^|[\s,])(?:(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)\/[^\s`"'<>]+\.[a-z]{1,6})/gi) || [];
+        const relMatches = prompt.match(/(?:^|[\s,])(?:(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)[\\/][^\s`"'<>]+\.[a-z]{1,6})/gi) || [];
        for (const m of relMatches) {
            const cleaned = m.trim().replace(/^,\s*/, '').replace(/[),.;\]]+$/g, '');
            if (rootPath) {
@@ -1924,6 +2125,7 @@ export class AgentExecutor {
    private buildMemoryContext(currentPrompt: string, activeBrain: BrainProfile, agentSkillFile?: string): string {
        const config = getConfig();
        this._lastRetrievalInfo = null;
        if (!config.memoryEnabled) return '';
        // Update memory manager config in case settings changed
@@ -1956,6 +2158,27 @@ export class AgentExecutor {
            scopeFolders: scope.folders
        });
        // Stash what actually fed this turn so handlePrompt can show it under the answer.
        const brainRoot = activeBrain.localBrainPath;
        const rel = (p?: string) => (p ? (path.relative(brainRoot, p) || p) : '');
        this._lastRetrievalInfo = {
            agentName: scope.agent?.name ?? null,
            scoped: scope.folders.length > 0,
            source: String((scope as any).source ?? ''),
            configuredFolders: scope.folders.map((abs) => rel(abs)),
            usedBrainFiles: result.selectedChunks
                .filter((c) => c.source === 'brain-memory' && c.metadata.filePath)
                .map((c) => rel(c.metadata.filePath))
                .filter((p, i, arr) => p && arr.indexOf(p) === i),
            usedMemoryLayers: Array.from(new Set(
                result.selectedChunks
                    .filter((c) => c.source !== 'brain-memory' && c.source !== 'brain-trace')
                    .map((c) => c.source as string)
            )),
            totalChunks: result.totalChunks,
            selectedChunks: result.selectedChunks.length,
        };
        return this.retrievalOrchestrator.buildContextString(result);
    }
@@ -1999,8 +2222,14 @@ export class AgentExecutor {
        modelName: string;
        reqMessages: ChatMessage[];
        temperature: number;
        /** Dynamic output-token cap computed from the remaining context budget. */
        maxTokens?: number;
        /** Model context window in tokens (used for Ollama's num_ctx). */
        contextLength?: number;
    }): Promise<{ response: Response; engine: 'lmstudio' | 'ollama'; apiUrl: string }> {
        const { baseUrl, modelName, reqMessages, temperature } = params;
        const maxTokens = Math.max(256, params.maxTokens ?? 4096);
        const numCtx = Math.max(2048, params.contextLength ?? 32768);
        const engine = resolveEngine(baseUrl);  // 사용자가 설정한 엔진만 사용
        const apiUrl = buildApiUrl(baseUrl, engine, 'chat');
        const messageVariants = this.buildEngineMessageVariants(reqMessages, engine);
@@ -2015,8 +2244,8 @@ export class AgentExecutor {
                    messages: variant.messages,
                    stream: true,
                    ...(engine === 'lmstudio'
-                        ? { max_tokens: 4096, temperature }
+                        ? { max_tokens: maxTokens, temperature }
-                        : { options: { num_ctx: 32768, num_predict: 4096, temperature } }),
+                        : { options: { num_ctx: numCtx, num_predict: maxTokens, temperature } }),
                };
                // 일시적 네트워크 오류용 retry (최대 2회, 지수 backoff)
@@ -2100,9 +2329,13 @@ export class AgentExecutor {
        engine: 'lmstudio' | 'ollama';
        messages: ChatMessage[];
        temperature: number;
        maxTokens?: number;
        contextLength?: number;
        signal?: AbortSignal;
-    }): Promise<string> {
+    }): Promise<{ text: string; stopReason?: string }> {
        const { baseUrl, modelName, engine, messages, temperature, signal } = params;
        const maxTokens = Math.max(256, params.maxTokens ?? 4096);
        const numCtx = Math.max(2048, params.contextLength ?? 32768);
        const apiUrl = buildApiUrl(baseUrl, engine, 'chat');
        const variants = this.buildEngineMessageVariants(messages, engine);
        const body = {
@@ -2110,8 +2343,8 @@ export class AgentExecutor {
            messages: variants[0].messages,
            stream: false,
            ...(engine === 'lmstudio'
-                ? { max_tokens: 4096, temperature }
+                ? { max_tokens: maxTokens, temperature }
-                : { options: { num_ctx: 32768, num_predict: 4096, temperature } }),
+                : { options: { num_ctx: numCtx, num_predict: maxTokens, temperature } }),
        };
        const response = await fetch(apiUrl, {
            method: 'POST',
@@ -2127,11 +2360,17 @@ export class AgentExecutor {
        try {
            const json = JSON.parse(text);
            if (engine === 'lmstudio') {
-                return json?.choices?.[0]?.message?.content ?? '';
+                return {
                    text: json?.choices?.[0]?.message?.content ?? '',
                    stopReason: json?.choices?.[0]?.finish_reason,
                };
            }
-            return json?.message?.content ?? json?.response ?? '';
+            return {
                text: json?.message?.content ?? json?.response ?? '',
                stopReason: json?.done_reason ?? (json?.done === true ? 'stop' : undefined),
            };
        } catch {
-            return '';
+            return { text: '' };
        }
    }
@@ -24,6 +24,11 @@ export abstract class BaseAgent {
        const isOllama = ollamaUrl.includes(':11434') || ollamaUrl.includes('ollama');
        const endpoint = isOllama ? `${ollamaUrl}/api/chat` : `${ollamaUrl}/v1/chat/completions`;
        // 컨텍스트 초과 방지를 위해 출력 토큰 상한을 항상 명시한다 (서브에이전트 중간 산출물용).
        const { contextLength, maxOutputTokens } = getConfig();
        const numCtx = Math.max(2048, contextLength);
        const outCap = Math.max(256, maxOutputTokens);
        let lastError: any;
        for (let attempt = 1; attempt <= 3; attempt++) {
            const controller = new AbortController();
@@ -40,12 +45,13 @@ export abstract class BaseAgent {
                    model: this.modelName,
                    messages,
                    stream: false,
-                    options: { temperature: 0.3 }
+                    options: { temperature: 0.3, num_ctx: numCtx, num_predict: outCap }
                } : {
                    model: this.modelName,
                    messages,
                    stream: false,
-                    temperature: 0.3
+                    temperature: 0.3,
                    max_tokens: outCap
                }),
                signal: combinedSignal
            });
@@ -30,6 +30,14 @@ export interface IAgentConfig {
    memoryShortTermMessages: number;
    memoryMediumTermSessions: number;
    memoryLongTermFiles: number;
    // ─── 컨텍스트 한계 관리 ───
    contextLength: number;
    maxOutputTokens: number;
    contextSafetyMargin: number;
    contextOverflowPolicy: 'stopAtLimit' | 'truncateMiddle' | 'rollingWindow';
    autoCompactHistory: boolean;
    /** 작은 모델(≤4B) 감지 시 예산 계산에 쓸 유효 context window 상한. 0 = 비활성화. */
    smallModelContextCap: number;
 }
 // ─── 경로 정규화 유틸리티 ───
@@ -98,7 +106,16 @@ export function getConfig(): IAgentConfig {
        memoryEnabled: cfg.get<boolean>('memoryEnabled', true),
        memoryShortTermMessages: Math.max(0, cfg.get<number>('memoryShortTermMessages', 8)),
        memoryMediumTermSessions: Math.max(0, cfg.get<number>('memoryMediumTermSessions', 5)),
-        memoryLongTermFiles: Math.max(0, cfg.get<number>('memoryLongTermFiles', 6))
+        memoryLongTermFiles: Math.max(0, cfg.get<number>('memoryLongTermFiles', 6)),
        contextLength: Math.max(2048, cfg.get<number>('contextLength', 32768)),
        maxOutputTokens: Math.max(256, cfg.get<number>('maxOutputTokens', 4096)),
        contextSafetyMargin: Math.max(0, cfg.get<number>('contextSafetyMargin', 2048)),
        contextOverflowPolicy: ((): IAgentConfig['contextOverflowPolicy'] => {
            const v = cfg.get<string>('contextOverflowPolicy', 'stopAtLimit');
            return v === 'truncateMiddle' || v === 'rollingWindow' ? v : 'stopAtLimit';
        })(),
        autoCompactHistory: cfg.get<boolean>('autoCompactHistory', true),
        smallModelContextCap: Math.max(0, cfg.get<number>('smallModelContextCap', 8192))
    };
 }
@@ -0,0 +1,254 @@
 /**
 * ============================================================
 * Context Manager (컨텍스트 한계 관리)
 *
 * "context length = 132k" 는 "답변을 132k 토큰까지 생성해도 된다" 가 아닙니다.
 *   시스템 프롬프트 + 대화 기록 + 입력 문서 + 생성될 답변 + 여유분 ≤ context length
 *
 * 이 모듈은 요청을 보내기 *전에* 입력 토큰을 추정하고,
 *   - 동적으로 출력 상한(maxTokens)을 계산하고,
 *   - 대화 기록이 예산을 넘으면 오래된 메시지를 잘라내고,
 *   - 그래도 넘으면 시스템 프롬프트의 [CONTEXT] 블록을 마지막 수단으로 줄이고,
 *   - 생성 종료 사유(stopReason / finish_reason)를 "정상 / 출력한계 / 컨텍스트초과 / 사용자중단"
 *     으로 분류해 호출자가 잘린 응답을 감지할 수 있게 합니다.
 * ============================================================
 */
 /** Speaker role attached to each chat message handled by the budgeting helpers. */
 export type ChatRole = 'user' | 'assistant' | 'system';
 /** Minimal message shape accepted by the token-estimation and history-trimming helpers in this module. */
 export interface BudgetMessage {
    /** Who authored the message. */
    role: ChatRole;
    /** Message text; this is the part that gets measured when estimating tokens. */
    content: string;
    /** internal/system bookkeeping messages that should be kept verbatim where possible */
    internal?: boolean;
 }
 /** Numeric limits that drive the output-budget calculation. */
 export interface ContextLimits {
    /** The model's context window (combined limit for prompt + generated tokens). */
    contextLength: number;
    /** Upper bound on tokens generated in a single response (never exceeded). */
    maxOutputTokens: number;
    /** Headroom that absorbs token-estimation error. */
    safetyMargin: number;
    /** Minimum number of tokens that is always set aside for the output. */
    minOutputTokens: number;
 }
 /** Limits used when a caller does not supply its own. */
 export const DEFAULT_CONTEXT_LIMITS: ContextLimits = {
    contextLength: 32768,
    maxOutputTokens: 4096,
    safetyMargin: 2048,
    minOutputTokens: 512,
 };
 /** LM Studio `contextOverflowPolicy` values — the safety net for when our own budget calculation falls short. */
 export type ContextOverflowPolicy = 'stopAtLimit' | 'truncateMiddle' | 'rollingWindow';
 /** Overflow policy used when none is configured. */
 export const DEFAULT_OVERFLOW_POLICY: ContextOverflowPolicy = 'stopAtLimit';
 /**
 * Roughly estimates how many tokens `text` will occupy.
 *
 * No exact tokenizer is available, so a per-character heuristic is applied:
 *  - CJK characters (Korean / Chinese / Japanese): ~1.6 tokens each
 *  - everything else (Latin text, code, punctuation): ~0.30 tokens each
 * The weights lean toward over-estimation so that context overflow is avoided.
 *
 * @param text Text to measure; empty input yields 0.
 * @returns Estimated token count, rounded up to a whole number.
 */
 export function estimateTokens(text: string): number {
    if (!text) {
        return 0;
    }
    // Removing every CJK code unit leaves exactly the "other" characters behind.
    const withoutCjk = text.replace(/[　-〿぀-ヿ㐀-䶿一-鿿가-힯＀-￯]/g, '');
    const otherCount = withoutCjk.length;
    const cjkCount = text.length - otherCount;
    return Math.ceil(cjkCount * 1.6 + otherCount * 0.3);
 }
 /**
 * Extracts an approximate parameter count (in billions) from a model identifier.
 *
 * Examples: "qwen2.5-7b" → 7, "llama-3.1-8b-instruct" → 8, "gemma-3n-e2b" / "gemma4:e2b" → 2,
 * "qwen3-30b-a3b" → 30, "phi-3-mini" → null (no size token). Quantization tags such as
 * "4bit" do not match because the `b` must not be followed by a letter or digit.
 *
 * @param modelId Model identifier; null/undefined/empty yields null.
 * @returns The size in billions when it is a finite number in (0, 2000), otherwise null.
 */
 export function estimateModelParamsB(modelId: string | null | undefined): number | null {
    if (!modelId) {
        return null;
    }
    const sizePattern = /(?:^|[-_/:.\s])e?(\d+(?:\.\d+)?)\s*b(?![a-z0-9])/i;
    const found = sizePattern.exec(String(modelId));
    if (found === null) {
        return null;
    }
    const billions = Number(found[1]);
    const plausible = Number.isFinite(billions) && billions > 0 && billions < 2000;
    return plausible ? billions : null;
 }
 /** Approximate fixed per-message overhead (role tag, separators, and similar framing). */
 const PER_MESSAGE_TOKEN_OVERHEAD = 4;
 /**
 * Estimates the token cost of one message: its content plus the fixed per-message overhead.
 * A missing/empty `content` is treated as an empty string.
 */
 export function estimateMessageTokens(msg: BudgetMessage): number {
    const body = msg.content || '';
    return PER_MESSAGE_TOKEN_OVERHEAD + estimateTokens(body);
 }
 /** Sums `estimateMessageTokens` over every message in the list (0 for an empty list). */
 export function estimateMessagesTokens(messages: BudgetMessage[]): number {
    let total = 0;
    messages.forEach((message) => {
        total += estimateMessageTokens(message);
    });
    return total;
 }
 /**
 * Computes how many tokens can safely be generated for a prompt of `inputTokens` tokens.
 *
 *   available = contextLength - inputTokens - safetyMargin
 *   maxOutput = clamp(available, minOutputTokens, maxOutputTokens)
 *
 * When `available` does not exceed `minOutputTokens` the input has already consumed almost the
 * whole window: the result is flagged `tight: true` and `minOutputTokens` is returned as-is, so
 * the caller is expected to shrink the input further.
 *
 * @param inputTokens Estimated prompt size in tokens.
 * @param limits      Context limits to apply; defaults to `DEFAULT_CONTEXT_LIMITS`.
 * @returns The output cap, the raw `available` figure (may be negative) and the `tight` flag.
 */
 export function computeOutputBudget(
    inputTokens: number,
    limits: ContextLimits = DEFAULT_CONTEXT_LIMITS
 ): { maxOutputTokens: number; available: number; tight: boolean } {
    const floor = limits.minOutputTokens;
    const ceiling = limits.maxOutputTokens;
    const available = limits.contextLength - inputTokens - limits.safetyMargin;

    if (available <= floor) {
        return { maxOutputTokens: floor, available, tight: true };
    }

    const capped = Math.min(available, ceiling);
    const granted = Math.max(floor, capped);
    return { maxOutputTokens: granted, available, tight: false };
 }
 /** Outcome of fitting a message list into a token budget. */
 export interface TrimResult<M extends BudgetMessage> {
    /** The messages to send (possibly preceded by an omission marker). */
    messages: M[];
    /** Number of messages that were cut (0 means nothing changed). */
    droppedCount: number;
    /** Estimated input tokens after trimming. */
    tokensAfter: number;
 }
 /**
 * Fits a conversation history into a token budget.
 *
 * Strategy:
 *  1. The last message (normally the current user question) is always kept, even if it alone
 *     exceeds the budget.
 *  2. Walking backwards from the newest message, older messages are kept while they still fit.
 *     The walk stops at the first message that does not fit, so the kept part is a contiguous
 *     suffix of the history.
 *  3. If anything was dropped, a marker produced by `makeMarker` is inserted at the front so the
 *     model can tell that earlier context is missing.
 *
 * The marker's own token cost is reserved *before* older messages are admitted, so that
 * `tokensAfter` does not exceed `budgetTokens` merely because the marker was added afterwards.
 * The reservation is measured on `makeMarker(messages.length - 1)` (the largest possible count);
 * `makeMarker` is therefore invoked twice when trimming happens and is assumed to be free of
 * side effects.
 *
 * Note: only the array that will be *sent* is trimmed; the input array is never mutated.
 *
 * @param messages     Full history, oldest first.
 * @param budgetTokens Token budget for the history.
 * @param makeMarker   Factory for the "N earlier messages omitted" marker message.
 * @returns The trimmed list, how many messages were dropped, and the estimated tokens afterwards.
 */
 export function trimHistoryToBudget<M extends BudgetMessage>(
    messages: M[],
    budgetTokens: number,
    makeMarker: (droppedCount: number) => M
 ): TrimResult<M> {
    if (messages.length === 0) {
        return { messages, droppedCount: 0, tokensAfter: 0 };
    }
    const total = estimateMessagesTokens(messages);
    if (total <= budgetTokens) {
        return { messages, droppedCount: 0, tokensAfter: total };
    }

    const lastIndex = messages.length - 1;
    // The newest message is kept unconditionally.
    let used = estimateMessageTokens(messages[lastIndex]);
    let firstKept = lastIndex;

    if (lastIndex > 0) {
        // total > budget with more than one message means at least one message will be dropped,
        // so a marker is certain to be added — set its cost aside up front.
        const markerReserve = estimateMessageTokens(makeMarker(lastIndex));
        const fillBudget = budgetTokens - markerReserve;
        for (let i = lastIndex - 1; i >= 0; i--) {
            const cost = estimateMessageTokens(messages[i]);
            if (used + cost > fillBudget) {
                break;
            }
            used += cost;
            firstKept = i;
        }
    }

    // A single slice replaces the repeated unshift of the kept messages (O(n) instead of O(n²)).
    const kept = messages.slice(firstKept);
    const droppedCount = firstKept;
    if (droppedCount > 0) {
        const marker = makeMarker(droppedCount);
        kept.unshift(marker);
        used += estimateMessageTokens(marker);
    }
    return { messages: kept, droppedCount, tokensAfter: used };
 }
 /** Opening marker of the auxiliary-context region of the system prompt that may be trimmed. */
 export const CONTEXT_OPEN_MARKER = '\n\n[CONTEXT]\n';
 /** Closing marker of the auxiliary-context region of the system prompt that may be trimmed. */
 export const CONTEXT_CLOSE_MARKER = '\n[/CONTEXT]\n';
 /**
 * Returns the longest prefix of `text` whose `estimateTokens` value does not exceed `tokenBudget`.
 * The cut point is found by binary search (the estimate never decreases as the prefix grows) and
 * is moved back by one code unit when it would otherwise split a UTF-16 surrogate pair.
 */
 function fitPrefixToTokenBudget(text: string, tokenBudget: number): string {
    if (!text || tokenBudget <= 0) {
        return '';
    }
    if (estimateTokens(text) <= tokenBudget) {
        return text;
    }
    // Invariant: the prefix of length `fits` is within budget, the prefix of length `tooLong` is not.
    let fits = 0;
    let tooLong = text.length;
    while (tooLong - fits > 1) {
        const mid = (fits + tooLong) >>> 1;
        if (estimateTokens(text.slice(0, mid)) <= tokenBudget) {
            fits = mid;
        } else {
            tooLong = mid;
        }
    }
    if (fits > 0) {
        const lastUnit = text.charCodeAt(fits - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            fits -= 1; // do not leave a dangling high surrogate
        }
    }
    return text.slice(0, fits);
 }
 /**
 * Last-resort shrinking of an oversized system prompt. Only the auxiliary context between
 * `[CONTEXT]` and `[/CONTEXT]` (brain / memory / open files / RAG data spliced in at assembly
 * time) is cut. The core instructions before it and the closing instructions after it (e.g.
 * negative constraints, agent system prompt) are never touched. Without a `[/CONTEXT]` marker
 * everything after `[CONTEXT]` is treated as trimmable.
 *
 * Cut points are chosen by measuring with `estimateTokens` rather than by a fixed
 * characters-per-token ratio, so CJK-heavy text (which the estimator charges far more per
 * character than Latin text) is cut to the budget instead of being left several times over it.
 *
 * When the prompt has no `[CONTEXT]` region at all, the tail of the whole prompt is cut instead;
 * at least the first 1000 characters are always kept in that case, even if that exceeds the budget.
 *
 * @param systemPrompt The fully assembled system prompt.
 * @param maxTokens    Token ceiling allowed for the system prompt.
 * @returns The (possibly shortened) prompt and whether anything was cut.
 */
 export function truncateSystemPromptContext(
    systemPrompt: string,
    maxTokens: number
 ): { prompt: string; truncated: boolean } {
    if (estimateTokens(systemPrompt) <= maxTokens) {
        return { prompt: systemPrompt, truncated: false };
    }
    // Tokens set aside for the "…omitted…" notice appended at the cut.
    const NOTICE_TOKEN_RESERVE = 64;

    const openIdx = systemPrompt.indexOf(CONTEXT_OPEN_MARKER);
    if (openIdx < 0) {
        // No auxiliary-context region: the only option is to cut the tail of the whole prompt.
        const fitted = fitPrefixToTokenBudget(systemPrompt, maxTokens - NOTICE_TOKEN_RESERVE);
        const keepChars = Math.max(1000, fitted.length);
        return {
            prompt: systemPrompt.slice(0, keepChars) + '\n\n[…시스템 프롬프트가 컨텍스트 한계로 잘렸습니다…]',
            truncated: true,
        };
    }

    const bodyStart = openIdx + CONTEXT_OPEN_MARKER.length;
    const closeIdx = systemPrompt.indexOf(CONTEXT_CLOSE_MARKER, bodyStart);
    const hasClose = closeIdx >= 0;
    const head = systemPrompt.slice(0, bodyStart);                       // instructions + "[CONTEXT]\n"
    const body = hasClose ? systemPrompt.slice(bodyStart, closeIdx) : systemPrompt.slice(bodyStart);
    const tail = hasClose ? systemPrompt.slice(closeIdx) : '';           // "[/CONTEXT]" + closing instructions

    const fixedTokens = estimateTokens(head) + estimateTokens(tail);
    const remainForBody = maxTokens - fixedTokens - NOTICE_TOKEN_RESERVE;
    if (remainForBody <= 0) {
        // The untouchable parts alone use up the budget: drop the auxiliary context entirely.
        return {
            prompt: head + '[…보조 컨텍스트는 컨텍스트 한계로 모두 생략되었습니다…]' + tail,
            truncated: true,
        };
    }

    const fittedBody = fitPrefixToTokenBudget(body, remainForBody);
    const trimmedBody = fittedBody.length === body.length
        ? body
        : fittedBody + '\n\n[…이하 보조 컨텍스트는 컨텍스트 한계로 생략됨…]';
    return { prompt: head + trimmedBody + tail, truncated: true };
 }
 /** Engine-independent classification of why a generation ended. */
 export type GenerationStopKind =
    | 'complete'        // normal end (EOS / stop string)
    | 'output-limit'    // maxTokens reached — the answer was cut off midway
    | 'context-overflow'// input + output exceeded the context window
    | 'user-stopped'    // cancelled by the user
    | 'tool-calls'      // ended in order to make tool calls
    | 'error'           // model / runtime failure
    | 'unknown';
 /**
 * Normalizes an engine-specific stop-reason string into a common `GenerationStopKind`.
 *  - LM Studio SDK: `stats.stopReason` — eosFound / stopStringFound / maxPredictedTokensReached / contextLengthReached / userStopped / toolCalls / failed / modelUnloaded
 *  - OpenAI-compatible REST: `choices[].finish_reason` — stop / length / tool_calls / content_filter
 *  - Ollama: `done_reason` — stop / length / load
 *
 * Matching is case-insensitive. Rules are tried in order and the first match wins; anything
 * unrecognized (including null/undefined/empty input) maps to 'unknown'.
 */
 export function classifyStopReason(raw: string | null | undefined): GenerationStopKind {
    if (!raw) {
        return 'unknown';
    }
    const normalized = String(raw).toLowerCase();
    const rules: Array<[RegExp, GenerationStopKind]> = [
        [/(maxpredictedtokensreached|^length$|max_tokens)/, 'output-limit'],
        [/(contextlengthreached|context_length|context_overflow|contextoverflow)/, 'context-overflow'],
        [/(eosfound|stopstringfound|^stop$|^end$|stop_sequence|content_filter)/, 'complete'],
        [/(userstopped|aborted|cancel)/, 'user-stopped'],
        [/(toolcalls|tool_calls)/, 'tool-calls'],
        [/(failed|error|modelunloaded)/, 'error'],
    ];
    for (const [pattern, kind] of rules) {
        if (pattern.test(normalized)) {
            return kind;
        }
    }
    return 'unknown';
 }
 /**
 * One-line notice to append for the user when a response ended abnormally.
 * Returns an empty string for every kind other than 'output-limit', 'context-overflow' and 'error'.
 */
 export function truncationNotice(kind: GenerationStopKind): string {
    if (kind === 'output-limit') {
        return '\n\n> ⚠️ 답변이 출력 토큰 한계에 도달해 잘렸습니다. "이어서 작성해줘" 라고 요청하면 계속 생성합니다.';
    }
    if (kind === 'context-overflow') {
        return '\n\n> ⚠️ 입력 컨텍스트가 모델의 context window 를 초과했습니다. 대화를 새로 시작하거나(`/newChat`) Settings 에서 `g1nation.contextLength` 를 모델 실제 값으로 맞추고, Brain/Skill 컨텍스트를 줄여보세요.';
    }
    if (kind === 'error') {
        return '\n\n> ⚠️ 모델이 비정상 종료했습니다 (컨텍스트 초과 또는 모델 용량 부족 가능). 더 큰 모델로 바꾸거나 컨텍스트를 줄여보세요.';
    }
    return '';
 }
@@ -82,19 +82,20 @@ export function resolveBrainDirFromConfig(): string {
 * `_sendAgentsList` and `_createAgent` operate on).
 *
 * Resolution order:
- *   1. The first VS Code workspace folder + `/.agent/skills/` (creating the
+ *   1. VS Code config `g1nation.agentSkillsPath` (after `~` + abs-path normalization),
 *      if the user explicitly pointed at a folder.
 *   2. The first VS Code workspace folder + `/.agent/skills/` (creating the
 *      folder is the caller's responsibility).
- *   2. Empty string when no workspace is open — callers must short-circuit.
+ *   3. Empty string when no workspace is open — callers must short-circuit.
 *
- * The legacy default `E:\Wiki\Agent\.agent\skills` from sidebarProvider.ts is
+ * Note: a previous version hard-coded `E:\Wiki\Agent\.agent\skills` as a
- * preserved as a fall-through hint for the original author's machine.
+ * fall-through for the original author's Windows machine. That made behavior
 * differ between machines (and never matched anything on macOS/Linux), so it
 * was removed — use `g1nation.agentSkillsPath` for a non-workspace location.
 */
 export function resolveAgentSkillsDir(): string {
-    const legacy = 'E:\\Wiki\\Agent\\.agent\\skills';
+    const configured = resolvePathInput(_safeGetConfigString('g1nation', 'agentSkillsPath'));
-    try {
+    if (configured) return configured;
        const fs = require('fs') as typeof import('fs');
        if (fs.existsSync(legacy)) return legacy;
    } catch { /* fs unavailable in some isolated tests */ }
    const folders = vscode.workspace.workspaceFolders;
    if (folders && folders.length > 0) {
@@ -111,8 +112,12 @@ export function resolveAgentSkillsDir(): string {
 */
 export function isInside(parent: string, child: string): boolean {
    if (!parent || !child) return false;
-    const p = path.resolve(parent);
+    // Windows file systems are case-insensitive and path.resolve may emit a
-    const c = path.resolve(child);
+    // mixed-case drive letter, so normalize case there before comparing —
    // otherwise legitimate writes get rejected just because of casing.
    const norm = (p: string) => (process.platform === 'win32' ? path.resolve(p).toLowerCase() : path.resolve(p));
    const p = norm(parent);
    const c = norm(child);
    if (c === p) return true;
    return c.startsWith(p + path.sep);
 }
@@ -11,13 +11,26 @@ export interface ChatStreamRequest {
    modelName: string;
    messages: ChatStreamMessage[];
    temperature: number;
    /** Upper bound on tokens to generate. Omit to fall back to a conservative default. */
    maxTokens?: number;
    /** LM Studio context-overflow safety net used only if the prompt still exceeds the window. */
    contextOverflowPolicy?: 'stopAtLimit' | 'truncateMiddle' | 'rollingWindow';
    signal?: AbortSignal;
 }
 /**
 * One stream event. `token` carries generated text (possibly empty for the final event);
 * `stopReason` is set on the *last* event only and is the SDK's `stats.stopReason`
 * (e.g. `eosFound`, `maxPredictedTokensReached`, `contextLengthReached`, `userStopped`).
 */
 export interface ChatStreamEvent {
    token: string;
    stopReason?: string;
 }
 export interface IChatStreamer {
    /** Token-level streaming for an LM Studio chat completion via the WebSocket SDK. */
-    stream(req: ChatStreamRequest): AsyncIterable<{ token: string }>;
+    stream(req: ChatStreamRequest): AsyncIterable<ChatStreamEvent>;
    /**
     * Drop the SDK's cached handle for `modelName`. Callers invoke this when
     * the previous stream returned zero tokens with no error — a symptom of a
@@ -39,7 +52,7 @@ export interface IChatStreamer {
 export class LMStudioStreamer implements IChatStreamer {
    constructor(private readonly client: ILMStudioClient) {}
-    async *stream(req: ChatStreamRequest): AsyncIterable<{ token: string }> {
+    async *stream(req: ChatStreamRequest): AsyncIterable<ChatStreamEvent> {
        const trimmedModel = (req.modelName || '').trim();
        if (!trimmedModel) {
            throw new LMStudioLifecycleError('LMStudioStreamer.stream called without a model name.');
@@ -62,6 +75,10 @@ export class LMStudioStreamer implements IChatStreamer {
            const prediction = (model as any).respond(req.messages, {
                temperature: req.temperature,
                maxTokens: req.maxTokens ?? 4096,
                // Safety net: if our own token budgeting still underestimated and the prompt
                // exceeds the model's context window, decide whether the SDK should fail
                // loudly (stopAtLimit — default) or silently drop content.
                contextOverflowPolicy: req.contextOverflowPolicy ?? 'stopAtLimit',
                signal: req.signal,
            });
@@ -98,7 +115,22 @@ export class LMStudioStreamer implements IChatStreamer {
                req.signal?.removeEventListener?.('abort', onAbort);
            }
-            if (!caught) return;
+            if (!caught) {
                if (req.signal?.aborted) return;
                // The prediction object is also a Promise<PredictionResult>; awaiting it after
                // the stream drains gives us stats.stopReason so callers can tell a truncated
                // answer (maxPredictedTokensReached / contextLengthReached) from a normal one.
                let stopReason: string | undefined;
                try {
                    const result: any = await prediction;
                    stopReason = result?.stats?.stopReason;
                    if (stopReason) {
                        logInfo('LM Studio SDK chat stream finished.', { model: trimmedModel, stopReason, tokensYielded: yielded });
                    }
                } catch { /* result unavailable on some SDK versions — non-fatal */ }
                yield { token: '', stopReason: stopReason ?? 'eosFound' };
                return;
            }
            const errMsg = String(caught?.message ?? caught);
            const handleDead = /\bdisposed\b/i.test(errMsg)
@@ -0,0 +1,220 @@
 /**
 * ============================================================
 * Brain Index — persistent, mtime-keyed tokenized cache of the Second Brain
 *
 * RAG 검색은 매 질의마다 브레인의 모든 .md 파일을 읽고 토크나이즈해서 TF-IDF 점수를
 * 계산했습니다 — 파일 수가 많아지면 그게 병목입니다.
 *
 * 이 모듈은 `<brainPath>/.astra/brain-index.json` 에 파일별 토큰 배열을 (mtime+size 키로)
 * 저장해 두고, 다음 질의에서는 *변경된 파일만* 다시 읽어 토크나이즈합니다. 나머지는 디스크/메모리
 * 캐시에서 그대로 가져옵니다. 디스크 쓰기는 디바운스되고 실패해도 in-memory 로만 동작합니다.
 * ============================================================
 */
 import * as fs from 'fs';
 import * as path from 'path';
 import { tokenize, countConflictIndicators } from './scoring';
 import { logInfo } from '../utils';
 /** Schema version of the persisted index; a file with a different version is discarded and rebuilt. */
 const INDEX_VERSION = 2;
 /** Directory (inside the brain folder) that holds the index file. */
 const INDEX_DIR = '.astra';
 /** File name of the persisted index. */
 const INDEX_FILE = 'brain-index.json';
 /** Once the index holds more entries than this, entries not seen in the current scan are pruned (keeps deleted files from piling up). */
 const MAX_INDEX_ENTRIES = 12000;
 /** Debounce delay for disk writes, in milliseconds. */
 const WRITE_DEBOUNCE_MS = 1500;
 /** Cached tokenization of one brain file, valid while the file's mtime and size are unchanged. */
 interface IndexEntry {
    mtimeMs: number;
    size: number;
    title: string;          // basename without .md
    relativePath: string;   // relative to brainPath
    tokens: string[];       // tokenize(`${title} ${content}`)
    titleTokens: string[];  // tokenize(title)
    conflictCount: number;  // countConflictIndicators(`${title} ${content}`)
 }
 /** On-disk shape of the index file. */
 interface PersistedIndex {
    version: number;
    entries: Record<string, IndexEntry>; // keyed by absolute file path
 }
 /** Tokenized view of one brain file as handed back to callers of `getBrainTokenIndex`. */
 export interface IndexedBrainDoc {
    filePath: string;
    relativePath: string;
    title: string;
    tokens: string[];
    titleTokens: string[];
    conflictCount: number;
    mtimeMs: number;
 }
 /** In-memory state kept per brain folder. */
 interface BrainState {
    index: PersistedIndex;
    dirty: boolean;            // true when the in-memory index has changes not yet written to disk
    diskPath: string | null;   // null if we can't determine a writable path
    writeTimer?: ReturnType<typeof setTimeout>; // pending debounced write, if any
 }
 /** Loaded brain states, keyed by the `brainPath` string passed in by callers. */
 const _states = new Map<string, BrainState>();
 /** Builds the path of the persisted index file that belongs to `brainPath`. */
 function indexFileFor(brainPath: string): string {
    const cacheDir = path.join(brainPath, INDEX_DIR);
    return path.join(cacheDir, INDEX_FILE);
 }
 /**
 * Returns the in-memory state for `brainPath`, creating it on first use.
 *
 * On first use the persisted index file is read if it exists. A file whose version or shape is
 * not recognized is ignored (the index starts empty), and any read/parse failure is logged and
 * likewise results in an empty index. The resulting state is cached in `_states`.
 */
 function loadState(brainPath: string): BrainState {
    const alreadyLoaded = _states.get(brainPath);
    if (alreadyLoaded) {
        return alreadyLoaded;
    }

    const emptyIndex = (): PersistedIndex => ({ version: INDEX_VERSION, entries: {} });
    let index = emptyIndex();
    let diskPath: string | null = null;
    try {
        diskPath = indexFileFor(brainPath);
        if (fs.existsSync(diskPath)) {
            const parsed = JSON.parse(fs.readFileSync(diskPath, 'utf8'));
            const recognized = Boolean(parsed)
                && parsed.version === INDEX_VERSION
                && Boolean(parsed.entries)
                && typeof parsed.entries === 'object';
            if (recognized) {
                index = parsed as PersistedIndex;
            } else {
                logInfo('Brain index is stale/unrecognized — rebuilding.', { brainPath });
            }
        }
    } catch (e: any) {
        logInfo('Brain index load failed — starting fresh.', { brainPath, error: e?.message || String(e) });
        index = emptyIndex();
    }

    const created: BrainState = { index, dirty: false, diskPath };
    _states.set(brainPath, created);
    return created;
 }
 /**
 * Schedules a debounced write of the index to disk.
 *
 * Does nothing when the state is clean, has no disk path, or already has a write pending.
 * The write goes to a `.tmp` file that is then renamed over the real file. A failure is logged
 * and leaves the state dirty, so the index keeps working from memory.
 */
 function scheduleWrite(st: BrainState, brainPath: string): void {
    const nothingToDo = !st.dirty || !st.diskPath || Boolean(st.writeTimer);
    if (nothingToDo) {
        return;
    }

    const flush = (): void => {
        st.writeTimer = undefined;
        const target = st.diskPath;
        if (!st.dirty || !target) {
            return;
        }
        try {
            const cacheDir = path.dirname(target);
            fs.mkdirSync(cacheDir, { recursive: true });
            // One-time .gitignore so the cache dir never gets committed into a Second Brain git repo.
            const ignoreFile = path.join(cacheDir, '.gitignore');
            if (!fs.existsSync(ignoreFile)) {
                try {
                    fs.writeFileSync(ignoreFile, '*\n', 'utf8');
                } catch { /* non-fatal */ }
            }
            const staging = `${target}.tmp`;
            fs.writeFileSync(staging, JSON.stringify(st.index), 'utf8');
            fs.renameSync(staging, target);
            st.dirty = false;
        } catch (e: any) {
            logInfo('Brain index write failed (continuing in-memory only).', { brainPath, error: e?.message || String(e) });
        }
    };

    const handle = setTimeout(flush, WRITE_DEBOUNCE_MS);
    // Where the timer handle supports it, unref() is called on it.
    if (typeof (handle as any).unref === 'function') {
        (handle as any).unref();
    }
    st.writeTimer = handle;
 }
 /**
 * Produces tokenized representations for `files` (absolute brain-file paths that the caller has
 * already scoped/filtered). A file whose mtime and size match its index entry is served from the
 * index; a new or changed file is read, tokenized and stored, and a debounced disk write is scheduled.
 *
 * Files that cannot be stat'ed or read are skipped silently. Returns [] for an empty/invalid
 * `brainPath` or an empty list.
 */
 export function getBrainTokenIndex(brainPath: string, files: string[]): IndexedBrainDoc[] {
    if (!brainPath || !Array.isArray(files) || files.length === 0) {
        return [];
    }
    const state = loadState(brainPath);
    const entries = state.index.entries;
    const docs: IndexedBrainDoc[] = [];
    const visited = new Set<string>();
    let reindexed = 0;

    for (const file of files) {
        visited.add(file);

        let stat: fs.Stats;
        try {
            stat = fs.statSync(file);
        } catch {
            continue; // listed but gone now — skip silently
        }

        const hit = entries[file];
        const reusable = Boolean(hit)
            && hit.mtimeMs === stat.mtimeMs
            && hit.size === stat.size
            && Array.isArray(hit.tokens)
            && Array.isArray(hit.titleTokens);
        if (reusable) {
            docs.push({
                filePath: file,
                relativePath: hit.relativePath,
                title: hit.title,
                tokens: hit.tokens,
                titleTokens: hit.titleTokens,
                conflictCount: hit.conflictCount || 0,
                mtimeMs: hit.mtimeMs,
            });
            continue;
        }

        // New or changed file: read it and rebuild its entry.
        let content: string;
        try {
            content = fs.readFileSync(file, 'utf8');
        } catch {
            continue;
        }
        const relativePath = path.relative(brainPath, file);
        const title = path.basename(file, '.md');
        const combined = `${title} ${content}`;
        const fresh: IndexEntry = {
            mtimeMs: stat.mtimeMs,
            size: stat.size,
            title,
            relativePath,
            tokens: tokenize(combined),
            titleTokens: tokenize(title),
            conflictCount: countConflictIndicators(combined),
        };
        entries[file] = fresh;
        state.dirty = true;
        reindexed += 1;
        docs.push({
            filePath: file,
            relativePath,
            title,
            tokens: fresh.tokens,
            titleTokens: fresh.titleTokens,
            conflictCount: fresh.conflictCount,
            mtimeMs: fresh.mtimeMs,
        });
    }

    // Stale-entry pruning. It only runs when this call covered most of the index (a near-full
    // scan), so a query scoped to one agent does not evict cache for files outside its scope.
    // An index that has outgrown MAX_INDEX_ENTRIES is pruned regardless.
    const knownKeys = Object.keys(entries);
    const nearFullScan = visited.size >= knownKeys.length * 0.8;
    if (nearFullScan || knownKeys.length > MAX_INDEX_ENTRIES) {
        const stale = knownKeys.filter((key) => !visited.has(key));
        for (const key of stale) {
            delete entries[key];
        }
        if (stale.length > 0) {
            state.dirty = true;
        }
    }

    if (reindexed > 0) {
        logInfo('Brain index updated.', { brainPath, files: files.length, reindexed, totalEntries: Object.keys(state.index.entries).length });
    }
    if (state.dirty) {
        scheduleWrite(state, brainPath);
    }
    return docs;
 }
 /**
 * Discards the in-memory index, cancelling any pending debounced write, for a single brain
 * or — when `brainPath` is omitted — for every brain. The index file on disk is not touched.
 */
 export function clearBrainTokenIndex(brainPath?: string): void {
    if (brainPath !== undefined) {
        const state = _states.get(brainPath);
        if (state?.writeTimer) {
            clearTimeout(state.writeTimer);
        }
        _states.delete(brainPath);
        return;
    }

    _states.forEach((state) => {
        if (state.writeTimer) {
            clearTimeout(state.writeTimer);
            state.writeTimer = undefined;
        }
    });
    _states.clear();
 }
@@ -19,11 +19,13 @@ import { findBrainFiles, summarizeText } from '../utils';
 import { isInside } from '../lib/paths';
 import { MemoryManager } from '../memory';
 import { RetrievalChunk, RetrievalResult, ContextBudgetConfig } from './types';
-import { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from './scoring';
+import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
 import { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
 import { getBrainTokenIndex } from './brainIndex';
-export { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from './scoring';
+export { tokenize, expandQuery, scoreTfIdf, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
 export { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
 export { getBrainTokenIndex, clearBrainTokenIndex } from './brainIndex';
 export * from './types';
 interface RetrievalOptions {
@@ -133,34 +135,29 @@ export class RetrievalOrchestrator {
            if (allFiles.length === 0) return [];
-            // Read all files for TF-IDF
+            // Tokenized docs from the persistent mtime-keyed index — unchanged files are not re-read
-            const documents = allFiles.map((file) => {
+            // or re-tokenized, so per-query work over a large brain drops from O(total content) to O(files) stats.
            const indexed = getBrainTokenIndex(brain.localBrainPath, allFiles);
            if (indexed.length === 0) return [];
            const scored = scoreTfIdfPreTokenized(
                expandedTokens,
                indexed.map((d) => ({
                    tokens: d.tokens,
                    titleTokens: d.titleTokens,
                    lastModified: d.mtimeMs,
                    conflictCount: d.conflictCount,
                }))
            );
            const topResults: RetrievalChunk[] = [];
            for (const s of scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score).slice(0, limit)) {
                const doc = indexed[s.index];
                // Only the top `limit` files are actually read off disk (for excerpt extraction).
                let content = '';
-                let lastModified = 0;
+                try { content = fs.readFileSync(doc.filePath, 'utf8'); } catch { /* deleted just now — skip */ continue; }
-                try {
+                const excerpt = extractBestExcerpt(content, expandedTokens, 400);
-                    content = fs.readFileSync(file, 'utf8');
+                topResults.push({
                    lastModified = fs.statSync(file).mtimeMs;
                } catch { /* skip */ }
                return {
                    title: path.basename(file, '.md'),
                    content,
                    lastModified,
                    filePath: file,
                    relativePath: path.relative(brain.localBrainPath, file)
                };
            });
            // TF-IDF scoring
            const scored = scoreTfIdf(expandedTokens, documents);
            return scored
                .filter((s) => s.score > 0)
                .sort((a, b) => b.score - a.score)
                .slice(0, limit)
                .map((s) => {
                    const doc = documents[s.index];
                    const excerpt = extractBestExcerpt(doc.content, expandedTokens, 400);
                    return {
                    id: `brain-${s.index}`,
                    source: 'brain-memory' as const,
                    title: doc.relativePath,
@@ -170,15 +167,16 @@ export class RetrievalOrchestrator {
                    metadata: {
                        filePath: doc.filePath,
                        category: this.inferCategory(doc.relativePath),
-                            isProjectEvidence: this.isProjectEvidence(doc.relativePath, doc.content),
+                        isProjectEvidence: this.isProjectEvidence(doc.relativePath, content),
-                            lastUpdated: doc.lastModified,
+                        lastUpdated: doc.mtimeMs,
                        // Phase 5: Scoring Intelligence Integration
                        conflictDetected: s.conflictDetected,
                        conflictSeverity: s.conflictSeverity,
-                            informationDensity: s.informationDensity
+                        informationDensity: s.informationDensity,
-                        }
+                    },
                    };
                });
            }
            return topResults;
        } catch {
            return [];
        }
@@ -160,6 +160,30 @@ function inverseDocumentFrequency(
 export type ConflictSeverity = 'NONE' | 'LOW' | 'MEDIUM' | 'HIGH';
 /**
 * Returns the number of configured conflict-indicator words that occur in `rawText`.
 * Matching is a case-insensitive substring test, and each indicator counts at most once.
 * Exported so the brain index can store the count per file rather than re-scan content on every query.
 */
 export function countConflictIndicators(rawText: string): number {
    const haystack = (rawText || '').toLowerCase();
    return Array.from(SCORING_CONFIG.CONFLICT_INDICATORS)
        .filter((indicator) => haystack.includes(indicator.toLowerCase()))
        .length;
 }
 /** A document whose tokens were already computed (e.g. from the persistent brain index). */
 export interface PreTokenizedDoc {
    /** tokenize(`${title} ${content}`) */
    tokens: string[];
    /** tokenize(title) */
    titleTokens: string[];
    /** Optional last-modified timestamp of the document (the brain index supplies the file's mtimeMs). */
    lastModified?: number;
    /** result of countConflictIndicators(`${title} ${content}`); 0 if unknown */
    conflictCount: number;
 }
 export interface ScoredDocument {
    index: number;
    score: number;
@@ -173,6 +197,8 @@ export interface ScoredDocument {
 /**
 * TF-IDF 기반으로 문서 집합을 스코어링합니다.
 * 문서 내용을 받아 즉석에서 토크나이즈합니다 — 이미 토큰화된 집합이 있다면
 * `scoreTfIdfPreTokenized` 를 직접 호출하면 토크나이즈를 건너뛸 수 있습니다.
 */
 export function scoreTfIdf(
    queryTokens: string[],
@@ -183,11 +209,28 @@ export function scoreTfIdf(
    }>
 ): ScoredDocument[] {
    if (documents.length === 0 || queryTokens.length === 0) return [];
    return scoreTfIdfPreTokenized(queryTokens, documents.map((doc) => {
        const combined = `${doc.title} ${doc.content}`;
        return {
            tokens: tokenize(combined),
            titleTokens: tokenize(doc.title),
            lastModified: doc.lastModified,
            conflictCount: countConflictIndicators(combined),
        };
    }));
 }
-    // Pre-tokenize all documents
+/**
-    const docTokenArrays = documents.map((doc) =>
+ * TF-IDF 스코어링 — 이미 토큰화된 문서 집합 버전 (브레인 인덱스 등 캐시된 토큰을 그대로 사용).
-        tokenize(`${doc.title} ${doc.content}`)
+ * `scoreTfIdf` 와 동일한 알고리즘이며 출력 형태도 같습니다.
-    );
+ */
 export function scoreTfIdfPreTokenized(
    queryTokens: string[],
    documents: PreTokenizedDoc[]
 ): ScoredDocument[] {
    if (documents.length === 0 || queryTokens.length === 0) return [];
    const docTokenArrays = documents.map((doc) => doc.tokens);
    const docTokenSets = docTokenArrays.map((tokens) => new Set(tokens));
    // Expand query with synonyms
@@ -205,22 +248,18 @@ export function scoreTfIdf(
    return documents.map((doc, index) => {
        const docTokens = docTokenArrays[index];
-        const titleTokens = new Set(tokenize(doc.title));
+        const titleTokens = new Set(doc.titleTokens);
        let score = 0;
        const matchedTerms: string[] = [];
-        // Conflict Detection & Severity Analysis (Substring based for better recall with particles)
+        // Conflict Detection & Severity Analysis (pre-counted by caller / index)
-        const rawText = `${doc.title} ${doc.content}`.toLowerCase();
+        const conflictCount = doc.conflictCount || 0;
-        const conflictMatches = [...SCORING_CONFIG.CONFLICT_INDICATORS].filter(indicator => 
+        const conflictDetected = conflictCount > 0;
            rawText.includes(indicator.toLowerCase())
        );
        const conflictDetected = conflictMatches.length > 0;
        let conflictSeverity: ConflictSeverity = 'NONE';
-        if (conflictMatches.length >= SCORING_CONFIG.CONFLICT_THRESHOLDS.HIGH) conflictSeverity = 'HIGH';
+        if (conflictCount >= SCORING_CONFIG.CONFLICT_THRESHOLDS.HIGH) conflictSeverity = 'HIGH';
-        else if (conflictMatches.length >= SCORING_CONFIG.CONFLICT_THRESHOLDS.MEDIUM) conflictSeverity = 'MEDIUM';
+        else if (conflictCount >= SCORING_CONFIG.CONFLICT_THRESHOLDS.MEDIUM) conflictSeverity = 'MEDIUM';
-        else if (conflictMatches.length >= SCORING_CONFIG.CONFLICT_THRESHOLDS.LOW) conflictSeverity = 'LOW';
+        else if (conflictCount >= SCORING_CONFIG.CONFLICT_THRESHOLDS.LOW) conflictSeverity = 'LOW';
        for (const term of expandedQuery) {
            const tf = termFrequency(term, docTokens);
@@ -34,6 +34,7 @@ export async function handleAgentMessage(provider: SidebarChatProvider, data: an
        case 'saveAgentSelection':
            await provider._context.globalState.update(SidebarChatProvider.lastAgentStateKey, data.path || 'none');
            logInfo(`Agent selection saved: ${data.path}`);
            void provider._sendReadyStatus();
            return true;
        case 'getKnowledgeScope': {
            const view = (provider as any)._view as vscode.WebviewView | undefined;
@@ -54,6 +55,7 @@ export async function handleAgentMessage(provider: SidebarChatProvider, data: an
                    brainRoot,
                },
            });
            void provider._sendReadyStatus();
            return true;
        }
        case 'editKnowledgeMap':
@@ -32,6 +32,10 @@ export async function handleChatMessage(provider: SidebarChatProvider, data: any
            await provider._sendModels();
            await provider._sendChronicleProjects();
            await provider._restoreActiveSessionIntoView();
            await provider._sendReadyStatus();
            return true;
        case 'getReadyStatus':
            await provider._sendReadyStatus();
            return true;
        case 'getModels':
            await provider._sendModels();
@@ -1,6 +1,7 @@
 import * as vscode from 'vscode';
 import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
 import { 
    _getBrainDir, 
    findBrainFiles, 
@@ -22,7 +23,8 @@ import { handleChatMessage } from './sidebar/chatHandlers';
 import { handleBrainMessage } from './sidebar/brainHandlers';
 import { handleChronicleMessage } from './sidebar/chronicleHandlers';
 import { handleAgentMessage } from './sidebar/agentHandlers';
-import { getOrCreateAgentEntry } from './skills/agentKnowledgeMap';
+import { getOrCreateAgentEntry, resolveScopeForAgent } from './skills/agentKnowledgeMap';
 import { estimateModelParamsB } from './lib/contextManager';
 import { loadExternalSkills, formatSkillsAsPromptBlock } from './skills/externalSkillLoader';
 export interface SidebarLmStudioDeps {
@@ -111,12 +113,14 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
            void this._sendModels();
            void this._sendBrainProfiles();
            void this._sendAgentsList();
            void this._sendReadyStatus();
        });
        webviewView.webview.html = this._getHtml(webviewView.webview);
        this._agent.setWebview(webviewView.webview);
        void this._restoreActiveSessionIntoView();
        void this._sendReadyStatus();
        webviewView.webview.onDidReceiveMessage(async (data) => {
            if (await handleChatMessage(this, data)) return;
@@ -393,6 +397,71 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
        });
    }
    /**
     * Builds a one-line "current readiness" snapshot for the sidebar's status bar and posts
     * it to the webview as a `readyStatus` message.
     *
     * The snapshot carries: engine kind/label plus the cached online flag, the default model
     * (whether LM Studio reports it as loaded, and its estimated parameter count), the active
     * Brain's name and file count, the selected Agent with its mapped knowledge-folder count,
     * the memory / multi-agent flags, and the effective context window (reduced to
     * `smallModelContextCap` when the small-model cap applies).
     *
     * Best-effort: a failure in an individual lookup (brain file listing, agent scope,
     * loaded-model query) leaves that field at its default. Any other failure while building
     * the payload is logged and nothing is posted.
     *
     * Does nothing when no view is attached — on entry, or once the payload is ready.
     * Most callers fire this with `void`, so an exception escaping here would surface as an
     * unhandled rejection.
     */
    async _sendReadyStatus() {
        if (!this._view) return;
        let payload: unknown;
        try {
            const config = getConfig();
            const engineKind = resolveEngine(config.ollamaUrl);
            const activeBrain = getActiveBrainProfile();

            // Brain file count; stays 0 if the listing throws.
            let brainFiles = 0;
            try { brainFiles = findBrainFiles(activeBrain.localBrainPath).length; } catch { /* ignore */ }

            // Active agent: default the display name to the file name without `.md`,
            // then prefer the name from the resolved scope when one is available.
            const agentPath = this._context.globalState.get<string>(SidebarChatProvider.lastAgentStateKey, 'none');
            let agentName: string | null = null;
            let scopeFolders = 0;
            let mapped = false;
            if (agentPath && agentPath !== 'none') {
                agentName = path.basename(agentPath).replace(/\.md$/i, '');
                try {
                    const scope = resolveScopeForAgent(agentPath, activeBrain.localBrainPath || '');
                    scopeFolders = scope.folders.length;
                    if (scope.agent?.name) agentName = scope.agent.name;
                    mapped = scope.source !== 'none';
                } catch { /* ignore */ }
            }

            // `null` means "unknown": either the engine is not LM Studio or the query failed.
            let modelLoaded: boolean | null = null;
            if (engineKind === 'lmstudio') {
                try {
                    const loaded = (await this._lmStudio?.loadedModels()) || [];
                    modelLoaded = loaded.includes(config.defaultModel);
                } catch { modelLoaded = null; }
            }

            // The cap applies only when it is enabled (> 0), the model size is known and
            // at most 4B, and the configured window actually exceeds the cap.
            const paramB = estimateModelParamsB(config.defaultModel);
            const cappedForSmallModel = config.smallModelContextCap > 0
                && paramB !== null && paramB <= 4
                && config.contextLength > config.smallModelContextCap;
            const effectiveContextLength = cappedForSmallModel ? config.smallModelContextCap : config.contextLength;

            payload = {
                engine: {
                    kind: engineKind,
                    label: engineKind === 'lmstudio' ? 'LM Studio' : 'Ollama',
                    online: this._modelsCache?.online ?? null,
                },
                model: { name: config.defaultModel, loaded: modelLoaded, paramB },
                brain: { name: activeBrain.name, files: brainFiles },
                agent: { name: agentName, scopeFolders, mapped },
                memory: config.memoryEnabled,
                multiAgent: config.multiAgentEnabled,
                contextLength: effectiveContextLength,
                nominalContextLength: config.contextLength,
                cappedForSmallModel,
            };
        } catch (err: any) {
            logError('Failed to build ready status.', { error: err?.message || String(err) });
            return;
        }
        // An `await` may have run above, so the entry check no longer guarantees a view.
        // Re-read it and bail out quietly instead of dereferencing a missing view outside
        // the try/catch.
        const view = this._view;
        if (!view) return;
        view.webview.postMessage({ type: 'readyStatus', value: payload });
    }
    async _sendBrainProfiles() {
        if (!this._view) return;
        const activeBrain = getActiveBrainProfile();
@@ -411,6 +480,7 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
                profiles
            }
        });
        void this._sendReadyStatus();
    }
    _postBrainProfiles(profiles: any[], activeBrainId: string) {
@@ -1558,9 +1628,18 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
    }
    _getAgentsDir(): string {
-        const defaultPath = 'E:\\Wiki\\Agent\\.agent\\skills';
+        // 1) Explicit config override (works on any OS — useful on Windows or for skills outside the workspace).
-        if (fs.existsSync(defaultPath)) return defaultPath;
+        const configured = (vscode.workspace.getConfiguration('g1nation').get<string>('agentSkillsPath', '') || '').trim();
-        
+        const expanded = configured.startsWith('~/') || configured === '~'
            ? path.join(os.homedir(), configured.slice(1).replace(/^[\\/]/, ''))
            : configured;
        if (expanded && path.isAbsolute(expanded)) {
            if (!fs.existsSync(expanded)) {
                try { fs.mkdirSync(expanded, { recursive: true }); } catch { /* fall through to workspace */ }
            }
            if (fs.existsSync(expanded)) return expanded;
        }
        // 2) Default: <workspace>/.agent/skills
        const workspaceFolders = vscode.workspace.workspaceFolders;
        if (workspaceFolders) {
            const localPath = path.join(workspaceFolders[0].uri.fsPath, '.agent', 'skills');
@@ -1586,6 +1665,7 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
        }
        const lastPath = this._context.globalState.get<string>(SidebarChatProvider.lastAgentStateKey, 'none');
        this._view.webview.postMessage({ type: 'agentsList', value: agents, selected: lastPath });
        void this._sendReadyStatus();
    }
    async _handleProactiveSuggestion(context: string) {
@@ -1629,7 +1709,7 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
        const filePath = path.join(dir, `${safeName}.md`);
        if (!fs.existsSync(filePath)) {
-            fs.writeFileSync(filePath, `# Agent Persona: ${safeName}\\n\\nAdd your instructions here...\\n`, 'utf8');
+            fs.writeFileSync(filePath, `# Agent Persona: ${safeName}\n\nAdd your instructions here...\n`, 'utf8');
        }
        const doc = await vscode.workspace.openTextDocument(filePath);
@@ -1834,6 +1914,8 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
        } catch (error: any) {
            logError('Prompt handling failed in sidebar provider.', { error: error?.message || String(error), promptPreview: summarizeText(value || '', 200) });
            this._view.webview.postMessage({ type: 'error', value: error.message });
        } finally {
            void this._sendReadyStatus();
        }
    }
@@ -1934,6 +2016,7 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
        } finally {
            this._modelDiscoveryInFlight = false;
        }
        void this._sendReadyStatus();
    }
    static _htmlTemplateCache: string | undefined;
@@ -0,0 +1,104 @@
 import * as fs from 'fs';
 import * as os from 'os';
 import * as path from 'path';
 import { getBrainTokenIndex, clearBrainTokenIndex } from '../src/retrieval/brainIndex';
 /**
  * Creates a fresh, uniquely named directory under the OS temp dir
  * (prefix `astra-brain-`) and returns its path.
  */
 function mkTmpBrain(): string {
    const prefix = path.join(os.tmpdir(), 'astra-brain-');
    return fs.mkdtempSync(prefix);
 }
 /**
  * Writes `content` as UTF-8 to the relative path `rel` inside the `brain` directory,
  * creating any missing parent folders first.
  *
  * @returns the full path of the file that was written.
  */
 function writeMd(brain: string, rel: string, content: string): string {
    const target = path.join(brain, rel);
    const parentDir = path.dirname(target);
    fs.mkdirSync(parentDir, { recursive: true });
    fs.writeFileSync(target, content, { encoding: 'utf8' });
    return target;
 }
 /**
  * Sets both the access time and the modification time of `file` to five seconds from now.
  *
  * Some CI / fast machines write within the same millisecond, so a rewrite alone may not
  * produce a distinct mtime; this forces one.
  */
 function bumpMtime(file: string): void {
    const fiveSecondsAhead = new Date(Date.now() + 5000);
    fs.utimesSync(file, fiveSecondsAhead, fiveSecondsAhead);
 }
 // Tests for getBrainTokenIndex. Every test works against a real temporary brain directory
 // on disk: one is created before each test and removed afterwards.
 describe('brainIndex.getBrainTokenIndex', () => {
    let brain: string;
    beforeEach(() => { brain = mkTmpBrain(); });
    afterEach(() => {
        // Reset index state for this brain before deleting the directory; cleanup failures
        // are ignored so they cannot mask the test result.
        clearBrainTokenIndex(brain);
        try { fs.rmSync(brain, { recursive: true, force: true }); } catch { /* ignore */ }
    });
    it('tokenizes files and returns one entry per file', () => {
        const a = writeMd(brain, 'architecture-overview.md', '# Architecture overview\nThis describes the system architecture and design.');
        const b = writeMd(brain, 'records/bug-report.md', '# Bug report\n이 설계는 기존 구조와 충돌 위험이 있습니다.');
        const out = getBrainTokenIndex(brain, [a, b]);
        expect(out).toHaveLength(2);
        // Look entries up by file path so the assertions do not depend on output order.
        const byPath = new Map(out.map(d => [d.filePath, d]));
        expect(byPath.get(a)!.tokens).toContain('architecture');
        expect(byPath.get(a)!.tokens).toContain('design');
        expect(byPath.get(a)!.titleTokens.length).toBeGreaterThan(0);
        expect(byPath.get(b)!.relativePath).toBe(path.join('records', 'bug-report.md'));
        expect(byPath.get(b)!.conflictCount).toBeGreaterThan(0); // "충돌" is a conflict indicator
        expect(byPath.get(a)!.conflictCount).toBe(0);
    });
    it('reuses cached tokens for unchanged files and re-indexes only changed ones', () => {
        const a = writeMd(brain, 'alpha.md', 'alpha keyword stays the same here');
        const b = writeMd(brain, 'beta.md', 'beta original wording goes here');
        const first = getBrainTokenIndex(brain, [a, b]);
        const aTokensRef = first.find(d => d.filePath === a)!.tokens;
        // Re-call without changes — `a` should hand back the *same array reference* (served from cache).
        const second = getBrainTokenIndex(brain, [a, b]);
        expect(second.find(d => d.filePath === a)!.tokens).toBe(aTokensRef);
        // Change b.
        fs.writeFileSync(b, 'gamma replaced everything delta', 'utf8');
        bumpMtime(b);
        const third = getBrainTokenIndex(brain, [a, b]);
        const bTokens = third.find(d => d.filePath === b)!.tokens;
        expect(bTokens).toEqual(expect.arrayContaining(['gamma', 'delta']));
        expect(bTokens).not.toContain('original');
        // a still cached & unchanged.
        expect(third.find(d => d.filePath === a)!.tokens).toBe(aTokensRef);
    });
    it('skips files that vanished between listing and reading', () => {
        const a = writeMd(brain, 'present.md', 'present content');
        // `ghost` is listed but never written to disk.
        const ghost = path.join(brain, 'ghost.md');
        const out = getBrainTokenIndex(brain, [a, ghost]);
        expect(out.map(d => d.filePath)).toEqual([a]);
    });
    it('re-indexes a file that was deleted and recreated (mtime/size mismatch)', () => {
        const a = writeMd(brain, 'one.md', 'one content');
        const two = writeMd(brain, 'two.md', 'two original content');
        getBrainTokenIndex(brain, [a, two]);
        // Delete `two`, index once without it, then recreate it with different content.
        fs.rmSync(two);
        getBrainTokenIndex(brain, [a]);
        const recreated = writeMd(brain, 'two.md', 'completely different replacement content');
        bumpMtime(recreated);
        const out = getBrainTokenIndex(brain, [a, recreated]);
        // Tokens must come from the new content, not from the earlier version of the file.
        expect(out.find(d => d.filePath === recreated)!.tokens).toContain('completely');
        expect(out.find(d => d.filePath === recreated)!.tokens).not.toContain('original');
    });
    it('handles empty/invalid input gracefully', () => {
        // An empty brain path and an empty file list both yield an empty result.
        expect(getBrainTokenIndex('', ['x'])).toEqual([]);
        expect(getBrainTokenIndex(brain, [])).toEqual([]);
    });
    it('persists the index to <brain>/.astra/brain-index.json (debounced) and adds a .gitignore', (done) => {
        const a = writeMd(brain, 'persisted.md', 'persist me to disk');
        getBrainTokenIndex(brain, [a]);
        // Inspect the disk only after 2200 ms, since the write is described as debounced.
        // NOTE(review): presumably the debounce delay is shorter than this — confirm against brainIndex.
        setTimeout(() => {
            try {
                const astraDir = path.join(brain, '.astra');
                expect(fs.existsSync(path.join(astraDir, 'brain-index.json'))).toBe(true);
                expect(fs.readFileSync(path.join(astraDir, '.gitignore'), 'utf8')).toContain('*');
                const persisted = JSON.parse(fs.readFileSync(path.join(astraDir, 'brain-index.json'), 'utf8'));
                expect(persisted.version).toBeGreaterThanOrEqual(1);
                expect(persisted.entries[a].tokens).toContain('persist');
                done();
            } catch (e) { done(e as Error); } // report assertion failures through the done callback
        }, 2200);
    }, 6000); // per-test timeout (ms), longer than the 2200 ms wait above
 });
@@ -0,0 +1,113 @@
 import {
    estimateTokens,
    estimateMessagesTokens,
    computeOutputBudget,
    trimHistoryToBudget,
    truncateSystemPromptContext,
    classifyStopReason,
    estimateModelParamsB,
    CONTEXT_OPEN_MARKER,
    CONTEXT_CLOSE_MARKER,
    type BudgetMessage,
 } from '../src/lib/contextManager';
 // estimateModelParamsB: parameter-count hints parsed from model names.
 describe('contextManager.estimateModelParamsB', () => {
    it('reads common naming schemes', () => {
        // [model name, expected parameter count in billions]
        const expectedByName = [
            ['qwen2.5-7b-instruct', 7],
            ['llama-3.1-8b', 8],
            ['google/gemma-3n-e2b-it', 2],
            ['gemma4:e2b', 2],
            ['Qwen3-30B-A3B', 30],
        ] as const;
        for (const [modelName, paramsB] of expectedByName) {
            expect(estimateModelParamsB(modelName)).toBe(paramsB);
        }
    });

    it('returns null when there is no clear parameter hint', () => {
        const namesWithoutHint = [
            'phi-3-mini',
            'gpt-4o',
            '',
            'llama-q4bit', // quantization, not params
            'mixtral-8x7b', // MoE size is ambiguous — don't guess
        ];
        for (const modelName of namesWithoutHint) {
            expect(estimateModelParamsB(modelName)).toBeNull();
        }
    });
 });
 // computeOutputBudget: how many output tokens remain for a given input size.
 describe('contextManager.computeOutputBudget', () => {
    // Limits shared by every case below.
    const limits = {
        contextLength: 32768,
        maxOutputTokens: 4096,
        safetyMargin: 2048,
        minOutputTokens: 512,
    };

    it('caps at maxOutputTokens when there is plenty of room', () => {
        const { maxOutputTokens, tight } = computeOutputBudget(1000, limits);
        expect(maxOutputTokens).toBe(4096);
        expect(tight).toBe(false);
    });

    it('shrinks output as input grows', () => {
        // 32768 - 30000 - 2048 = 720
        const { maxOutputTokens, tight } = computeOutputBudget(30000, limits);
        expect(maxOutputTokens).toBe(720);
        expect(tight).toBe(false);
    });

    it('flags tight and floors at minOutputTokens when input nearly fills the window', () => {
        // available 32768 - 31000 - 2048 = -280, which is ≤ 512
        const { maxOutputTokens, tight } = computeOutputBudget(31000, limits);
        expect(maxOutputTokens).toBe(512);
        expect(tight).toBe(true);
    });
 });
 // trimHistoryToBudget: dropping old chat messages to fit a token budget.
 describe('contextManager.trimHistoryToBudget', () => {
    // Marker factory handed to trimHistoryToBudget; builds an internal system message of the
    // form "[dropped N]". The over-budget test expects it as the first returned message.
    const marker = (n: number): BudgetMessage => ({ role: 'system', content: `[dropped ${n}]`, internal: true });
    it('keeps everything when under budget', () => {
        const msgs: BudgetMessage[] = [{ role: 'user', content: 'hi' }, { role: 'assistant', content: 'hello' }];
        const r = trimHistoryToBudget(msgs, 10_000, marker);
        expect(r.droppedCount).toBe(0);
        expect(r.messages).toEqual(msgs);
    });
    it('drops oldest messages and prepends a marker when over budget', () => {
        // Ten 400-character messages with alternating user/assistant roles.
        const msgs: BudgetMessage[] = Array.from({ length: 10 }, (_, i) => ({ role: i % 2 ? 'assistant' : 'user', content: 'x'.repeat(400) }));
        const r = trimHistoryToBudget(msgs, 250, marker); // each msg ≈ 400*0.3+4 = 124 tokens
        expect(r.droppedCount).toBeGreaterThan(0);
        expect(r.messages[0].content).toMatch(/^\[dropped \d+\]$/);
        // most recent message survives
        expect(r.messages[r.messages.length - 1]).toEqual(msgs[msgs.length - 1]);
        // The upper bound allows for the marker's own tokens on top of the budget.
        expect(r.tokensAfter).toBeLessThanOrEqual(250 + estimateMessagesTokens([marker(1)]));
    });
    it('always keeps at least the last message even if it alone exceeds the budget', () => {
        const msgs: BudgetMessage[] = [{ role: 'user', content: 'short' }, { role: 'user', content: 'y'.repeat(5000) }];
        // A budget of 10 tokens is far below the size of the 5000-character final message.
        const r = trimHistoryToBudget(msgs, 10, marker);
        expect(r.messages.some(m => m.content === 'y'.repeat(5000))).toBe(true);
    });
 });
 // truncateSystemPromptContext: shrinking an oversized system prompt to a token limit.
 describe('contextManager.truncateSystemPromptContext', () => {
    it('leaves a small prompt untouched', () => {
        const p = 'You are helpful.';
        expect(truncateSystemPromptContext(p, 1000)).toEqual({ prompt: p, truncated: false });
    });
    it('trims only the [CONTEXT]…[/CONTEXT] body, preserving head and tail', () => {
        // Layout: head + open marker + very large body + close marker + tail.
        const head = 'CORE INSTRUCTIONS that must never be dropped. ' + 'a'.repeat(200);
        const body = 'BIG BRAIN CONTEXT ' + 'b'.repeat(20_000);
        const tail = 'CRITICAL NEGATIVE CONSTRAINTS — also never dropped. ' + 'c'.repeat(200);
        const prompt = head + CONTEXT_OPEN_MARKER + body + CONTEXT_CLOSE_MARKER + tail;
        const out = truncateSystemPromptContext(prompt, 400);
        expect(out.truncated).toBe(true);
        // Text from both the head and the tail must still be present, along with the close marker.
        expect(out.prompt).toContain('CORE INSTRUCTIONS');
        expect(out.prompt).toContain('CRITICAL NEGATIVE CONSTRAINTS');
        expect(out.prompt).toContain(CONTEXT_CLOSE_MARKER.trim());
        // The bulk of the body is gone
        expect(out.prompt.length).toBeLessThan(prompt.length / 2);
        // Upper bound = limit + tail tokens + 64.
        // NOTE(review): the 64-token slack presumably covers markers / a truncation notice — confirm.
        expect(estimateTokens(out.prompt)).toBeLessThanOrEqual(400 + estimateTokens(tail) + 64);
    });
    it('falls back to a hard tail-cut when there is no [CONTEXT] marker', () => {
        const prompt = 'instructions ' + 'z'.repeat(50_000);
        const out = truncateSystemPromptContext(prompt, 200);
        expect(out.truncated).toBe(true);
        expect(out.prompt.length).toBeLessThan(prompt.length);
        // The start of the prompt is kept; the cut removes text from the end.
        expect(out.prompt.startsWith('instructions')).toBe(true);
    });
 });
 // classifyStopReason: normalizing engine stop reasons into a shared set of kinds.
 describe('contextManager.classifyStopReason', () => {
    it('maps engine-specific reasons to common kinds', () => {
        // [raw stop reason, expected common kind]
        const expectedKinds = [
            ['eosFound', 'complete'],
            ['stop', 'complete'],
            ['length', 'output-limit'],
            ['maxPredictedTokensReached', 'output-limit'],
            ['contextLengthReached', 'context-overflow'],
            ['userStopped', 'user-stopped'],
            ['failed', 'error'],
            [undefined, 'unknown'],
        ] as const;
        for (const [rawReason, kind] of expectedKinds) {
            expect(classifyStopReason(rawReason)).toBe(kind);
        }
    });
 });
@@ -6,6 +6,7 @@
 */
 import { LMStudioStreamer } from '../src/lmstudio/streamer';
 import type { ChatStreamEvent } from '../src/lmstudio/streamer';
 import type { ILMStudioClient } from '../src/lmstudio/client';
 class FakeModel {
@@ -15,14 +16,16 @@ class FakeModel {
    public failNext: Error | null = null;
    public chunks: string[] = [];
-    constructor(opts: { chunks?: string[]; failAfter?: number; throwOnRespond?: Error } = {}) {
+    constructor(opts: { chunks?: string[]; failAfter?: number; throwOnRespond?: Error; stopReason?: string } = {}) {
        this.chunks = opts.chunks ?? ['Hel', 'lo, ', 'world'];
        this._failAfter = opts.failAfter;
        this._throwOnRespond = opts.throwOnRespond;
        this.stopReason = opts.stopReason;
    }
    private _failAfter?: number;
    private _throwOnRespond?: Error;
    public stopReason: string | undefined;
    respond(chat: any, opts: any) {
        if (this._throwOnRespond) {
@@ -32,10 +35,15 @@ class FakeModel {
        this.lastOpts = opts;
        const chunks = this.chunks;
        const failAfter = this._failAfter;
        const stopReason = this.stopReason;
        let i = 0;
        const self = this;
-        return {
+        // Real OngoingPrediction is both async-iterable AND a thenable resolving to a
        // PredictionResult with `.stats.stopReason`. Mirror that shape so the streamer
        // can read the stop reason after the stream drains.
        const prediction: any = {
            cancel: async () => { self.cancelCount++; },
            then(resolve: (v: any) => void) { resolve({ stats: { stopReason } }); },
            [Symbol.asyncIterator]() {
                return {
                    async next() {
@@ -54,6 +62,7 @@ class FakeModel {
                };
            },
        };
        return prediction;
    }
 }
@@ -78,9 +87,19 @@ class FakeClient implements ILMStudioClient {
    }
 }
-async function collect(stream: AsyncIterable<{ token: string }>): Promise<string[]> {
+// The streamer emits a trailing { token: '', stopReason } event on normal completion;
 // `collect` returns just the non-empty content tokens (what every real consumer uses).
 async function collect(stream: AsyncIterable<ChatStreamEvent>): Promise<string[]> {
    const out: string[] = [];
-    for await (const { token } of stream) out.push(token);
+    for await (const { token } of stream) {
        if (token) out.push(token);
    }
    return out;
 }
 /**
  * Drains `stream` and returns every event it yielded, in order.
  * Nothing is filtered out, so events with an empty `token` are included.
  */
 async function collectEvents(stream: AsyncIterable<ChatStreamEvent>): Promise<ChatStreamEvent[]> {
    const events: Array<ChatStreamEvent> = [];
    for await (const event of stream) {
        events.push(event);
    }
    return events;
 }
@@ -98,6 +117,22 @@ describe('LMStudioStreamer', () => {
        expect(client.model.lastOpts.temperature).toBe(0.4);
    });
    // Completion metadata: the fake model yields one content chunk and reports a stop reason.
    // The streamer is expected to emit that chunk, then one more event with an empty token
    // that carries the stop reason.
    test('emits a trailing stopReason event from prediction stats', async () => {
        const client = new FakeClient(new FakeModel({ chunks: ['hi'], stopReason: 'maxPredictedTokensReached' }));
        const streamer = new LMStudioStreamer(client);
        const events = await collectEvents(streamer.stream({
            modelName: 'm1',
            messages: [{ role: 'user', content: 'hi' }],
            temperature: 0.1,
            maxTokens: 64,
        }));
        // One content event followed by the empty-token trailer.
        expect(events.map(e => e.token)).toEqual(['hi', '']);
        expect(events[events.length - 1].stopReason).toBe('maxPredictedTokensReached');
        // maxTokens / contextOverflowPolicy are forwarded to the SDK
        expect(client.model.lastOpts.maxTokens).toBe(64);
        expect(client.model.lastOpts.contextOverflowPolicy).toBe('stopAtLimit');
    });
    test('passes signal through to the SDK', async () => {
        const client = new FakeClient();
        const streamer = new LMStudioStreamer(client);