chore: v2.2.73 — ASTRA-DEBUG 로그 레벨 + webview CSP font-src 보강

- ASTRA-DEBUG 정상 흐름 로그를 console.error → logInfo/console.log 로 강등 (chatHandlers, extension, slashRouter): DevTools에 ERR로 찍히던 오탐 제거
- sidebar webview에 명시적 CSP meta 추가 + font-src에 data: 허용 (sidebar.html, sidebarProvider._getHtml): VS Code outer iframe이 codicon.ttf를 data:font/ttf 로 inject하면서 기본 CSP에 막혀 매 prompt 마다 violation 경고가 찍히던 문제 해소
- 누적된 LM Studio / agent / 컨텍스트 매니저 / 테스트 갱신 동반

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-23 15:52:19 +09:00
parent 36db170844
commit 0712014fcb
43 changed files with 2417 additions and 977 deletions
@@ -143,6 +143,65 @@ export interface IAgentConfig {
     * 누적됨. false면 critique은 그 미션 한정으로만 사용되고 사라짐.
     */
    autoLessonFromReflection: boolean;
+    // ─── 5-stage workflow (Drafter + Synthesizer) ───
+    /** Drafter(=Writer) 출력 뒤에 SynthesizerAgent로 최종 다듬기 패스를 한 번 더 돌릴지. 기본 true. */
+    workflowSynthesizerEnabled: boolean;
+    /**
+     * Multi-Agent 발동 모드:
+     *  - 'auto' (기본): 작은 모델(≤4B) 감지 OR prompt가 컨텍스트의 큰 비중을 차지할 때만 자동 발동.
+     *  - 'always': 인사·짧은 잡담을 제외한 모든 요청에 5단계 파이프라인 사용.
+     *  - 'off': 기존 single-agent 동작 (수동 토글 / 키워드 매칭만 사용).
+     */
+    workflowMultiAgentMode: 'auto' | 'always' | 'off';
+    /**
+     * 'auto' 모드에서 prompt + brain context 토큰이 contextLength 의 이 비율(0~1)을 넘으면 강제 5단계.
+     * 기본 0.30 — 작은 모델이 30% 이상을 input으로 먹기 시작하면 한 번에 끝내려는 시도가 위험.
+     */
+    workflowAutoCtxFractionThreshold: number;
+    // ─── Stream 표시 ───
+    /**
+     * 모델 토큰을 받는 즉시 채팅 버블에 흘려보낼지 여부.
+     *  - false(기본): 토큰은 내부에서만 누적, sanitize 끝난 최종 답변만 한 번에 표시 → Harmony/think 마커 누설 원천 차단.
+     *  - true: legacy 라이브 스트리밍. 모델 출력에 control token 이 섞여 나오면 잠깐 화면에 보일 수 있음.
+     */
+    liveStreamTokens: boolean;
+    /**
+     * 최종 답변 포맷.
+     *  - 'plain' (기본): 모델이 무심코 내보낸 `##`, `**`, `__`, `> `, `* ` 등의 마크다운 마커를 후처리로 모두 제거.
+     *    섹션 라벨 텍스트(예: "핵심 요약")는 유지되지만 헤더 마커는 사라져 깔끔한 plain text 로 표시.
+     *  - 'markdown': legacy 동작. 모델 출력을 그대로 렌더러에 넘김.
+     */
+    outputFormat: 'plain' | 'markdown';
+    /**
+     * 자동 기록 (project chronicle auto-record). true 면 매 prompt 후 의미 있는 turn 을
+     * Wiki/Chronicle 폴더에 자동으로 저장. false 면 자동 저장 OFF (수동 기록은 계속 가능).
+     * 사이드바 도구 드롭다운의 토글 항목으로 즉시 변경 가능.
+     */
+    chronicleAutoRecord: boolean;
+    // ─── LM Studio sampling (applied to both SDK and REST paths) ───
+    /** LM Studio nucleus sampling cutoff (0~1). Lower tightens; 1 disables. */
+    lmStudioTopP: number;
+    /** LM Studio top-K cutoff (0 disables). */
+    lmStudioTopK: number;
+    /** LM Studio min-P floor (0~1, 0 disables). */
+    lmStudioMinP: number;
+    /** LM Studio repeat penalty (1 disables, 1.05–1.2 typical). */
+    lmStudioRepeatPenalty: number;
+    /** Render tok/s + TTFT from prediction stats into context-budget badge. */
+    lmStudioShowStatsInBudget: boolean;
+    /** LM Studio model key of a small draft model for speculative decoding ('' = disabled). */
+    lmStudioDraftModel: string;
+    /** Load-time options. Read once per load(); changing these after load needs a reload. */
+    lmStudioLoad: {
+        flashAttention: boolean;
+        /** "max" | "off" | number 0-1 */
+        gpuOffloadRatio: 'max' | 'off' | number;
+        offloadKVCacheToGpu: boolean;
+        keepModelInMemory: boolean;
+        useFp16ForKVCache: boolean;
+        /** 0 = engine default */
+        evalBatchSize: number;
+    };
 }

 // ─── 경로 정규화 유틸리티 ───
@@ -245,6 +304,40 @@ export function getConfig(): IAgentConfig {
        companyPixelOfficeBubbles: cfg.get<boolean>('company.pixelOffice.bubbles', true),
        enableReflection: cfg.get<boolean>('enableReflection', true),
        autoLessonFromReflection: cfg.get<boolean>('autoLessonFromReflection', true),
+        workflowSynthesizerEnabled: cfg.get<boolean>('workflow.synthesizerEnabled', true),
+        workflowMultiAgentMode: ((): 'auto' | 'always' | 'off' => {
+            const v = (cfg.get<string>('workflow.multiAgentMode', 'auto') || 'auto').trim().toLowerCase();
+            return v === 'always' || v === 'off' ? v : 'auto';
+        })(),
+        workflowAutoCtxFractionThreshold: Math.max(0.05, Math.min(0.95,
+            cfg.get<number>('workflow.autoCtxFractionThreshold', 0.30)
+        )),
+        liveStreamTokens: cfg.get<boolean>('liveStreamTokens', true),
+        outputFormat: ((): 'plain' | 'markdown' => {
+            const v = (cfg.get<string>('outputFormat', 'plain') || 'plain').trim().toLowerCase();
+            return v === 'markdown' ? 'markdown' : 'plain';
+        })(),
+        chronicleAutoRecord: cfg.get<boolean>('chronicleAutoRecord', true),
+        lmStudioTopP: Math.max(0, Math.min(1, cfg.get<number>('lmStudio.sampling.topP', 0.9))),
+        lmStudioTopK: Math.max(0, cfg.get<number>('lmStudio.sampling.topK', 20)),
+        lmStudioMinP: Math.max(0, Math.min(1, cfg.get<number>('lmStudio.sampling.minP', 0.05))),
+        lmStudioRepeatPenalty: Math.max(1, Math.min(2, cfg.get<number>('lmStudio.sampling.repeatPenalty', 1.1))),
+        lmStudioShowStatsInBudget: cfg.get<boolean>('lmStudio.statsInBudget', true),
+        lmStudioDraftModel: (cfg.get<string>('lmStudio.draftModel', '') || '').trim(),
+        lmStudioLoad: {
+            flashAttention: cfg.get<boolean>('lmStudio.load.flashAttention', true),
+            gpuOffloadRatio: ((): 'max' | 'off' | number => {
+                const raw = (cfg.get<string>('lmStudio.load.gpuOffloadRatio', 'max') || 'max').trim().toLowerCase();
+                if (raw === 'max' || raw === 'off') return raw;
+                const n = Number(raw);
+                if (Number.isFinite(n)) return Math.max(0, Math.min(1, n));
+                return 'max';
+            })(),
+            offloadKVCacheToGpu: cfg.get<boolean>('lmStudio.load.offloadKVCacheToGpu', true),
+            keepModelInMemory: cfg.get<boolean>('lmStudio.load.keepModelInMemory', true),
+            useFp16ForKVCache: cfg.get<boolean>('lmStudio.load.useFp16ForKVCache', false),
+            evalBatchSize: Math.max(0, cfg.get<number>('lmStudio.load.evalBatchSize', 0)),
+        },
    };
 }