feat: v2.2.63 — 한국어 오타 최소화 (채팅 temperature 설정 + anti-glitch 샘플링)
- streamer.ts: LM Studio SDK 호출에 topP/topK/minP/repeatPenalty 추가 — 저확률 오답 토큰을 잘라 한글 음절 깨짐(붕괴→붕점) 억제
- 채팅 기본 temperature 0.7 → 0.3 (분석/업무형 답변 안정화)
- 신규 설정 g1nation.chatTemperature — Settings 패널 '고급' 섹션에서 조절 가능 (config.ts / settingsPanelProvider / settings-panel.html+js)

chronicle 기록(ADR-0022, ADR-0023) 포함.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -437,7 +437,7 @@ export class AgentExecutor {
|
||||
brainEnabled = false,
|
||||
loopDepth = 0,
|
||||
visionContent,
|
||||
temperature = 0.7,
|
||||
temperature = getConfig().chatTemperature,
|
||||
systemPrompt = getSystemPrompt()
|
||||
} = options;
|
||||
const { ollamaUrl, defaultModel: configDefaultModel, timeout, multiAgentEnabled } = getConfig();
|
||||
|
||||
@@ -24,6 +24,8 @@ export interface IAgentConfig {
|
||||
activeBrainId: string;
|
||||
maxContextSize: number;
|
||||
maxAutoSteps: number;
|
||||
/** 채팅 응답 생성 temperature. 낮을수록 한국어 오타·깨진 토큰이 줄어든다. */
|
||||
chatTemperature: number;
|
||||
dryRun: boolean;
|
||||
multiAgentEnabled: boolean;
|
||||
memoryEnabled: boolean;
|
||||
@@ -204,6 +206,7 @@ export function getConfig(): IAgentConfig {
|
||||
activeBrainId: activeBrain.id,
|
||||
maxContextSize: cfg.get<number>('maxContextSize', 12000),
|
||||
maxAutoSteps: cfg.get<number>('maxAutoSteps', 50),
|
||||
chatTemperature: Math.min(2, Math.max(0, cfg.get<number>('chatTemperature', 0.3))),
|
||||
dryRun: cfg.get<boolean>('dryRun', false),
|
||||
multiAgentEnabled: cfg.get<boolean>('multiAgentEnabled', false),
|
||||
memoryEnabled: cfg.get<boolean>('memoryEnabled', true),
|
||||
|
||||
@@ -82,6 +82,7 @@ interface SettingsState {
|
||||
multiAgentEnabled: boolean;
|
||||
maxAutoSteps: number;
|
||||
maxContextSize: number;
|
||||
chatTemperature: number;
|
||||
};
|
||||
datacollect: {
|
||||
bridgeUrl: string;
|
||||
@@ -581,6 +582,9 @@ export class SettingsPanelProvider implements vscode.WebviewViewProvider {
|
||||
if (typeof msg.maxContextSize === 'number' && Number.isFinite(msg.maxContextSize)) {
|
||||
await this._safeConfigUpdate('maxContextSize', Math.max(1000, Math.floor(msg.maxContextSize)));
|
||||
}
|
||||
if (typeof msg.chatTemperature === 'number' && Number.isFinite(msg.chatTemperature)) {
|
||||
await this._safeConfigUpdate('chatTemperature', Math.max(0, Math.min(2, msg.chatTemperature)));
|
||||
}
|
||||
}
|
||||
|
||||
// ────────────── Datacollect (slash 명령) ──────────────
|
||||
@@ -652,6 +656,7 @@ export class SettingsPanelProvider implements vscode.WebviewViewProvider {
|
||||
multiAgentEnabled: cfg.get<boolean>('multiAgentEnabled', false),
|
||||
maxAutoSteps: cfg.get<number>('maxAutoSteps', 50) ?? 50,
|
||||
maxContextSize: cfg.get<number>('maxContextSize', 32000) ?? 32000,
|
||||
chatTemperature: cfg.get<number>('chatTemperature', 0.3) ?? 0.3,
|
||||
},
|
||||
datacollect: {
|
||||
bridgeUrl: cfg.get<string>('datacollectBridgeUrl', '') || '',
|
||||
|
||||
@@ -75,6 +75,15 @@ export class LMStudioStreamer implements IChatStreamer {
|
||||
const prediction = (model as any).respond(req.messages, {
|
||||
temperature: req.temperature,
|
||||
maxTokens: req.maxTokens ?? 4096,
|
||||
// Glitch suppression: a small / quantized model samples wrong
|
||||
// neighbour tokens (Korean syllable corruption like 붕괴→붕점,
|
||||
// 핵심→핵점) when the distribution is left wide. A tight nucleus
|
||||
// + top-k and a min-p floor cut the low-probability tail;
|
||||
// repeatPenalty curbs stutter (것입니다서입니다).
|
||||
topPSampling: 0.9,
|
||||
topKSampling: 20,
|
||||
minPSampling: 0.05,
|
||||
repeatPenalty: 1.1,
|
||||
// Safety net: if our own token budgeting still underestimated and the prompt
|
||||
// exceeds the model's context window, decide whether the SDK should fail
|
||||
// loudly (stopAtLimit — default) or silently drop content.
|
||||
|
||||
Reference in New Issue
Block a user