chore: version up to 2.80.40 and package with ADR-0008
This commit is contained in:
+6
-7
@@ -551,14 +551,13 @@ export class AgentExecutor {
|
||||
// (2) 대화 기록을 남은 예산에 맞게 압축하고 (UI 표시용 chatHistory 는 건드리지 않음)
|
||||
// (3) 동적으로 출력 상한(maxOutputTokens)을 계산한다.
|
||||
// ──────────────────────────────────────────────────────────────────
|
||||
// Genuinely tiny models (≤3B) sometimes fail on prompts that fit their *nominal* context
|
||||
// but exceed their *effective* capability (server log shows truncated=0 yet eval time≈0ms —
|
||||
// the model emitted EOS as the first token). When detected, budget against a smaller
|
||||
// effective window so the system prompt / RAG / history get shrunk proactively.
|
||||
// Note: 4B+ models (e.g. gemma-4-e4b with a 100k+ window) are competent — DON'T cap them,
|
||||
// or the output budget gets squeezed to the minimum and answers come out truncated.
|
||||
// Optional opt-in guard (g1nation.smallModelContextCap, OFF/0 by default): some very small
|
||||
// models (≤3B) emit EOS as the first token when the prompt is near their context window
|
||||
// even though it nominally fits. If the user opted in, budget ≤3B models against that
|
||||
// smaller effective window. Never applied to 4B+ models, and never when the setting is 0 —
|
||||
// capping squeezes the output-token budget, so it's a knob, not a default.
|
||||
const modelParamB = estimateModelParamsB(actualModel);
|
||||
const smallModelCap = config.smallModelContextCap; // 0 disables this guard
|
||||
const smallModelCap = config.smallModelContextCap; // 0 = disabled (default)
|
||||
const cappedForSmallModel = smallModelCap > 0
|
||||
&& modelParamB !== null && modelParamB <= 3
|
||||
&& config.contextLength > smallModelCap;
|
||||
|
||||
+1
-1
@@ -122,7 +122,7 @@ export function getConfig(): IAgentConfig {
|
||||
return v === 'truncateMiddle' || v === 'rollingWindow' ? v : 'stopAtLimit';
|
||||
})(),
|
||||
autoCompactHistory: cfg.get<boolean>('autoCompactHistory', true),
|
||||
smallModelContextCap: Math.max(0, cfg.get<number>('smallModelContextCap', 16384)),
|
||||
smallModelContextCap: Math.max(0, cfg.get<number>('smallModelContextCap', 0)),
|
||||
autoContinueOnOutputLimit: cfg.get<boolean>('autoContinueOnOutputLimit', true),
|
||||
maxAutoContinuations: Math.max(0, Math.min(10, cfg.get<number>('maxAutoContinuations', 4))),
|
||||
finalOnlyRetryOnThoughtLeak: cfg.get<boolean>('finalOnlyRetryOnThoughtLeak', true)
|
||||
|
||||
Reference in New Issue
Block a user