chore: bump version to 2.80.39 and package with UI/config polish
This commit is contained in:
+17
-8
@@ -551,14 +551,16 @@ export class AgentExecutor {
|
||||
// (2) compress the conversation history to fit the remaining budget (the UI-facing chatHistory is left untouched), and
|
||||
// (3) dynamically compute the output cap (maxOutputTokens).
|
||||
// ──────────────────────────────────────────────────────────────────
|
||||
// Small models (≤4B) routinely fail on prompts that fit their *nominal* context but
|
||||
// exceed their *effective* capability (server log shows truncated=0 yet eval time≈0ms —
|
||||
// Genuinely tiny models (≤3B) sometimes fail on prompts that fit their *nominal* context
|
||||
// but exceed their *effective* capability (server log shows truncated=0 yet eval time≈0ms —
|
||||
// the model emitted EOS as the first token). When detected, budget against a smaller
|
||||
// effective window so the system prompt / RAG / history get shrunk proactively.
|
||||
// Note: 4B+ models (e.g. gemma-4-e4b with a 100k+ window) are competent — DON'T cap them,
|
||||
// or the output budget gets squeezed to the minimum and answers come out truncated.
|
||||
const modelParamB = estimateModelParamsB(actualModel);
|
||||
const smallModelCap = config.smallModelContextCap; // 0 disables this guard
|
||||
const cappedForSmallModel = smallModelCap > 0
|
||||
&& modelParamB !== null && modelParamB <= 4
|
||||
&& modelParamB !== null && modelParamB <= 3
|
||||
&& config.contextLength > smallModelCap;
|
||||
const effectiveContextLength = cappedForSmallModel ? smallModelCap : config.contextLength;
|
||||
if (cappedForSmallModel) {
|
||||
@@ -664,7 +666,7 @@ export class AgentExecutor {
|
||||
brainFiles: brainFiles.length,
|
||||
imageCount,
|
||||
tight: outputBudget.tight,
|
||||
smallModel: cappedForSmallModel || (modelParamB !== null && modelParamB <= 3 && inputTokens > 8000),
|
||||
smallModel: cappedForSmallModel || (modelParamB !== null && modelParamB <= 3 && inputTokens > 12000),
|
||||
},
|
||||
});
|
||||
// If the user's message reads like a regression complaint ("또 안 돼", "비슷한 실수", "왜 반복돼"…),
|
||||
@@ -913,8 +915,9 @@ export class AgentExecutor {
|
||||
if (config.autoContinueOnOutputLimit && config.maxAutoContinuations > 0 && loopDepth === 0) {
|
||||
const originalUserPrompt = prompt || (this.chatHistory.find(m => m.role === 'user' && typeof m.content === 'string')?.content as string) || '';
|
||||
let lastOutputTokens = estimateTokens(cleaned.visible);
|
||||
let lastMaxOutputTokens = maxOutputTokens; // budget the last round actually had (≠ first gen's after round 1)
|
||||
while (
|
||||
shouldAutoContinue(classifyStopReason(finishStopReason), cleaned.visible, lastOutputTokens, maxOutputTokens)
|
||||
shouldAutoContinue(classifyStopReason(finishStopReason), cleaned.visible, lastOutputTokens, lastMaxOutputTokens)
|
||||
&& continuationCount < config.maxAutoContinuations
|
||||
&& !this.abortController?.signal.aborted
|
||||
&& !this.isStaleRun(runId)
|
||||
@@ -926,10 +929,10 @@ export class AgentExecutor {
|
||||
{ role: 'system', content: CONTINUATION_SYSTEM_PROMPT, internal: true },
|
||||
{ role: 'user', content: buildContinuationUserPrompt(originalUserPrompt, cleaned.visible) },
|
||||
];
|
||||
const contMax = computeOutputBudget(estimateMessagesTokens(contMsgs), ctxLimits).maxOutputTokens;
|
||||
lastMaxOutputTokens = computeOutputBudget(estimateMessagesTokens(contMsgs), ctxLimits).maxOutputTokens;
|
||||
const cr = await this.callNonStreaming({
|
||||
baseUrl: ollamaUrl, modelName: actualModel, engine, messages: contMsgs,
|
||||
temperature, maxTokens: contMax, contextLength: ctxLimits.contextLength,
|
||||
temperature, maxTokens: lastMaxOutputTokens, contextLength: ctxLimits.contextLength,
|
||||
signal: this.abortController?.signal,
|
||||
});
|
||||
finishStopReason = cr.stopReason;
|
||||
@@ -938,9 +941,15 @@ export class AgentExecutor {
|
||||
logInfo('Continuation produced no visible text — stopping.', { model: actualModel, round: continuationCount });
|
||||
break;
|
||||
}
|
||||
const before = cleaned.visible;
|
||||
cleaned = { ...cleaned, visible: mergeContinuationParts(cleaned.visible, ccl.visible), wasThoughtOnly: false };
|
||||
lastOutputTokens = estimateTokens(ccl.visible);
|
||||
logInfo('Auto-continued the answer.', { model: actualModel, round: continuationCount, addedChars: ccl.visible.length, totalChars: cleaned.visible.length, contStopReason: cr.stopReason });
|
||||
logInfo('Auto-continued the answer.', { model: actualModel, round: continuationCount, addedChars: ccl.visible.length, totalChars: cleaned.visible.length, contStopReason: cr.stopReason, contMaxTokens: lastMaxOutputTokens });
|
||||
// Guard against a continuation that adds (almost) nothing new after dedup — stop instead of spinning.
|
||||
if (cleaned.visible.length - before.length < 20) {
|
||||
logInfo('Continuation added negligible new text — stopping.', { model: actualModel, round: continuationCount });
|
||||
break;
|
||||
}
|
||||
} catch (e: any) {
|
||||
logError('Auto-continuation failed.', { model: actualModel, round: continuationCount, error: e?.message ?? String(e) });
|
||||
break;
|
||||
|
||||
+2
-2
@@ -122,9 +122,9 @@ export function getConfig(): IAgentConfig {
|
||||
return v === 'truncateMiddle' || v === 'rollingWindow' ? v : 'stopAtLimit';
|
||||
})(),
|
||||
autoCompactHistory: cfg.get<boolean>('autoCompactHistory', true),
|
||||
smallModelContextCap: Math.max(0, cfg.get<number>('smallModelContextCap', 8192)),
|
||||
smallModelContextCap: Math.max(0, cfg.get<number>('smallModelContextCap', 16384)),
|
||||
autoContinueOnOutputLimit: cfg.get<boolean>('autoContinueOnOutputLimit', true),
|
||||
maxAutoContinuations: Math.max(0, Math.min(10, cfg.get<number>('maxAutoContinuations', 3))),
|
||||
maxAutoContinuations: Math.max(0, Math.min(10, cfg.get<number>('maxAutoContinuations', 4))),
|
||||
finalOnlyRetryOnThoughtLeak: cfg.get<boolean>('finalOnlyRetryOnThoughtLeak', true)
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user