chore: bump version to 2.80.37 and package with response recovery
This commit is contained in:
+97
-3
@@ -41,6 +41,15 @@ import { MemoryManager } from './memory';
|
||||
import { RetrievalOrchestrator } from './retrieval';
|
||||
import { buildLessonChecklistBlock, isQaRegressionFeedback, findUnaddressedChecklistItems } from './retrieval/lessonHelpers';
|
||||
import { resolveScopeForAgent } from './skills/agentKnowledgeMap';
|
||||
import {
|
||||
extractVisibleFinal,
|
||||
shouldFinalOnlyRetry,
|
||||
shouldAutoContinue,
|
||||
mergeContinuationParts,
|
||||
buildContinuationUserPrompt,
|
||||
FINAL_ONLY_DIRECTIVE,
|
||||
CONTINUATION_SYSTEM_PROMPT,
|
||||
} from './core/responseRecovery';
|
||||
import {
|
||||
estimateTokens,
|
||||
estimateMessagesTokens,
|
||||
@@ -846,11 +855,95 @@ export class AgentExecutor {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Thought Quarantine + Final-only Retry + Auto-Continuation ──
|
||||
// The user is waiting for an answer, not for a chance to manage the generation engine:
|
||||
// (a) hidden reasoning (Harmony channels, <think>…, "Thinking Process:") never reaches
|
||||
// the screen — stripped here, and from what executeActions / chatHistory see;
|
||||
// (b) if the model emitted *only* reasoning → silently retry, final-answer-only;
|
||||
// (c) if the answer was cut off at the output ceiling → continue it internally with a
|
||||
// *compressed* request (original question + the answer so far), up to N rounds.
|
||||
let cleaned = extractVisibleFinal(aiResponseText);
|
||||
if (cleaned.hadHiddenReasoning) {
|
||||
logInfo('Stripped hidden reasoning from the model output.', {
|
||||
model: actualModel, hiddenChars: cleaned.hiddenReasoning.length,
|
||||
visibleChars: cleaned.visible.length, hadFinalChannel: cleaned.hadFinalChannel,
|
||||
thoughtOnly: cleaned.wasThoughtOnly,
|
||||
});
|
||||
}
|
||||
|
||||
// (b) Final-only retry — the reply was reasoning-only, no visible answer.
|
||||
if (shouldFinalOnlyRetry(cleaned)
|
||||
&& config.finalOnlyRetryOnThoughtLeak
|
||||
&& loopDepth === 0
|
||||
&& !this.abortController?.signal.aborted) {
|
||||
try {
|
||||
this.webview.postMessage({ type: 'autoContinue', value: '답변을 정리하는 중입니다...' });
|
||||
const retryMsgs: ChatMessage[] = messagesForRequest.map((m, i) =>
|
||||
i === 0 ? { ...m, content: `${m.content}\n${FINAL_ONLY_DIRECTIVE}` } : m);
|
||||
const r = await this.callNonStreaming({
|
||||
baseUrl: ollamaUrl, modelName: actualModel, engine, messages: retryMsgs,
|
||||
temperature, maxTokens: maxOutputTokens, contextLength: ctxLimits.contextLength,
|
||||
signal: this.abortController?.signal,
|
||||
});
|
||||
if (r.stopReason) finishStopReason = r.stopReason;
|
||||
const rc = extractVisibleFinal(r.text);
|
||||
if (rc.visible.trim()) {
|
||||
logInfo('Final-only retry recovered a visible answer.', { model: actualModel, length: rc.visible.length });
|
||||
aiResponseText = r.text;
|
||||
cleaned = rc;
|
||||
}
|
||||
} catch (e: any) {
|
||||
logError('Final-only retry failed.', { model: actualModel, error: e?.message ?? String(e) });
|
||||
}
|
||||
}
|
||||
|
||||
// (c) Auto-continuation — the visible answer hit the output-token ceiling.
|
||||
let continuationCount = 0;
|
||||
if (config.autoContinueOnOutputLimit && config.maxAutoContinuations > 0 && loopDepth === 0) {
|
||||
const originalUserPrompt = prompt || (this.chatHistory.find(m => m.role === 'user' && typeof m.content === 'string')?.content as string) || '';
|
||||
let lastOutputTokens = estimateTokens(cleaned.visible);
|
||||
while (
|
||||
shouldAutoContinue(classifyStopReason(finishStopReason), cleaned.visible, lastOutputTokens, maxOutputTokens)
|
||||
&& continuationCount < config.maxAutoContinuations
|
||||
&& !this.abortController?.signal.aborted
|
||||
&& !this.isStaleRun(runId)
|
||||
) {
|
||||
continuationCount++;
|
||||
this.webview.postMessage({ type: 'autoContinue', value: `답변이 길어 이어서 정리하는 중입니다... (${continuationCount}/${config.maxAutoContinuations})` });
|
||||
try {
|
||||
const contMsgs: ChatMessage[] = [
|
||||
{ role: 'system', content: CONTINUATION_SYSTEM_PROMPT, internal: true },
|
||||
{ role: 'user', content: buildContinuationUserPrompt(originalUserPrompt, cleaned.visible) },
|
||||
];
|
||||
const contMax = computeOutputBudget(estimateMessagesTokens(contMsgs), ctxLimits).maxOutputTokens;
|
||||
const cr = await this.callNonStreaming({
|
||||
baseUrl: ollamaUrl, modelName: actualModel, engine, messages: contMsgs,
|
||||
temperature, maxTokens: contMax, contextLength: ctxLimits.contextLength,
|
||||
signal: this.abortController?.signal,
|
||||
});
|
||||
finishStopReason = cr.stopReason;
|
||||
const ccl = extractVisibleFinal(cr.text);
|
||||
if (!ccl.visible.trim()) {
|
||||
logInfo('Continuation produced no visible text — stopping.', { model: actualModel, round: continuationCount });
|
||||
break;
|
||||
}
|
||||
cleaned = { ...cleaned, visible: mergeContinuationParts(cleaned.visible, ccl.visible), wasThoughtOnly: false };
|
||||
lastOutputTokens = estimateTokens(ccl.visible);
|
||||
logInfo('Auto-continued the answer.', { model: actualModel, round: continuationCount, addedChars: ccl.visible.length, totalChars: cleaned.visible.length, contStopReason: cr.stopReason });
|
||||
} catch (e: any) {
|
||||
logError('Auto-continuation failed.', { model: actualModel, round: continuationCount, error: e?.message ?? String(e) });
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (this.isStaleRun(runId)) return;
|
||||
}
|
||||
const cleanedVisible = cleaned.visible;
|
||||
|
||||
// 5. Execute Actions
|
||||
const rationale = this.parseRationale(aiResponseText);
|
||||
const rationale = this.parseRationale(cleanedVisible);
|
||||
let assistantContent = this.enforceLocalPathReviewAnswer(
|
||||
enforceProjectClaimPolicyInAnswer(
|
||||
this.sanitizeAssistantContent(aiResponseText),
|
||||
this.sanitizeAssistantContent(cleanedVisible),
|
||||
secondBrainTrace
|
||||
),
|
||||
localPathContext
|
||||
@@ -900,7 +993,8 @@ export class AgentExecutor {
|
||||
this.emitHistoryChanged();
|
||||
|
||||
this.statusBarManager.updateStatus(AgentStatus.Executing);
|
||||
const report = await this.executeActions(aiResponseText, rootPath, activeBrain);
|
||||
// Action tags are honored only from the visible final answer — never from hidden reasoning.
|
||||
const report = await this.executeActions(cleanedVisible, rootPath, activeBrain);
|
||||
if (!assistantContent.trim() && report.length === 0) {
|
||||
const promptCharCount = messagesForRequest.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
|
||||
logError('Model returned an empty response without actions.', {
|
||||
|
||||
Reference in New Issue
Block a user