chore: bump version to 2.80.37 and package with response recovery

2026-05-12 23:55:00 +09:00
parent e0af15767a
commit 6c4bc3494f
12 changed files with 466 additions and 21 deletions
@@ -41,6 +41,15 @@ import { MemoryManager } from './memory';
 import { RetrievalOrchestrator } from './retrieval';
 import { buildLessonChecklistBlock, isQaRegressionFeedback, findUnaddressedChecklistItems } from './retrieval/lessonHelpers';
 import { resolveScopeForAgent } from './skills/agentKnowledgeMap';
+import {
+    extractVisibleFinal,
+    shouldFinalOnlyRetry,
+    shouldAutoContinue,
+    mergeContinuationParts,
+    buildContinuationUserPrompt,
+    FINAL_ONLY_DIRECTIVE,
+    CONTINUATION_SYSTEM_PROMPT,
+} from './core/responseRecovery';
 import {
    estimateTokens,
    estimateMessagesTokens,
@@ -846,11 +855,95 @@ export class AgentExecutor {
                }
            }

+            // ── Thought Quarantine + Final-only Retry + Auto-Continuation ──
+            // The user is waiting for an answer, not for a chance to manage the generation engine:
+            //   (a) hidden reasoning (Harmony channels, <think>…, "Thinking Process:") never reaches
+            //       the screen — stripped here, and from what executeActions / chatHistory see;
+            //   (b) if the model emitted *only* reasoning → retry once (non-streaming) with a final-answer-only directive;
+            //   (c) if the answer was cut off at the output-token ceiling → continue it internally with a *compressed*
+            //       request (original question + the answer so far), for at most config.maxAutoContinuations rounds.
+            let cleaned = extractVisibleFinal(aiResponseText);
+            if (cleaned.hadHiddenReasoning) {
+                logInfo('Stripped hidden reasoning from the model output.', {
+                    model: actualModel, hiddenChars: cleaned.hiddenReasoning.length,
+                    visibleChars: cleaned.visible.length, hadFinalChannel: cleaned.hadFinalChannel,
+                    thoughtOnly: cleaned.wasThoughtOnly,
+                });
+            }
+
+            // (b) Final-only retry — the reply was reasoning-only, no visible answer.
+            if (shouldFinalOnlyRetry(cleaned)
+                && config.finalOnlyRetryOnThoughtLeak
+                && loopDepth === 0
+                && !this.abortController?.signal.aborted) {
+                try {
+                    this.webview.postMessage({ type: 'autoContinue', value: '답변을 정리하는 중입니다...' });
+                    const retryMsgs: ChatMessage[] = messagesForRequest.map((m, i) =>
+                        i === 0 ? { ...m, content: `${m.content}\n${FINAL_ONLY_DIRECTIVE}` } : m);
+                    const r = await this.callNonStreaming({
+                        baseUrl: ollamaUrl, modelName: actualModel, engine, messages: retryMsgs,
+                        temperature, maxTokens: maxOutputTokens, contextLength: ctxLimits.contextLength,
+                        signal: this.abortController?.signal,
+                    });
+                    if (r.stopReason) finishStopReason = r.stopReason;
+                    const rc = extractVisibleFinal(r.text);
+                    if (rc.visible.trim()) {
+                        logInfo('Final-only retry recovered a visible answer.', { model: actualModel, length: rc.visible.length });
+                        aiResponseText = r.text;
+                        cleaned = rc;
+                    }
+                } catch (e: any) {
+                    logError('Final-only retry failed.', { model: actualModel, error: e?.message ?? String(e) });
+                }
+            }
+
+            // (c) Auto-continuation — the visible answer hit the output-token ceiling.
+            let continuationCount = 0;
+            if (config.autoContinueOnOutputLimit && config.maxAutoContinuations > 0 && loopDepth === 0) {
+                const originalUserPrompt = prompt || (this.chatHistory.find(m => m.role === 'user' && typeof m.content === 'string')?.content as string) || '';
+                let lastOutputTokens = estimateTokens(cleaned.visible);
+                while (
+                    shouldAutoContinue(classifyStopReason(finishStopReason), cleaned.visible, lastOutputTokens, maxOutputTokens)
+                    && continuationCount < config.maxAutoContinuations
+                    && !this.abortController?.signal.aborted
+                    && !this.isStaleRun(runId)
+                ) {
+                    continuationCount++;
+                    this.webview.postMessage({ type: 'autoContinue', value: `답변이 길어 이어서 정리하는 중입니다... (${continuationCount}/${config.maxAutoContinuations})` });
+                    try {
+                        const contMsgs: ChatMessage[] = [
+                            { role: 'system', content: CONTINUATION_SYSTEM_PROMPT, internal: true },
+                            { role: 'user', content: buildContinuationUserPrompt(originalUserPrompt, cleaned.visible) },
+                        ];
+                        const contMax = computeOutputBudget(estimateMessagesTokens(contMsgs), ctxLimits).maxOutputTokens;
+                        const cr = await this.callNonStreaming({
+                            baseUrl: ollamaUrl, modelName: actualModel, engine, messages: contMsgs,
+                            temperature, maxTokens: contMax, contextLength: ctxLimits.contextLength,
+                            signal: this.abortController?.signal,
+                        });
+                        finishStopReason = cr.stopReason;
+                        const ccl = extractVisibleFinal(cr.text);
+                        if (!ccl.visible.trim()) {
+                            logInfo('Continuation produced no visible text — stopping.', { model: actualModel, round: continuationCount });
+                            break;
+                        }
+                        cleaned = { ...cleaned, visible: mergeContinuationParts(cleaned.visible, ccl.visible), wasThoughtOnly: false };
+                        lastOutputTokens = estimateTokens(ccl.visible);
+                        logInfo('Auto-continued the answer.', { model: actualModel, round: continuationCount, addedChars: ccl.visible.length, totalChars: cleaned.visible.length, contStopReason: cr.stopReason });
+                    } catch (e: any) {
+                        logError('Auto-continuation failed.', { model: actualModel, round: continuationCount, error: e?.message ?? String(e) });
+                        break;
+                    }
+                }
+                if (this.isStaleRun(runId)) return;
+            }
+            const cleanedVisible = cleaned.visible;
+
            // 5. Execute Actions
-            const rationale = this.parseRationale(aiResponseText);
+            const rationale = this.parseRationale(cleanedVisible);
            let assistantContent = this.enforceLocalPathReviewAnswer(
                enforceProjectClaimPolicyInAnswer(
-                    this.sanitizeAssistantContent(aiResponseText),
+                    this.sanitizeAssistantContent(cleanedVisible),
                    secondBrainTrace
                ),
                localPathContext
@@ -900,7 +993,8 @@ export class AgentExecutor {
            this.emitHistoryChanged();

            this.statusBarManager.updateStatus(AgentStatus.Executing);
-            const report = await this.executeActions(aiResponseText, rootPath, activeBrain);
+            // Action tags are honored only from the visible final answer — never from hidden reasoning.
+            const report = await this.executeActions(cleanedVisible, rootPath, activeBrain);
            if (!assistantContent.trim() && report.length === 0) {
                const promptCharCount = messagesForRequest.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
                logError('Model returned an empty response without actions.', {