chore: version up to 2.80.38 and package with refined recovery

This commit is contained in:
g1nation
2026-05-13 00:15:45 +09:00
parent 6c4bc3494f
commit eb36cec050
15 changed files with 202 additions and 62 deletions
+32 -18
View File
@@ -45,6 +45,7 @@ import {
extractVisibleFinal,
shouldFinalOnlyRetry,
shouldAutoContinue,
looksCutOff,
mergeContinuationParts,
buildContinuationUserPrompt,
FINAL_ONLY_DIRECTIVE,
@@ -485,27 +486,36 @@ export class AgentExecutor {
let fullSystemPrompt: string;
if (isAgentMode) {
// 1. 기본 시스템 프롬프트에서 에이전트 포맷과 충돌하는 섹션 제거
// The Agent's prompt IS the primary directive (role / persona / tone / output format),
// so it LEADS the system prompt — models anchor on the first persona they see, not the
// last, especially small ones. The Astra base prompt is reduced to neutral scaffolding
// (action tags, current date, anti-leak rules) and follows; a short reminder at the very
// end keeps the model from drifting back to a generic assistant.
const strippedSystemPrompt = this.stripAstraFormattingForAgentMode(systemPrompt);
const agentPromptText = (options.agentSkillContext || '').trim();
if (estimateTokens(agentPromptText) > Math.floor(config.contextLength * 0.5)) {
logInfo('Agent prompt is unusually large relative to the context window.', {
model: actualModel, agentPromptTokens: estimateTokens(agentPromptText), contextLength: config.contextLength,
});
}
// 2. Astra 전용 컨텍스트는 에이전트 모드에서 비활성화
// (astraStanceCtx, thinkingPartnerCtx, v4PolicyCtx → 에이전트 역할과 충돌)
const agentDirective = [
'\n\n[AGENT MODE — ABSOLUTE OVERRIDE]',
'You are NOT operating as Astra for this response.',
'A specialized Agent has been selected by the user.',
'ALL output format, role, persona, and style instructions from the Agent below',
'take ABSOLUTE PRECEDENCE over any previous formatting rules (including ## 요약, ## 상세 설명, ## 제안).',
'You MUST follow the Agent\'s 📄 Output Format exactly. Do NOT fall back to Astra\'s default format.',
const agentBlock = [
'[AGENT MODE — PRIMARY DIRECTIVE]',
'A specialized Agent has been selected by the user. The Agent System Prompt below is your',
'PRIMARY directive: it defines your role, persona, tone, and output format. Follow it exactly.',
'Everything after the Agent block (action-tag reference, date, brain/project context) is technical',
'scaffolding — use it only as the Agent\'s task requires. Do NOT impose a generic assistant',
'format (e.g. ## 요약 / ## 상세 설명 / ## 제안) unless the Agent explicitly asks for one.',
'',
'--- AGENT SYSTEM PROMPT START ---',
options.agentSkillContext,
'--- AGENT SYSTEM PROMPT END ---'
agentPromptText || '(this agent has no instructions yet — fall back to being a concise, direct assistant)',
'--- AGENT SYSTEM PROMPT END ---',
].join('\n');
const agentTailReminder = '\n\n[REMINDER] You are operating as the Agent defined above. Keep its role, persona, and output format. Do not fall back to a default assistant style or section format.';
// 3. Assembly: agent block (first) → reduced base prompt → utility contexts → [CONTEXT] block → negative context → tail reminder (last)
// [CONTEXT] … [/CONTEXT] 사이만 컨텍스트 초과 시 trim 대상 — agentDirective/negative 는 보호.
fullSystemPrompt = `${strippedSystemPrompt}${internetCtx}${memoryCtx}${designerCtx}${secondBrainTraceCtx}\n\n[CONTEXT]\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}${agentDirective}`;
// [CONTEXT] … [/CONTEXT] 사이만 컨텍스트 초과 시 trim 대상 — agentBlock(앞)·reminder(뒤)·negative 는 보호.
// memoryCtx(RAG/메모리/lessons)도 [CONTEXT] 안에 넣어 토큰이 빡빡할 때 대화 기록보다 먼저 잘리게 한다.
fullSystemPrompt = `${agentBlock}\n\n${strippedSystemPrompt}${internetCtx}${designerCtx}${secondBrainTraceCtx}\n\n[CONTEXT]\n${memoryCtx}\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}${agentTailReminder}`;
} else {
// 기존 Astra 모드 (에이전트 미선택)
const localProjectKnowledgeCtx = prompt && localPathContext && this.isProjectKnowledgeCreationRequest(prompt)
@@ -530,7 +540,8 @@ export class AgentExecutor {
const casualCtx = isCasualConversation
? '\n\n[CASUAL CONVERSATION MODE]\nThe user sent a greeting, acknowledgement, or light conversational message. Reply naturally and briefly to the message itself. Do not use Second Brain, memory, project records, reports, references, or analysis unless the user explicitly asks for them.'
: '';
fullSystemPrompt = `${systemPrompt}${internetCtx}${memoryCtx}${designerCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}${casualCtx}\n\n[CONTEXT]\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}`;
// memoryCtx(RAG/메모리/lessons)는 [CONTEXT] 안에 — 토큰이 빡빡하면 대화 기록보다 먼저 잘림.
fullSystemPrompt = `${systemPrompt}${internetCtx}${designerCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}${casualCtx}\n\n[CONTEXT]\n${memoryCtx}\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}`;
}
// ──────────────────────────────────────────────────────────────────
// [Context Limit Manager] context length 는 "답변을 그만큼 길게 써도 된다"
@@ -980,8 +991,11 @@ export class AgentExecutor {
});
}
const outputTokens = estimateTokens(assistantContent);
const notice = shouldShowTruncationNotice(stopKind, outputTokens, maxOutputTokens)
? truncationNotice(stopKind)
// Show the "incomplete" notice when the engine said output-limit/context-overflow/error,
// OR when (after all auto-continuation rounds) the answer still plainly ends mid-sentence.
const notice =
shouldShowTruncationNotice(stopKind, outputTokens, maxOutputTokens) ? truncationNotice(stopKind)
: looksCutOff(assistantContent) ? truncationNotice('output-limit')
: '';
if (notice && assistantContent.trim()) {
assistantContent = assistantContent.trimEnd() + notice;
+38 -6
View File
@@ -125,8 +125,36 @@ export function shouldFinalOnlyRetry(cleaned: CleanedAssistantOutput): boolean {
}
/**
* Should we silently continue from where the answer was cut off? Only when it actually hit the
* output-token ceiling and we already have a non-trivial visible answer to continue from.
* Does the answer plainly end mid-sentence / mid-structure? Conservative — only flags *unambiguous*
* incompleteness (a complete Korean sentence may legitimately end without a period, so we never flag
* a plain syllable like `다`/`요`; we only flag connective particles, mid-English-words, mid-clause
* commas/colons, unclosed code fences/brackets, and dangling markdown bullets/headings).
*/
export function looksCutOff(text: string): boolean {
  // Purpose: conservative "does this answer stop mid-sentence / mid-structure?" heuristic.
  // @param text  the visible assistant answer (may be empty / undefined at runtime).
  // @returns     true only for endings treated as unambiguous truncation; false when unsure.
  //
  // Trailing whitespace never counts as content, so strip it before inspecting the ending.
  const t = (text || '').trimEnd();

  // Very short fragments carry too little signal to judge.
  if (t.length < 12) return false;

  // Unclosed code fence: an odd number of ``` markers means one fence is still open.
  if ((t.match(/```/g) || []).length % 2 === 1) return true;

  // Ends with a character that can only OPEN a pair (brackets, curly opening quotes).
  if (/[([{“‘]$/.test(t)) return true;

  // Symmetric delimiters (" ' `) are ambiguous: the same character both opens and closes.
  // Flag them only in *opening* position, i.e. directly after whitespace or an opening
  // bracket. A closing quote, closing inline-code backtick, or the last backtick of a
  // closed ``` fence follows a non-space character and must NOT be reported as cut off —
  // otherwise every answer that ends with a code block would look truncated.
  if (/(?:^|[\s([{“‘])["'`]$/.test(t)) return true;

  // Dangling markdown bullet / heading / blockquote / numbered item with nothing after the marker.
  if (/(?:^|\n)\s*(?:[-*+]|#{1,6}|>|\d+\.)\s*$/.test(t)) return true;

  // Ends mid-English-word or mid-number (no terminal punctuation after an ASCII alphanumeric).
  if (/[A-Za-z0-9]$/.test(t)) return true;

  // Ends mid-clause (comma / colon / semicolon / list separator, including CJK variants).
  if (/[,:;·、,]$/.test(t)) return true;

  // Ends with a Korean particle / connective ending that does not close a sentence.
  // Plain sentence-final syllables such as `다` / `요` are deliberately absent from this list.
  if (/(?:으로|로서|로써|로|의|에서|에게|한테|에|을|를|과|와|이랑|랑|는|은|이|가|도|만|까지|부터|마다|조차|마저|밖에|뿐|처럼|같이|보다|이나|거나|든지|든가|고|며|면서|면|어서|아서|여서|니까|는데|은데|ㄴ데|지만|던|도록)$/.test(t)) return true;

  return false;
}
/**
* Should we silently continue from where the answer was cut off? The point is to recover regardless
* of *why* it stopped, since local engines / SDKs often report the stop reason wrongly or not at all:
* - the engine said it hit the output cap (`output-limit`), OR
* - it generated close to the cap (a complete answer wouldn't dangle that early), OR
* - the visible answer plainly ends mid-sentence and the engine didn't give a clean "done" reason.
* Never continues from a too-short fragment, and never from a clean ending (terminal punctuation).
*/
export function shouldAutoContinue(
stopKind: GenerationStopKind,
@@ -134,10 +162,14 @@ export function shouldAutoContinue(
outputTokens: number,
maxOutputTokens: number
): boolean {
if (stopKind !== 'output-limit') return false;
if (!visibleAnswer || visibleAnswer.trim().length < 40) return false;
if (!Number.isFinite(maxOutputTokens) || maxOutputTokens <= 0) return true;
return outputTokens >= Math.floor(maxOutputTokens * 0.8);
const v = (visibleAnswer || '').trim();
if (v.length < 24) return false;
// These won't be fixed by generating more text — don't auto-continue.
if (stopKind === 'user-stopped' || stopKind === 'context-overflow' || stopKind === 'error' || stopKind === 'tool-calls') return false;
if (stopKind === 'output-limit') return true;
if (Number.isFinite(maxOutputTokens) && maxOutputTokens > 0 && outputTokens >= Math.floor(maxOutputTokens * 0.85)) return true;
// 'complete' (eosFound) or 'unknown' but the text is plainly unfinished → continue.
return looksCutOff(v);
}
/** Appended to the system prompt for a final-only retry — the previous reply was reasoning-only. */
+3 -1
View File
@@ -128,7 +128,9 @@ export class LMStudioStreamer implements IChatStreamer {
logInfo('LM Studio SDK chat stream finished.', { model: trimmedModel, stopReason, tokensYielded: yielded });
}
} catch { /* result unavailable on some SDK versions — non-fatal */ }
yield { token: '', stopReason: stopReason ?? 'eosFound' };
// Don't claim `eosFound` if we couldn't actually read the stop reason — leave it
// undefined so the caller treats it as 'unknown' (and its mid-sentence heuristics kick in).
yield { token: '', stopReason };
return;
}
+24 -15
View File
@@ -1789,22 +1789,31 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
this._currentSessionBrainId = selectedBrainId;
let agentSkillContext = undefined;
if (agentFile && fs.existsSync(agentFile)) {
agentSkillContext = fs.readFileSync(agentFile, 'utf8');
// Merge in any external skill .md files the user has mapped to this
// agent. We concatenate into the same agentSkillContext blob so the
// rest of the pipeline (agent.ts, agent-mode override) treats them
// identically to the agent's own .md — no further changes needed.
try {
const entry = getOrCreateAgentEntry(agentFile);
const bundle = loadExternalSkills(entry.skillFolders);
const block = formatSkillsAsPromptBlock(bundle);
if (block) {
agentSkillContext = `${agentSkillContext.trim()}\n\n${block}`;
if (agentFile && agentFile !== 'none' && fs.existsSync(agentFile)) {
const fileContent = fs.readFileSync(agentFile, 'utf8');
// Guard: a freshly-created agent still has only the placeholder template
// ("# Agent Persona: …\n\nAdd your instructions here…"). Treating that as a real
// agent prompt just confuses the model — fall back to normal mode and tell the user.
const body = fileContent.replace(/^?#\s*Agent\s*Persona\s*:.*$/im, '').trim();
const isPlaceholder = !body || /^add your instructions here/i.test(body);
if (isPlaceholder) {
logInfo('Selected agent has no real instructions — running without agent mode.', { agentFile });
this._view?.webview.postMessage({ type: 'lmStudioError', value: '선택한 에이전트에 내용이 없습니다 — 에이전트 프롬프트를 작성한 뒤 다시 시도하세요. (이번 응답은 에이전트 없이 처리합니다)' });
} else {
agentSkillContext = fileContent;
// Merge in any external skill .md files the user has mapped to this agent. We concatenate
// into the same agentSkillContext blob so the rest of the pipeline (agent.ts, agent-mode
// override) treats them identically to the agent's own .md — no further changes needed.
try {
const entry = getOrCreateAgentEntry(agentFile);
const bundle = loadExternalSkills(entry.skillFolders);
const block = formatSkillsAsPromptBlock(bundle);
if (block) {
agentSkillContext = `${agentSkillContext.trim()}\n\n${block}`;
}
} catch (e: any) {
logError('External skill load failed.', { error: e?.message || String(e) });
}
} catch (e: any) {
logError('External skill load failed.', { error: e?.message || String(e) });
}
}