chore: version up to 2.80.38 and package with refined recovery
This commit is contained in:
+32
-18
@@ -45,6 +45,7 @@ import {
|
||||
extractVisibleFinal,
|
||||
shouldFinalOnlyRetry,
|
||||
shouldAutoContinue,
|
||||
looksCutOff,
|
||||
mergeContinuationParts,
|
||||
buildContinuationUserPrompt,
|
||||
FINAL_ONLY_DIRECTIVE,
|
||||
@@ -485,27 +486,36 @@ export class AgentExecutor {
|
||||
let fullSystemPrompt: string;
|
||||
|
||||
if (isAgentMode) {
|
||||
// 1. 기본 시스템 프롬프트에서 에이전트 포맷과 충돌하는 섹션 제거
|
||||
// The Agent's prompt IS the primary directive (role / persona / tone / output format),
|
||||
// so it LEADS the system prompt — models anchor on the first persona they see, not the
|
||||
// last, especially small ones. The Astra base prompt is reduced to neutral scaffolding
|
||||
// (action tags, current date, anti-leak rules) and follows; a short reminder at the very
|
||||
// end keeps the model from drifting back to a generic assistant.
|
||||
const strippedSystemPrompt = this.stripAstraFormattingForAgentMode(systemPrompt);
|
||||
const agentPromptText = (options.agentSkillContext || '').trim();
|
||||
if (estimateTokens(agentPromptText) > Math.floor(config.contextLength * 0.5)) {
|
||||
logInfo('Agent prompt is unusually large relative to the context window.', {
|
||||
model: actualModel, agentPromptTokens: estimateTokens(agentPromptText), contextLength: config.contextLength,
|
||||
});
|
||||
}
|
||||
|
||||
// 2. Astra 전용 컨텍스트는 에이전트 모드에서 비활성화
|
||||
// (astraStanceCtx, thinkingPartnerCtx, v4PolicyCtx → 에이전트 역할과 충돌)
|
||||
const agentDirective = [
|
||||
'\n\n[AGENT MODE — ABSOLUTE OVERRIDE]',
|
||||
'You are NOT operating as Astra for this response.',
|
||||
'A specialized Agent has been selected by the user.',
|
||||
'ALL output format, role, persona, and style instructions from the Agent below',
|
||||
'take ABSOLUTE PRECEDENCE over any previous formatting rules (including ## 요약, ## 상세 설명, ## 제안).',
|
||||
'You MUST follow the Agent\'s 📄 Output Format exactly. Do NOT fall back to Astra\'s default format.',
|
||||
const agentBlock = [
|
||||
'[AGENT MODE — PRIMARY DIRECTIVE]',
|
||||
'A specialized Agent has been selected by the user. The Agent System Prompt below is your',
|
||||
'PRIMARY directive: it defines your role, persona, tone, and output format. Follow it exactly.',
|
||||
'Everything after the Agent block (action-tag reference, date, brain/project context) is technical',
|
||||
'scaffolding — use it only as the Agent\'s task requires. Do NOT impose a generic assistant',
|
||||
'format (e.g. ## 요약 / ## 상세 설명 / ## 제안) unless the Agent explicitly asks for one.',
|
||||
'',
|
||||
'--- AGENT SYSTEM PROMPT START ---',
|
||||
options.agentSkillContext,
|
||||
'--- AGENT SYSTEM PROMPT END ---'
|
||||
agentPromptText || '(this agent has no instructions yet — fall back to being a concise, direct assistant)',
|
||||
'--- AGENT SYSTEM PROMPT END ---',
|
||||
].join('\n');
|
||||
const agentTailReminder = '\n\n[REMINDER] You are operating as the Agent defined above. Keep its role, persona, and output format. Do not fall back to a default assistant style or section format.';
|
||||
|
||||
// 3. 조립: 기본(축소) → 유틸리티 컨텍스트 → 에이전트 프롬프트(최후단)
|
||||
// [CONTEXT] … [/CONTEXT] 사이만 컨텍스트 초과 시 trim 대상 — agentDirective/negative 는 보호.
|
||||
fullSystemPrompt = `${strippedSystemPrompt}${internetCtx}${memoryCtx}${designerCtx}${secondBrainTraceCtx}\n\n[CONTEXT]\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}${agentDirective}`;
|
||||
// [CONTEXT] … [/CONTEXT] 사이만 컨텍스트 초과 시 trim 대상 — agentBlock(앞)·reminder(뒤)·negative 는 보호.
|
||||
// memoryCtx(RAG/메모리/lessons)도 [CONTEXT] 안에 넣어 토큰이 빡빡할 때 대화 기록보다 먼저 잘리게 한다.
|
||||
fullSystemPrompt = `${agentBlock}\n\n${strippedSystemPrompt}${internetCtx}${designerCtx}${secondBrainTraceCtx}\n\n[CONTEXT]\n${memoryCtx}\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}${agentTailReminder}`;
|
||||
} else {
|
||||
// 기존 Astra 모드 (에이전트 미선택)
|
||||
const localProjectKnowledgeCtx = prompt && localPathContext && this.isProjectKnowledgeCreationRequest(prompt)
|
||||
@@ -530,7 +540,8 @@ export class AgentExecutor {
|
||||
const casualCtx = isCasualConversation
|
||||
? '\n\n[CASUAL CONVERSATION MODE]\nThe user sent a greeting, acknowledgement, or light conversational message. Reply naturally and briefly to the message itself. Do not use Second Brain, memory, project records, reports, references, or analysis unless the user explicitly asks for them.'
|
||||
: '';
|
||||
fullSystemPrompt = `${systemPrompt}${internetCtx}${memoryCtx}${designerCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}${casualCtx}\n\n[CONTEXT]\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}`;
|
||||
// memoryCtx(RAG/메모리/lessons)는 [CONTEXT] 안에 — 토큰이 빡빡하면 대화 기록보다 먼저 잘림.
|
||||
fullSystemPrompt = `${systemPrompt}${internetCtx}${designerCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}${casualCtx}\n\n[CONTEXT]\n${memoryCtx}\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}`;
|
||||
}
|
||||
// ──────────────────────────────────────────────────────────────────
|
||||
// [Context Limit Manager] context length 는 "답변을 그만큼 길게 써도 된다"
|
||||
@@ -980,8 +991,11 @@ export class AgentExecutor {
|
||||
});
|
||||
}
|
||||
const outputTokens = estimateTokens(assistantContent);
|
||||
const notice = shouldShowTruncationNotice(stopKind, outputTokens, maxOutputTokens)
|
||||
? truncationNotice(stopKind)
|
||||
// Show the "incomplete" notice when the engine said output-limit/context-overflow/error,
|
||||
// OR when (after all auto-continuation rounds) the answer still plainly ends mid-sentence.
|
||||
const notice =
|
||||
shouldShowTruncationNotice(stopKind, outputTokens, maxOutputTokens) ? truncationNotice(stopKind)
|
||||
: looksCutOff(assistantContent) ? truncationNotice('output-limit')
|
||||
: '';
|
||||
if (notice && assistantContent.trim()) {
|
||||
assistantContent = assistantContent.trimEnd() + notice;
|
||||
|
||||
@@ -125,8 +125,36 @@ export function shouldFinalOnlyRetry(cleaned: CleanedAssistantOutput): boolean {
|
||||
}
|
||||
|
||||
/**
 * Does the answer plainly end mid-sentence / mid-structure?
 *
 * Conservative by intent: only *unambiguous* incompleteness is flagged. A complete Korean sentence
 * may legitimately end without a period, so a plain syllable such as `다`/`요` is never flagged.
 * What is flagged: an unclosed code fence, a trailing opening bracket/quote, a dangling markdown
 * bullet/heading/blockquote marker, a trailing ASCII letter or digit, a trailing clause separator,
 * and a trailing Korean particle / connective ending.
 *
 * Straight quotes and backticks are symmetric (the same character opens and closes), so a trailing
 * one is treated as an *opener* only when it is detached from preceding content (preceded by
 * whitespace or another opener). A trailing closed code fence, closed inline code span, or closed
 * quotation is therefore NOT reported as cut off.
 *
 * @param text The visible answer text; `null`/`undefined`/empty is treated as an empty string.
 * @returns `true` when the text (ignoring trailing whitespace) plainly ends unfinished; `false`
 *          otherwise, and always `false` for texts shorter than 12 characters.
 */
export function looksCutOff(text: string): boolean {
    const t = (text || '').replace(/\s+$/, '');
    // Too short to judge reliably.
    if (t.length < 12) return false;
    // unclosed code fence (odd number of ``` markers)
    if ((t.match(/```/g) || []).length % 2 === 1) return true;
    // ends with an unambiguous opener: bracket or curly opening quote
    if (/[([{“‘]$/.test(t)) return true;
    // ends with a symmetric delimiter (" ' `): only an opener when it does not close something.
    const last = t.charAt(t.length - 1);
    if (last === '"' || last === "'" || last === '`') {
        // Fence parity was verified above, so a trailing ``` here is a closing fence.
        const closesFence = last === '`' && t.endsWith('```');
        // A closing quote/backtick hugs the content before it; an opening one follows a gap or opener.
        if (!closesFence && /[\s([{“‘]/.test(t.charAt(t.length - 2))) return true;
    }
    // dangling markdown bullet / heading / blockquote with no content after the marker
    if (/(?:^|\n)\s*(?:[-*+]|#{1,6}|>|\d+\.)\s*$/.test(t)) return true;
    // ends mid-English-word or mid-number
    if (/[A-Za-z0-9]$/.test(t)) return true;
    // ends mid-clause (comma / colon / semicolon / list separator)
    if (/[,:;·、,]$/.test(t)) return true;
    // ends with a Korean particle / connective ending that NEVER closes a sentence
    if (/(?:으로|로서|로써|로|의|에서|에게|한테|에|을|를|과|와|이랑|랑|는|은|이|가|도|만|까지|부터|마다|조차|마저|밖에|뿐|처럼|같이|보다|이나|거나|든지|든가|고|며|면서|면|어서|아서|여서|니까|는데|은데|ㄴ데|지만|던|도록)$/.test(t)) return true;
    return false;
}
|
||||
|
||||
/**
|
||||
* Should we silently continue from where the answer was cut off? The point is to recover regardless
|
||||
* of *why* it stopped, since local engines / SDKs often report the stop reason wrongly or not at all:
|
||||
* - the engine said it hit the output cap (`output-limit`), OR
|
||||
* - it generated close to the cap (a complete answer wouldn't dangle that early), OR
|
||||
* - the visible answer plainly ends mid-sentence and the engine didn't give a clean "done" reason.
|
||||
* Never continues from a too-short fragment, and never from a clean ending (terminal punctuation).
|
||||
*/
|
||||
export function shouldAutoContinue(
|
||||
stopKind: GenerationStopKind,
|
||||
@@ -134,10 +162,14 @@ export function shouldAutoContinue(
|
||||
outputTokens: number,
|
||||
maxOutputTokens: number
|
||||
): boolean {
|
||||
if (stopKind !== 'output-limit') return false;
|
||||
if (!visibleAnswer || visibleAnswer.trim().length < 40) return false;
|
||||
if (!Number.isFinite(maxOutputTokens) || maxOutputTokens <= 0) return true;
|
||||
return outputTokens >= Math.floor(maxOutputTokens * 0.8);
|
||||
const v = (visibleAnswer || '').trim();
|
||||
if (v.length < 24) return false;
|
||||
// These won't be fixed by generating more text — don't auto-continue.
|
||||
if (stopKind === 'user-stopped' || stopKind === 'context-overflow' || stopKind === 'error' || stopKind === 'tool-calls') return false;
|
||||
if (stopKind === 'output-limit') return true;
|
||||
if (Number.isFinite(maxOutputTokens) && maxOutputTokens > 0 && outputTokens >= Math.floor(maxOutputTokens * 0.85)) return true;
|
||||
// 'complete' (eosFound) or 'unknown' but the text is plainly unfinished → continue.
|
||||
return looksCutOff(v);
|
||||
}
|
||||
|
||||
/** Appended to the system prompt for a final-only retry — the previous reply was reasoning-only. */
|
||||
|
||||
@@ -128,7 +128,9 @@ export class LMStudioStreamer implements IChatStreamer {
|
||||
logInfo('LM Studio SDK chat stream finished.', { model: trimmedModel, stopReason, tokensYielded: yielded });
|
||||
}
|
||||
} catch { /* result unavailable on some SDK versions — non-fatal */ }
|
||||
yield { token: '', stopReason: stopReason ?? 'eosFound' };
|
||||
// Don't claim `eosFound` if we couldn't actually read the stop reason — leave it
|
||||
// undefined so the caller treats it as 'unknown' (and its mid-sentence heuristics kick in).
|
||||
yield { token: '', stopReason };
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
+24
-15
@@ -1789,22 +1789,31 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
|
||||
this._currentSessionBrainId = selectedBrainId;
|
||||
|
||||
let agentSkillContext = undefined;
|
||||
if (agentFile && fs.existsSync(agentFile)) {
|
||||
agentSkillContext = fs.readFileSync(agentFile, 'utf8');
|
||||
|
||||
// Merge in any external skill .md files the user has mapped to this
|
||||
// agent. We concatenate into the same agentSkillContext blob so the
|
||||
// rest of the pipeline (agent.ts, agent-mode override) treats them
|
||||
// identically to the agent's own .md — no further changes needed.
|
||||
try {
|
||||
const entry = getOrCreateAgentEntry(agentFile);
|
||||
const bundle = loadExternalSkills(entry.skillFolders);
|
||||
const block = formatSkillsAsPromptBlock(bundle);
|
||||
if (block) {
|
||||
agentSkillContext = `${agentSkillContext.trim()}\n\n${block}`;
|
||||
if (agentFile && agentFile !== 'none' && fs.existsSync(agentFile)) {
|
||||
const fileContent = fs.readFileSync(agentFile, 'utf8');
|
||||
// Guard: a freshly-created agent still has only the placeholder template
|
||||
// ("# Agent Persona: …\n\nAdd your instructions here…"). Treating that as a real
|
||||
// agent prompt just confuses the model — fall back to normal mode and tell the user.
|
||||
const body = fileContent.replace(/^?#\s*Agent\s*Persona\s*:.*$/im, '').trim();
|
||||
const isPlaceholder = !body || /^add your instructions here/i.test(body);
|
||||
if (isPlaceholder) {
|
||||
logInfo('Selected agent has no real instructions — running without agent mode.', { agentFile });
|
||||
this._view?.webview.postMessage({ type: 'lmStudioError', value: '선택한 에이전트에 내용이 없습니다 — 에이전트 프롬프트를 작성한 뒤 다시 시도하세요. (이번 응답은 에이전트 없이 처리합니다)' });
|
||||
} else {
|
||||
agentSkillContext = fileContent;
|
||||
// Merge in any external skill .md files the user has mapped to this agent. We concatenate
|
||||
// into the same agentSkillContext blob so the rest of the pipeline (agent.ts, agent-mode
|
||||
// override) treats them identically to the agent's own .md — no further changes needed.
|
||||
try {
|
||||
const entry = getOrCreateAgentEntry(agentFile);
|
||||
const bundle = loadExternalSkills(entry.skillFolders);
|
||||
const block = formatSkillsAsPromptBlock(bundle);
|
||||
if (block) {
|
||||
agentSkillContext = `${agentSkillContext.trim()}\n\n${block}`;
|
||||
}
|
||||
} catch (e: any) {
|
||||
logError('External skill load failed.', { error: e?.message || String(e) });
|
||||
}
|
||||
} catch (e: any) {
|
||||
logError('External skill load failed.', { error: e?.message || String(e) });
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user