chore: v2.2.73 — ASTRA-DEBUG 로그 레벨 + webview CSP font-src 보강

- ASTRA-DEBUG 정상 흐름 로그를 console.error → logInfo/console.log 로 강등
  (chatHandlers, extension, slashRouter): DevTools에 ERR로 찍히던 오탐 제거
- sidebar webview에 명시적 CSP meta 추가 + font-src에 data: 허용
  (sidebar.html, sidebarProvider._getHtml): VS Code outer iframe이 codicon.ttf를
  data:font/ttf 로 inject하면서 기본 CSP에 막혀 매 prompt 마다 violation
  경고가 찍히던 문제 해소
- 누적된 LM Studio / agent / 컨텍스트 매니저 / 테스트 갱신 동반

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
g1nation
2026-05-23 15:52:19 +09:00
parent 36db170844
commit 0712014fcb
43 changed files with 2417 additions and 977 deletions
+328 -80
View File
@@ -53,6 +53,7 @@ import {
} from './retrieval/knowledgeMix';
import {
extractVisibleFinal,
stripMarkdownFormatting,
shouldFinalOnlyRetry,
shouldAutoContinue,
looksCutOff,
@@ -73,6 +74,7 @@ import {
estimateModelParamsB,
type ContextLimits,
} from './lib/contextManager';
import { samplingToRestBody, type LmStudioSampling, type ChatStreamStats } from './lmstudio/streamer';
export interface ChatMessage {
role: 'user' | 'assistant' | 'system';
@@ -208,6 +210,10 @@ export class AgentExecutor {
private historyChangeListener: HistoryChangeListener | undefined;
private runSerial = 0;
private activeRunId = 0;
// v2.2.69 — 모드 전환 감지용. handlePrompt 진입 시 현재 mode signature 를 계산해
// 직전 값과 다르면 system prompt 에 "이전 대화에서 ... 모드 전환됨" 한 줄을 끼운다.
// mode signature 는 (agent skill, multiAgent, company mode, 활성 brain) 를 `key=value|…` 형태로 이어붙인 문자열이다 (해시 아님).
private _lastModeSignature: string | null = null;
private transactionManager: TransactionManager;
private sessionManager: SessionManager;
private statusBarManager: StatusBarManager;
@@ -369,6 +375,9 @@ export class AgentExecutor {
this.onSessionEnd();
}
this.chatHistory = [];
// v2.2.69 — 새 세션엔 "이전 모드" 가 없음. mode signature 초기화하지 않으면 첫 메시지에서
// 직전 세션의 mode 와 비교돼 잘못된 bridge 가 끼는 회귀가 생긴다.
this._lastModeSignature = null;
this.emitHistoryChanged();
}
@@ -387,6 +396,7 @@ export class AgentExecutor {
this.onSessionEnd();
}
this.chatHistory = [];
this._lastModeSignature = null;
this.emitHistoryChanged();
}
@@ -633,6 +643,39 @@ export class AgentExecutor {
// 제거하고 에이전트 프롬프트를 최후단에 배치하여 절대 우선 적용.
// ──────────────────────────────────────────────────────────────────
const isAgentMode = !!options.agentSkillContext;
// v2.2.69 — 모드 전환 bridge. 현재 mode signature 를 직전 값과 비교해 바뀌었으면
// "이전 대화는 X 모드에서 Y 주제로 진행됨 / 지금부터 Z 모드" 한 줄을 system prompt 에 끼운다.
// chatHistory 자체는 손대지 않으므로 사용자 입장에선 대화가 연속되어 보이면서도
// 모델은 "모드가 바뀐 직후" 임을 인지한다.
let modeBridgeCtx = '';
try {
const agentSkillName = options.agentSkillContext
? (options.agentSkillContext.split('\n')[0] || '').slice(0, 60).replace(/^#\s*/, '').trim()
: '';
const currentSig = this.computeModeSignature({
agentSkillName: agentSkillName || undefined,
companyMode: !!(options as any).companyMode,
multiAgent: !!(options as any).multiAgent,
brainName: getActiveBrainProfile()?.name,
});
if (this._lastModeSignature !== null && this._lastModeSignature !== currentSig) {
const topic = this.buildLastTopicLine();
const bridgeLines = [
'',
'[MODE TRANSITION BRIDGE]',
`이전 모드: ${this._lastModeSignature}`,
`현재 모드: ${currentSig}`,
];
if (topic) bridgeLines.push(`직전 대화 주제(한 줄): ${topic}`);
bridgeLines.push('대화 history 는 그대로 이어진다. 새 모드의 페르소나/포맷을 따르되, 직전까지 사용자가 다루던 맥락을 잊지 말 것.');
modeBridgeCtx = bridgeLines.join('\n');
}
this._lastModeSignature = currentSig;
} catch (e: any) {
logError('Mode-bridge computation failed (non-fatal).', { error: e?.message || String(e) });
}
let fullSystemPrompt: string;
if (isAgentMode) {
@@ -665,7 +708,7 @@ export class AgentExecutor {
// [CONTEXT] … [/CONTEXT] 사이만 컨텍스트 초과 시 trim 대상 — agentBlock(앞)·reminder(뒤)·negative 는 보호.
// memoryCtx(RAG/메모리/lessons)도 [CONTEXT] 안에 넣어 토큰이 빡빡할 때 대화 기록보다 먼저 잘리게 한다.
fullSystemPrompt = `${agentBlock}\n\n${strippedSystemPrompt}${designerCtx}${secondBrainTraceCtx}\n\n[CONTEXT]\n${memoryCtx}\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}${agentTailReminder}`;
fullSystemPrompt = `${agentBlock}${modeBridgeCtx ? '\n\n' + modeBridgeCtx : ''}\n\n${strippedSystemPrompt}${designerCtx}${secondBrainTraceCtx}\n\n[CONTEXT]\n${memoryCtx}\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}${agentTailReminder}`;
} else {
// 기존 Astra 모드 (에이전트 미선택)
const localProjectKnowledgeCtx = prompt && localPathContext && this.isProjectKnowledgeCreationRequest(prompt)
@@ -700,7 +743,7 @@ export class AgentExecutor {
})()
: '';
// memoryCtx(RAG/메모리/lessons)는 [CONTEXT] 안에 — 토큰이 빡빡하면 대화 기록보다 먼저 잘림.
fullSystemPrompt = `${systemPrompt}${designerCtx}${projectArchitectureCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}${knowledgeMixCtx}${casualCtx}\n\n[CONTEXT]\n${memoryCtx}\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}`;
fullSystemPrompt = `${systemPrompt}${modeBridgeCtx ? '\n\n' + modeBridgeCtx : ''}${designerCtx}${projectArchitectureCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}${knowledgeMixCtx}${casualCtx}\n\n[CONTEXT]\n${memoryCtx}\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}`;
}
// ──────────────────────────────────────────────────────────────────
// [Context Limit Manager] context length 는 "답변을 그만큼 길게 써도 된다"
@@ -768,14 +811,17 @@ export class AgentExecutor {
);
let budgetedHistory: ChatMessage[] = reqMessages;
if (config.autoCompactHistory) {
const trim = trimHistoryToBudget<ChatMessage>(reqMessages, historyBudget, (n) => ({
// v2.2.69 — dropped 메시지를 받아 heuristic 요약을 만든 뒤 한 system 메시지로 prepend.
// 단순 count 마커는 "이전에 무슨 얘기를 했는지" 를 전혀 알려주지 않아 후속 턴에서 모델이
// 맥락을 잃어버리는 회귀를 낳았다. 이제는 U1/A1/U2/A2 골자가 남아 sliding window 가 동작.
const trim = trimHistoryToBudget<ChatMessage>(reqMessages, historyBudget, (_n, dropped) => ({
role: 'system',
content: `[이전 대화 ${n}개 메시지는 컨텍스트 한계 때문에 이번 요청에서 생략되었습니다. 필요하면 사용자에게 다시 확인하세요.]`,
content: this.buildDroppedHistorySummary(dropped),
internal: true,
}));
budgetedHistory = trim.messages;
if (trim.droppedCount > 0) {
logInfo('Conversation history compacted to fit the context window.', {
logInfo('Conversation history compacted to fit the context window (with summary).', {
model: actualModel, droppedCount: trim.droppedCount, historyBudget,
});
}
@@ -864,8 +910,12 @@ export class AgentExecutor {
// policy enforcement) emits a final `streamReplace` so the bubble
// ends up matching the cleaned answer regardless of what slipped
// through live.
const postLiveDeltas = loopDepth === 0;
// [Clean Stream] g1nation.liveStreamTokens=false (기본) 이면 토큰을 내부에만
// 누적하고 sanitize 끝난 최종 답변만 한 번에 표시 → Harmony/think 마커가 잠깐
// 화면에 노출되는 누설을 원천 차단한다. true 로 두면 legacy 라이브 스트리밍.
const postLiveDeltas = loopDepth === 0 && getConfig().liveStreamTokens === true;
let lmStudioStats: ChatStreamStats | undefined;
if (useLmStudioSdk) {
apiUrl = `${ollamaUrl} (sdk)`;
logInfo('Streaming chat via LM Studio SDK.', { model: actualModel });
@@ -876,15 +926,35 @@ export class AgentExecutor {
temperature,
maxTokens: maxOutputTokens,
contextOverflowPolicy: config.contextOverflowPolicy,
...this.lmStudioSamplingFromConfig(),
...this.lmStudioRespondExtrasFromConfig(),
signal: this.abortController.signal,
});
for await (const { token, stopReason } of stream) {
for await (const { token, stopReason, stats } of stream) {
if (this.isStaleRun(runId)) return;
if (token) {
aiResponseText += token;
if (postLiveDeltas) this.webview.postMessage({ type: 'streamChunk', value: token });
}
if (stopReason) finishStopReason = stopReason;
if (stats) lmStudioStats = stats;
}
if (lmStudioStats && getConfig().lmStudioShowStatsInBudget && loopDepth === 0) {
this.webview.postMessage({
type: 'lmStudioStats',
value: {
model: actualModel,
tokensPerSecond: lmStudioStats.tokensPerSecond,
timeToFirstTokenSec: lmStudioStats.timeToFirstTokenSec,
predictedTokensCount: lmStudioStats.predictedTokensCount,
promptTokensCount: lmStudioStats.promptTokensCount,
totalTimeSec: lmStudioStats.totalTimeSec,
draftModelKey: lmStudioStats.draftModelKey,
draftTokensCount: lmStudioStats.draftTokensCount,
acceptedDraftTokensCount: lmStudioStats.acceptedDraftTokensCount,
stopReason: finishStopReason,
},
});
}
} catch (err: any) {
if (err?.name === 'AbortError' || this.abortController.signal.aborted) {
@@ -1007,60 +1077,34 @@ export class AgentExecutor {
//
// Only attempts recovery on loopDepth === 0 — we don't want to
// ping-pong inside the autonomous action loop.
//
// Note: the previous SDK handle-reset retry that lived here is now done
// inside `LMStudioStreamer.stream()` itself (it auto-recreates the SDK
// on attempt 2 for both dead-handle errors *and* clean-but-empty streams),
// so by the time we get here with `useLmStudioSdk` and no text, the SDK
// path has already tried twice. Go straight to the REST fallback.
if (!aiResponseText.trim() && !this.abortController?.signal.aborted && loopDepth === 0) {
if (useLmStudioSdk && this.options.lmStudioStreamer?.resetHandle) {
try {
logInfo('Empty SDK stream — resetting LM Studio handle and retrying streaming once.', { model: actualModel });
await this.options.lmStudioStreamer.resetHandle(actualModel);
const retryStream = this.options.lmStudioStreamer.stream({
modelName: actualModel,
messages: messagesForRequest.map((m) => ({ role: m.role, content: m.content })),
temperature,
maxTokens: maxOutputTokens,
contextOverflowPolicy: config.contextOverflowPolicy,
signal: this.abortController.signal,
});
let retryText = '';
for await (const { token, stopReason } of retryStream) {
if (this.isStaleRun(runId)) return;
if (token) {
retryText += token;
if (postLiveDeltas) this.webview.postMessage({ type: 'streamChunk', value: token });
}
if (stopReason) finishStopReason = stopReason;
}
if (retryText.trim()) {
aiResponseText = retryText;
logInfo('Handle-reset retry recovered the answer.', { model: actualModel, length: retryText.length });
}
} catch (retryErr: any) {
logError('Handle-reset retry failed.', { model: actualModel, error: retryErr?.message ?? String(retryErr) });
}
}
if (!aiResponseText.trim() && !this.abortController?.signal.aborted) {
try {
logInfo('Empty stream — trying non-streaming fallback.', { engine, model: actualModel, apiUrl });
const fallback = await this.callNonStreaming({
baseUrl: ollamaUrl,
modelName: actualModel,
engine,
messages: messagesForRequest,
temperature,
maxTokens: maxOutputTokens,
contextLength: ctxLimits.contextLength,
signal: this.abortController?.signal,
});
if (fallback.stopReason) finishStopReason = fallback.stopReason;
if (fallback.text && fallback.text.trim()) {
aiResponseText = fallback.text;
logInfo('Non-streaming fallback recovered the answer.', { engine, model: actualModel, length: fallback.text.length });
}
} catch (recoverErr: any) {
logError('Non-streaming fallback also failed.', {
engine, model: actualModel, error: recoverErr?.message ?? String(recoverErr),
});
try {
logInfo('Empty stream — trying non-streaming fallback.', { engine, model: actualModel, apiUrl });
const fallback = await this.callNonStreaming({
baseUrl: ollamaUrl,
modelName: actualModel,
engine,
messages: messagesForRequest,
temperature,
maxTokens: maxOutputTokens,
contextLength: ctxLimits.contextLength,
signal: this.abortController?.signal,
});
if (fallback.stopReason) finishStopReason = fallback.stopReason;
if (fallback.text && fallback.text.trim()) {
aiResponseText = fallback.text;
logInfo('Non-streaming fallback recovered the answer.', { engine, model: actualModel, length: fallback.text.length });
}
} catch (recoverErr: any) {
logError('Non-streaming fallback also failed.', {
engine, model: actualModel, error: recoverErr?.message ?? String(recoverErr),
});
}
}
@@ -1183,7 +1227,12 @@ export class AgentExecutor {
}
if (this.isStaleRun(runId)) return;
}
const cleanedVisible = cleaned.visible;
// [Plain Text Output] outputFormat='plain' (기본)이면 모델이 무심코 내보낸
// 마크다운 마커(`##`, `**`, `> `, `* ` …) 를 후처리로 모두 제거. 라벨 텍스트는 유지.
// markdown 모드면 legacy 그대로 통과.
const cleanedVisible = getConfig().outputFormat === 'plain'
? stripMarkdownFormatting(cleaned.visible)
: cleaned.visible;
// 5. Execute Actions
const rationale = this.parseRationale(cleanedVisible);
@@ -1235,7 +1284,13 @@ export class AgentExecutor {
if (notice && assistantContent.trim()) {
assistantContent = assistantContent.trimEnd() + notice;
}
const finalAssistantContent = assistantContent;
// [Plain Text Output — FINAL pass] enforcer 들이 `## 경로 확인 결과` 같은 하드코딩 헤더를
// 다시 prepend 한 후에도 마커가 남지 않도록, webview / chatHistory 에 들어가는 최종 문자열을
// 한 번 더 sanitize. cleanedVisible 단계의 1차 sanitize 는 model 출력 자체를 정리하고,
// 이 2차 sanitize 는 enforcer 출력까지 모두 청소한다.
const finalAssistantContent = getConfig().outputFormat === 'plain'
? stripMarkdownFormatting(assistantContent)
: assistantContent;
const assistantMessage: ChatMessage = { role: 'assistant', content: finalAssistantContent, internal: false, rationale };
this.chatHistory.push(assistantMessage);
@@ -1470,21 +1525,33 @@ export class AgentExecutor {
: '';
// 워크플로우 매니저에게 설정 기반 실행 위임
const finalReport = await AgentWorkflowManager.runStrictWorkflow(
// [Clean Stream] 단계 진행 메시지는 채팅 본문(streamChunk) 이 아닌 사이드바
// 상단의 workflowStage 인디케이터로만 표시한다 → "생각 단계가 본문에 계속 보임"
// 답답함 제거. 채팅 버블에는 최종 답변만 한 번에 들어간다.
const rawFinalReport = await AgentWorkflowManager.runStrictWorkflow(
prompt,
modelName,
`${brainContext}${selectedAgentContext}${designerContext}`,
signal,
(step, msg) => {
this.webview?.postMessage({ type: 'autoContinue', value: `${step}: ${msg}` });
// 각 단계별 시작을 알림
this.webview?.postMessage({ type: 'streamChunk', value: `\n\n> **[${step}]** ${msg}\n\n` });
this.webview?.postMessage({
type: 'workflowStage',
value: { step, message: msg, done: step === '완료' || step === '오류' }
});
}
);
if (signal.aborted || !this.webview) return;
this.webview.postMessage({ type: 'streamChunk', value: `\n\n--- \n\n${finalReport}` });
// [Plain Text Output] Synthesizer가 잘 따라줬어도 작은 모델은 `##` `**` 를 흘리는 경우가 있어
// 최종 후처리로 한 번 더 마커를 벗긴다. 채팅 history 에도 정제된 결과만 남겨 다음 턴 컨텍스트에서
// 마커가 재학습되는 일을 막는다.
const finalReport = getConfig().outputFormat === 'plain'
? stripMarkdownFormatting(rawFinalReport)
: rawFinalReport;
this.webview.postMessage({ type: 'streamChunk', value: finalReport });
this.webview.postMessage({ type: 'workflowStage', value: { step: '완료', message: '', done: true } });
this.webview.postMessage({ type: 'streamEnd' });
this.chatHistory.push({ role: 'assistant', content: finalReport });
@@ -1494,6 +1561,8 @@ export class AgentExecutor {
this.webview.postMessage({ type: 'autoContinue', value: '✅ 모든 분석이 성공적으로 완료되었습니다.' });
} catch (error: any) {
// 어떤 종료 경로에서든 stage indicator 는 반드시 닫는다 — 안 닫으면 사이드바에 영원히 "③ 자기 검증..." 가 남는다.
this.webview?.postMessage({ type: 'workflowStage', value: { step: '완료', message: '', done: true } });
if (error.name === 'AbortError' || error.message?.includes('cancelled')) {
this.statusBarManager.updateStatus(AgentStatus.Idle, 'Workflow Cancelled');
return;
@@ -1537,10 +1606,23 @@ export class AgentExecutor {
temperature: 0.3,
maxTokens: subMaxTokens,
contextOverflowPolicy,
...this.lmStudioSamplingFromConfig(),
...this.lmStudioRespondExtrasFromConfig(),
signal: this.abortController?.signal,
});
for await (const { token } of stream) {
let subStopReason: string | undefined;
for await (const { token, stopReason } of stream) {
if (token) responseText += token;
if (stopReason) subStopReason = stopReason;
}
// Sub-agent answers that got cut mid-sentence corrupt the pipeline silently
// (Planner produces a half-step, Writer can't recover). Record it through logError
// (error level) so the operator can raise subMaxTokens or pick a less aggressive output budget.
if (subStopReason && /maxPredicted|context|truncat/i.test(subStopReason)) {
logError('Sub-agent answer hit a generation limit.', {
role, model: modelName, stopReason: subStopReason,
chars: responseText.length, maxTokens: subMaxTokens,
});
}
return responseText;
} catch (err: any) {
@@ -1726,12 +1808,13 @@ export class AgentExecutor {
return [
'Intent operating contract — Code Review:',
'The user wants a real review, not a meta-plan of how to review.',
'Required sections in this exact order, in Korean:',
' 1. ## 한 줄 판단 — one sentence: would you rely on this today, and under what constraint?',
' 2. ## 잘된 점 — 2~4 concrete strengths. Each MUST cite a specific file path (and a function or section if you can name one) and explain WHY it works, not just that it exists.',
' 3. ## 부족한 점 — 2~4 concrete weaknesses or risks. Same rule: cite a specific file/area, name the actual problem (race condition, missing retry, coupling, etc.), and say what breaks because of it.',
' 4. ## 사용자 관점 개선 — 2~4 changes phrased from the END USER\'s perspective ("when X happens, the user currently sees Y; they should see Z"). Tie each to a code location that needs to change.',
' 5. ## 다음 한 수 — exactly one next action, small enough to do this week.',
'OUTPUT FORMAT: PLAIN TEXT only. Section labels are bare words on their own line (no "#", "##", "**", "__", "> "). Bullets use "- ". Long answers MUST start with a "핵심 요약" block (2~4 bullets) before any detail.',
'Required sections in this exact order, in Korean (each label appears as a plain line, NOT a markdown heading):',
' 1) 한 줄 판단 — one sentence: would you rely on this today, and under what constraint?',
' 2) 잘된 점 — 2~4 concrete strengths. Each MUST cite a specific file path (and a function or section if you can name one) and explain WHY it works, not just that it exists.',
' 3) 부족한 점 — 2~4 concrete weaknesses or risks. Same rule: cite a specific file/area, name the actual problem (race condition, missing retry, coupling, etc.), and say what breaks because of it.',
' 4) 사용자 관점 개선 — 2~4 changes phrased from the END USER\'s perspective ("when X happens, the user currently sees Y; they should see Z"). Tie each to a code location that needs to change.',
' 5) 다음 한 수 — exactly one next action, small enough to do this week.',
'',
'Hard rules — these are the things that made past reviews feel like a template:',
'- Do NOT write meta-sentences like "확인해야 합니다", "다음 리뷰에서는 ~를 보면 됩니다", "~로 보입니다", "~인지 확인하는 것이 핵심입니다". Either you observed it or you read the file with <read_file> right now.',
@@ -1998,12 +2081,53 @@ export class AgentExecutor {
return false;
}
const complexByShape = prompt.length > 180 || /(보고서|심층|종합\s*분석|리서치|조사|전략\s*수립|기획안|제안서|roadmap|research|report|deep\s*analysis|strategy|proposal)/i.test(prompt);
if (!complexByShape) {
const cfg = getConfig();
const mode = cfg.workflowMultiAgentMode || 'auto';
// 'off' → 기존 키워드/길이 휴리스틱만 사용 (legacy multiAgentEnabled 토글 존중).
if (mode === 'off') {
const legacyComplex = prompt.length > 180 || /(보고서|심층|종합\s*분석|리서치|조사|전략\s*수립|기획안|제안서|roadmap|research|report|deep\s*analysis|strategy|proposal)/i.test(prompt);
if (!legacyComplex) return false;
return configEnabled || /(보고서|심층|종합\s*분석|리서치|조사|전략\s*수립|기획안|제안서|research|report|deep\s*analysis|strategy|proposal)/i.test(prompt);
}
// 인사·잡담은 5단계 파이프라인 낭비. 짧은 casual prompt 는 제외.
if (this.isCasualConversationPrompt(prompt)) {
return false;
}
if (prompt.trim().length < 12) {
return false;
}
return configEnabled || /(보고서|심층|종합\s*분석|리서치|조사|전략\s*수립|기획안|제안서|research|report|deep\s*analysis|strategy|proposal)/i.test(prompt);
// 'always' → 위 가드만 통과하면 무조건 발동.
if (mode === 'always') return true;
// 'auto' → 다음 중 하나라도 만족하면 발동:
// (1) 사용자가 multiAgentEnabled 를 명시적으로 켰다,
// (2) 작은 모델 (≤4B params) 이라 한 번에 처리하기 위험,
// (3) prompt 토큰이 효과적 context window 의 임계 이상을 차지한다,
// (4) "보고서/리뷰/심층 분석" 같은 명백한 복합 작업 키워드 매치,
// (5) prompt 길이 자체가 큼 (>240 chars).
if (configEnabled) return true;
const paramB = estimateModelParamsB(cfg.defaultModel);
if (paramB !== null && paramB <= 4) return true;
try {
const effectiveCtx = cfg.smallModelContextCap > 0 && paramB !== null && paramB <= 4
? cfg.smallModelContextCap
: cfg.contextLength;
const promptTokens = estimateTokens(prompt);
const threshold = Math.floor(effectiveCtx * cfg.workflowAutoCtxFractionThreshold);
if (promptTokens >= threshold) return true;
} catch { /* 안전한 폴백: 키워드/길이 체크로 진행 */ }
if (/(보고서|심층|종합\s*분석|리서치|조사|전략\s*수립|기획안|제안서|코드\s*리뷰|리뷰|아키텍처|architecture|research|report|deep\s*analysis|strategy|proposal|review)/i.test(prompt)) {
return true;
}
if (prompt.length > 240) return true;
return false;
}
private buildAstraModeArchitectureContext(prompt: string): string {
@@ -2129,6 +2253,78 @@ export class AgentExecutor {
}
}
/**
 * v2.2.69 — Compresses the messages that the sliding-window trim removed into a
 * short plain-text digest, using heuristics only (no extra LLM call).
 *
 * For each non-internal `user` / `assistant` message with non-blank string content,
 * only the first sentence is kept (assistant answers are assumed to be
 * conclusion-first). Lines are emitted in the order the messages are given and are
 * labelled `U<n>` / `A<n>`, where `n` is the number of user messages seen so far
 * (an assistant line that precedes any user line is therefore labelled `A0`).
 * Markdown heading markers, `**` and fenced code blocks are stripped or replaced
 * so the digest stays plain text.
 *
 * @param dropped Messages removed from the request (expected oldest first).
 * @returns `''` when `dropped` is empty; otherwise a header line stating how many
 *          messages were omitted, followed by at most 8 digest lines. When more
 *          than 8 lines were produced, only the most recent 8 are kept and one
 *          extra line reports how many older lines were left out.
 */
private buildDroppedHistorySummary(dropped: ChatMessage[]): string {
    if (dropped.length === 0) return '';

    // Upper bound (in UTF-16 code units) for one extracted sentence.
    const SENTENCE_CAP = 140;
    // Only the most recent MAX_LINES digest lines are kept.
    const MAX_LINES = 8;

    const firstSentence = (raw: string): string => {
        const flattened = String(raw || '')
            .replace(/^\s{0,3}#{1,6}\s+/gm, '')          // drop markdown heading markers
            .replace(/\*\*/g, '')                        // drop bold markers
            .replace(/`{3}[\s\S]*?`{3}/g, '[code]')      // collapse fenced code blocks
            .replace(/\s+/g, ' ')
            .trim();
        // A sentence ends at `.`, `!` or `?` only when that character is followed by
        // whitespace or the end of the text, so dots inside file names ("agent.ts")
        // or version numbers ("v2.2.69") no longer truncate the digest. The CJK full
        // stop `。` ends a sentence on its own because it is normally not followed by
        // a space. The match is limited to SENTENCE_CAP characters in total.
        const hit = flattened.match(/^.{1,139}?(?:[.!?](?=\s|$)|。)/);
        // No terminator within the cap: fall back to a hard cut.
        return (hit ? hit[0] : flattened.slice(0, SENTENCE_CAP)).trim();
    };

    const lines: string[] = [];
    let userTurnIdx = 0;
    for (const msg of dropped) {
        if (msg.internal) continue;
        const content = typeof msg.content === 'string' ? msg.content : '';
        if (!content.trim()) continue;
        if (msg.role === 'user') {
            userTurnIdx++;
            lines.push(`U${userTurnIdx}: ${firstSentence(content)}`);
        } else if (msg.role === 'assistant') {
            // Shares the index of the most recent user line so U/A pairs line up.
            lines.push(`A${userTurnIdx}: ${firstSentence(content)}`);
        }
    }

    const header = `[이전 대화 요약 — 총 ${dropped.length}개 메시지가 컨텍스트 한계로 생략됨]`;
    if (lines.length > MAX_LINES) {
        // Keep the newest MAX_LINES lines; the older ones are only counted.
        const omitted = lines.length - MAX_LINES;
        return `${header}\n(더 오래된 ${omitted}개 턴 생략됨)\n${lines.slice(-MAX_LINES).join('\n')}`;
    }
    return `${header}\n${lines.join('\n')}`;
}
/**
 * v2.2.69 — Builds the mode signature for the current request.
 *
 * The signature is a readable string of four `key=value` fields joined by `|`
 * (`agent`, `company`, `multi`, `brain`). Callers compare it with the previous
 * request's signature to decide whether a "previous mode / current mode" bridge
 * line should be added to the system prompt.
 *
 * @param opts Mode inputs; a missing or empty `agentSkillName` becomes `none`, a
 *             missing or empty `brainName` becomes `?`, and the two booleans are
 *             rendered as `on` / `off`.
 * @returns The joined signature string.
 */
private computeModeSignature(opts: { agentSkillName?: string; companyMode?: boolean; multiAgent?: boolean; brainName?: string }): string {
    const onOff = (flag?: boolean): string => (flag ? 'on' : 'off');
    const agent = opts.agentSkillName || 'none';
    const brain = opts.brainName || '?';
    return `agent=${agent}|company=${onOff(opts.companyMode)}|multi=${onOff(opts.multiAgent)}|brain=${brain}`;
}
/**
 * v2.2.69 — Extracts a one-line "previous topic" for the mode-transition bridge
 * from the most recent non-internal user message in `chatHistory`.
 *
 * NOTE(review): if the caller has already appended the current prompt to
 * `chatHistory` before calling this, the returned topic is the current prompt
 * rather than the previous one — confirm against the call site.
 *
 * @returns The message text with whitespace runs collapsed to single spaces,
 *          trimmed and cut to 120 characters; `''` when there is no such message
 *          or its content is not a string.
 */
private buildLastTopicLine(): string {
    // Walk backwards so the first hit is the latest visible user message.
    for (let i = this.chatHistory.length - 1; i >= 0; i--) {
        const entry = this.chatHistory[i];
        if (!entry || entry.internal || entry.role !== 'user') continue;
        if (typeof entry.content !== 'string') return '';
        const flattened = entry.content.replace(/\s+/g, ' ').trim();
        return flattened.slice(0, 120);
    }
    return '';
}
private buildRequestHistory(history: ChatMessage[]): ChatMessage[] {
return history.map((message) => {
if (message.role !== 'assistant' || typeof message.content !== 'string') {
@@ -2957,17 +3153,23 @@ export class AgentExecutor {
// 같은 엔진 내에서만 model candidate / message variant retry
for (const candidateModel of modelCandidates) {
for (const variant of messageVariants) {
const sampling = samplingToRestBody(this.lmStudioSamplingFromConfig());
const streamBody = {
model: candidateModel,
messages: variant.messages,
stream: true,
...(engine === 'lmstudio'
? { max_tokens: maxTokens, temperature }
: { options: { num_ctx: numCtx, num_predict: maxTokens, temperature } }),
// LM Studio's OpenAI-compatible REST extends the schema with top_k/min_p/
// repeat_penalty (same names as Ollama). Spread the shared sampling block so
// the REST fallback matches the SDK path — without it a fallback after a
// dead handle quietly loses the glitch-suppression preset.
? { max_tokens: maxTokens, temperature, ...sampling }
: { options: { num_ctx: numCtx, num_predict: maxTokens, temperature, ...sampling } }),
};
// 일시적 네트워크 오류용 retry (최대 2회, 지수 backoff)
const MAX_RETRIES = 2;
let serviceDown = false;
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
try {
if (attempt > 0) {
@@ -3013,13 +3215,33 @@ export class AgentExecutor {
if (lastError.name === 'AbortError') {
throw lastError;
}
// ECONNREFUSED / DNS-level failures mean the engine process isn't even
// listening — no amount of retries or message-variant juggling will help.
// Abandon the candidate/variant loops now and surface the "is X running?"
// error fast instead of burning 12 fetch attempts before giving up.
const errCode = (error?.cause?.code ?? error?.code ?? '').toString();
const errMsg = lastError.message;
if (
errCode === 'ECONNREFUSED' || errCode === 'ENOTFOUND' || errCode === 'EAI_AGAIN'
|| /ECONNREFUSED|ENOTFOUND|getaddrinfo|fetch failed/i.test(errMsg)
) {
serviceDown = true;
logError('AI streaming request: engine appears to be down.', {
engine, apiUrl, code: errCode, error: errMsg,
});
break; // exit retry loop
}
logError('AI streaming request failed.', {
engine, variant: variant.name, apiUrl, model: candidateModel,
attempt, error: lastError.message
});
}
}
if (serviceDown) break; // skip remaining variants
}
// serviceDown also short-circuits the model-candidate loop — there is no
// candidate / variant the engine can answer if it isn't listening at all.
if (lastError && /ECONNREFUSED|ENOTFOUND|fetch failed/i.test(lastError.message)) break;
}
// 명확한 에러 메시지: 어느 엔진이 실패했는지 사용자에게 알림
@@ -3151,13 +3373,14 @@ export class AgentExecutor {
const numCtx = Math.max(2048, params.contextLength ?? 32768);
const apiUrl = buildApiUrl(baseUrl, engine, 'chat');
const variants = this.buildEngineMessageVariants(messages, engine);
const sampling = samplingToRestBody(this.lmStudioSamplingFromConfig());
const body = {
model: modelName,
messages: variants[0].messages,
stream: false,
...(engine === 'lmstudio'
? { max_tokens: maxTokens, temperature }
: { options: { num_ctx: numCtx, num_predict: maxTokens, temperature } }),
? { max_tokens: maxTokens, temperature, ...sampling }
: { options: { num_ctx: numCtx, num_predict: maxTokens, temperature, ...sampling } }),
};
const response = await fetch(apiUrl, {
method: 'POST',
@@ -3231,6 +3454,8 @@ export class AgentExecutor {
temperature: params.temperature,
maxTokens: params.maxTokens,
contextOverflowPolicy: params.contextOverflowPolicy,
...this.lmStudioSamplingFromConfig(),
...this.lmStudioRespondExtrasFromConfig(),
signal: params.signal,
});
for await (const { token, stopReason } of stream) {
@@ -3356,6 +3581,29 @@ export class AgentExecutor {
];
}
/**
 * Reads the LM Studio sampling settings (top-p, top-k, min-p, repeat penalty)
 * from the current configuration and returns them as one shared block.
 *
 * The same block is spread into the SDK streamer calls and, after conversion by
 * `samplingToRestBody`, into the REST request bodies, so both paths are sent the
 * same sampling parameters. Without that, a REST fallback after a dead SDK handle
 * would silently lose the glitch suppression the SDK path applies (e.g. broken
 * Hangul tokens).
 *
 * @returns The sampling block built from the current config values.
 */
private lmStudioSamplingFromConfig(): LmStudioSampling {
    const {
        lmStudioTopP,
        lmStudioTopK,
        lmStudioMinP,
        lmStudioRepeatPenalty,
    } = getConfig();
    return {
        topP: lmStudioTopP,
        topK: lmStudioTopK,
        minP: lmStudioMinP,
        repeatPenalty: lmStudioRepeatPenalty,
    };
}
/**
 * SDK-only extras for `respond()` — currently just the draft model used for
 * speculative decoding.
 *
 * @returns `{ draftModel }` when `lmStudioDraftModel` is set to a non-empty value
 *          in the config, otherwise an empty object so spreading it adds nothing.
 */
private lmStudioRespondExtrasFromConfig(): { draftModel?: string } {
    const draftModel = getConfig().lmStudioDraftModel;
    if (!draftModel) return {};
    return { draftModel };
}
private buildModelCandidates(modelName: string, engine: 'lmstudio' | 'ollama'): string[] {
const candidates = [modelName];
if (engine === 'lmstudio') {