chore: version up to 2.80.34 and package
This commit is contained in:
+276
-37
@@ -40,6 +40,17 @@ import {
|
||||
import { MemoryManager } from './memory';
|
||||
import { RetrievalOrchestrator } from './retrieval';
|
||||
import { resolveScopeForAgent } from './skills/agentKnowledgeMap';
|
||||
import {
|
||||
estimateTokens,
|
||||
estimateMessagesTokens,
|
||||
computeOutputBudget,
|
||||
trimHistoryToBudget,
|
||||
truncateSystemPromptContext,
|
||||
classifyStopReason,
|
||||
truncationNotice,
|
||||
estimateModelParamsB,
|
||||
type ContextLimits,
|
||||
} from './lib/contextManager';
|
||||
|
||||
export interface ChatMessage {
|
||||
role: 'user' | 'assistant' | 'system';
|
||||
@@ -94,7 +105,17 @@ const AGENT_PROMPTS: Record<AgentRole, string> = {
|
||||
3. Deliver a logical, consistent, and polished response.`
|
||||
};
|
||||
|
||||
// Local-path detectors used to decide whether a user prompt refers to a file/dir on disk.
// Both values are regex *sources* rather than RegExp objects, so each call site can compile
// them with the flags it needs ('i' for `.test()`, 'gi' for `.match()`).

// POSIX: one of the listed roots (/Volumes/, /Users/, /home/, /opt/, /srv/, /mnt/, /data/,
// /workspace/) or ~/, followed by at least one path character. Whitespace, backtick
// (markdown code spans), quotes, angle brackets, pipe and glob characters end the match.
const POSIX_ABS_PATH_SRC = "(?:\\/(?:Volumes|Users|home|opt|srv|mnt|data|workspace)\\/|~\\/)[^\\s`\"'<>|*?]+";

// Windows: drive letter (C:\ or C:/) or UNC (\\server\share). Backslash IS allowed as a separator here.
// The drive letter must not be preceded by a letter or digit; without that guard the tail of a
// URL scheme ("https://host/..." -> "s:/") would be mistaken for a drive root.
const WIN_ABS_PATH_SRC = "(?:(?<![A-Za-z0-9])[A-Za-z]:[\\\\/]|\\\\\\\\[^\\s\\\\/]+\\\\[^\\s\\\\/]+)[^\\s`\"'<>|*?]*";
|
||||
|
||||
export class AgentExecutor {
|
||||
/** Non-global instances — safe for repeated `.test()` (a shared /g/ regex's lastIndex would corrupt results). */
|
||||
static readonly ABS_PATH_RE = new RegExp(POSIX_ABS_PATH_SRC, 'i');
|
||||
static readonly WIN_ABS_PATH_RE = new RegExp(WIN_ABS_PATH_SRC, 'i');
|
||||
|
||||
private chatHistory: ChatMessage[] = [];
|
||||
private abortController: AbortController | null = null;
|
||||
private webview: vscode.Webview | undefined;
|
||||
@@ -107,6 +128,17 @@ export class AgentExecutor {
|
||||
private memoryManager: MemoryManager;
|
||||
private retrievalOrchestrator: RetrievalOrchestrator;
|
||||
private currentTaskId: string = 'default_session';
|
||||
/** Set by buildMemoryContext after each retrieval — fed to the webview's per-answer "scope used" footer. */
|
||||
private _lastRetrievalInfo: {
|
||||
agentName: string | null;
|
||||
scoped: boolean;
|
||||
source: string;
|
||||
configuredFolders: string[]; // relative to brain root
|
||||
usedBrainFiles: string[]; // relative to brain root
|
||||
usedMemoryLayers: string[]; // raw RetrievalSource ids
|
||||
totalChunks: number;
|
||||
selectedChunks: number;
|
||||
} | null = null;
|
||||
|
||||
private readonly options: AgentExecutorOptions;
|
||||
|
||||
@@ -445,7 +477,8 @@ export class AgentExecutor {
|
||||
].join('\n');
|
||||
|
||||
// 3. 조립: 기본(축소) → 유틸리티 컨텍스트 → 에이전트 프롬프트(최후단)
|
||||
fullSystemPrompt = `${strippedSystemPrompt}${internetCtx}${memoryCtx}${designerCtx}${secondBrainTraceCtx}\n\n[CONTEXT]\n${brainContext}${brainInventoryCtx}\n${contextBlock}${negativeCtx}${agentDirective}`;
|
||||
// [CONTEXT] … [/CONTEXT] 사이만 컨텍스트 초과 시 trim 대상 — agentDirective/negative 는 보호.
|
||||
fullSystemPrompt = `${strippedSystemPrompt}${internetCtx}${memoryCtx}${designerCtx}${secondBrainTraceCtx}\n\n[CONTEXT]\n${brainContext}${brainInventoryCtx}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}${agentDirective}`;
|
||||
} else {
|
||||
// 기존 Astra 모드 (에이전트 미선택)
|
||||
const localProjectKnowledgeCtx = prompt && localPathContext && this.isProjectKnowledgeCreationRequest(prompt)
|
||||
@@ -464,13 +497,95 @@ export class AgentExecutor {
|
||||
"- [충돌] 지식 간 충돌 발생 시 시스템이 독단적으로 판단하지 말고, 반드시 [CONFLICT WARNING] 플래그와 함께 상충되는 두 관점을 모두 명시하여 사용자에게 판단을 위임할 것."
|
||||
].join('\n');
|
||||
|
||||
fullSystemPrompt = `${systemPrompt}${internetCtx}${memoryCtx}${designerCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}\n\n[CONTEXT]\n${brainContext}${brainInventoryCtx}\n${contextBlock}${negativeCtx}`;
|
||||
// [CONTEXT] … [/CONTEXT] 사이만 컨텍스트 초과 시 trim 대상 — negative constraints 는 보호.
|
||||
fullSystemPrompt = `${systemPrompt}${internetCtx}${memoryCtx}${designerCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}\n\n[CONTEXT]\n${brainContext}${brainInventoryCtx}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}`;
|
||||
}
|
||||
// ──────────────────────────────────────────────────────────────────
|
||||
// [Context Limit Manager] context length 는 "답변을 그만큼 길게 써도 된다"
|
||||
// 는 뜻이 아니다: 시스템 프롬프트 + 대화 기록 + 입력 + 생성될 답변 + 여유분 ≤ context length.
|
||||
// 요청을 보내기 전에 입력 토큰을 추정해서
|
||||
// (1) 시스템 프롬프트가 과하면 [CONTEXT] 블록을 마지막 수단으로 줄이고
|
||||
// (2) 대화 기록을 남은 예산에 맞게 압축하고 (UI 표시용 chatHistory 는 건드리지 않음)
|
||||
// (3) 동적으로 출력 상한(maxOutputTokens)을 계산한다.
|
||||
// ──────────────────────────────────────────────────────────────────
|
||||
// Small models (≤4B) routinely fail on prompts that fit their *nominal* context but
|
||||
// exceed their *effective* capability (server log shows truncated=0 yet eval time≈0ms —
|
||||
// the model emitted EOS as the first token). When detected, budget against a smaller
|
||||
// effective window so the system prompt / RAG / history get shrunk proactively.
|
||||
const modelParamB = estimateModelParamsB(actualModel);
|
||||
const smallModelCap = config.smallModelContextCap; // 0 disables this guard
|
||||
const cappedForSmallModel = smallModelCap > 0
|
||||
&& modelParamB !== null && modelParamB <= 4
|
||||
&& config.contextLength > smallModelCap;
|
||||
const effectiveContextLength = cappedForSmallModel ? smallModelCap : config.contextLength;
|
||||
if (cappedForSmallModel) {
|
||||
logInfo('Small model detected — capping effective context window for budgeting.', {
|
||||
model: actualModel, paramB: modelParamB,
|
||||
nominalContext: config.contextLength, effectiveContext: effectiveContextLength,
|
||||
});
|
||||
}
|
||||
const ctxLimits: ContextLimits = {
|
||||
contextLength: effectiveContextLength,
|
||||
maxOutputTokens: config.maxOutputTokens,
|
||||
safetyMargin: config.contextSafetyMargin,
|
||||
minOutputTokens: 512,
|
||||
};
|
||||
const imageCount = (reqMessages as any[])
|
||||
.reduce((n, m) => n + (Array.isArray(m?.images) ? m.images.length : 0), 0);
|
||||
const imageTokenReserve = imageCount * 1024;
|
||||
|
||||
// (1) 시스템 프롬프트는 예산의 ~65%까지만 허용 — 그 이상이면 [CONTEXT] 블록부터 잘라낸다.
|
||||
const systemCapTokens = Math.max(
|
||||
1024,
|
||||
Math.floor((ctxLimits.contextLength - ctxLimits.safetyMargin - ctxLimits.minOutputTokens - imageTokenReserve) * 0.65)
|
||||
);
|
||||
const { prompt: budgetedSystemPrompt, truncated: systemTruncated } =
|
||||
truncateSystemPromptContext(fullSystemPrompt, systemCapTokens);
|
||||
if (systemTruncated) {
|
||||
logInfo('System prompt context truncated to fit the context window.', { model: actualModel, systemCapTokens });
|
||||
}
|
||||
const systemTokens = estimateTokens(budgetedSystemPrompt) + 4;
|
||||
|
||||
// (2) 대화 기록 압축.
|
||||
const historyBudget = Math.max(
|
||||
256,
|
||||
ctxLimits.contextLength - systemTokens - ctxLimits.safetyMargin - ctxLimits.minOutputTokens - imageTokenReserve
|
||||
);
|
||||
let budgetedHistory: ChatMessage[] = reqMessages;
|
||||
if (config.autoCompactHistory) {
|
||||
const trim = trimHistoryToBudget<ChatMessage>(reqMessages, historyBudget, (n) => ({
|
||||
role: 'system',
|
||||
content: `[이전 대화 ${n}개 메시지는 컨텍스트 한계 때문에 이번 요청에서 생략되었습니다. 필요하면 사용자에게 다시 확인하세요.]`,
|
||||
internal: true,
|
||||
}));
|
||||
budgetedHistory = trim.messages;
|
||||
if (trim.droppedCount > 0) {
|
||||
logInfo('Conversation history compacted to fit the context window.', {
|
||||
model: actualModel, droppedCount: trim.droppedCount, historyBudget,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const messagesForRequest: ChatMessage[] = [
|
||||
{ role: 'system', content: fullSystemPrompt, internal: true },
|
||||
...reqMessages
|
||||
{ role: 'system', content: budgetedSystemPrompt, internal: true },
|
||||
...budgetedHistory
|
||||
];
|
||||
|
||||
// (3) 동적 출력 상한.
|
||||
const inputTokens = estimateMessagesTokens(messagesForRequest) + imageTokenReserve;
|
||||
const outputBudget = computeOutputBudget(inputTokens, ctxLimits);
|
||||
const maxOutputTokens = outputBudget.maxOutputTokens;
|
||||
if (outputBudget.tight) {
|
||||
logError('Prompt nearly fills the context window — output budget is at the minimum.', {
|
||||
model: actualModel, contextLength: ctxLimits.contextLength, inputTokens, maxOutputTokens,
|
||||
});
|
||||
}
|
||||
logInfo('Context budget computed.', {
|
||||
model: actualModel, contextLength: ctxLimits.contextLength,
|
||||
inputTokens, maxOutputTokens, droppedHistory: reqMessages.length - budgetedHistory.length,
|
||||
});
|
||||
let finishStopReason: string | undefined;
|
||||
|
||||
// 4. Call AI Engine
|
||||
this.abortController = new AbortController();
|
||||
requestTimeoutHandle = setTimeout(() => {
|
||||
@@ -485,6 +600,30 @@ export class AgentExecutor {
|
||||
let buffer = '';
|
||||
|
||||
if (loopDepth === 0) {
|
||||
// Context-budget preview so the UI can show what actually went into this turn
|
||||
// (≈N tokens, Brain N files, open file included?, history compacted?, small-model warning).
|
||||
this.webview.postMessage({
|
||||
type: 'contextBudget',
|
||||
value: {
|
||||
model: actualModel,
|
||||
engine,
|
||||
paramB: modelParamB,
|
||||
contextLength: ctxLimits.contextLength,
|
||||
nominalContextLength: config.contextLength,
|
||||
cappedForSmallModel,
|
||||
inputTokens,
|
||||
maxOutputTokens,
|
||||
systemTokens,
|
||||
historyKept: budgetedHistory.length,
|
||||
droppedHistory: reqMessages.length - budgetedHistory.length,
|
||||
systemTruncated,
|
||||
includesOpenFile: !!contextBlock && contextBlock.includes('[Currently open file:'),
|
||||
brainFiles: brainFiles.length,
|
||||
imageCount,
|
||||
tight: outputBudget.tight,
|
||||
smallModel: cappedForSmallModel || (modelParamB !== null && modelParamB <= 3 && inputTokens > 8000),
|
||||
},
|
||||
});
|
||||
this.webview.postMessage({ type: 'streamStart' });
|
||||
this.options.onStreamLifecycle?.start();
|
||||
}
|
||||
@@ -497,18 +636,25 @@ export class AgentExecutor {
|
||||
modelName: actualModel,
|
||||
messages: messagesForRequest.map((m) => ({ role: m.role, content: m.content })),
|
||||
temperature,
|
||||
maxTokens: maxOutputTokens,
|
||||
contextOverflowPolicy: config.contextOverflowPolicy,
|
||||
signal: this.abortController.signal,
|
||||
});
|
||||
for await (const { token } of stream) {
|
||||
for await (const { token, stopReason } of stream) {
|
||||
if (this.isStaleRun(runId)) return;
|
||||
if (token) aiResponseText += token;
|
||||
if (stopReason) finishStopReason = stopReason;
|
||||
}
|
||||
} catch (err: any) {
|
||||
if (err?.name === 'AbortError' || this.abortController.signal.aborted) {
|
||||
logInfo('Generation aborted by user.');
|
||||
} else {
|
||||
logError('LM Studio SDK chat failed.', { engine, error: err?.message ?? String(err) });
|
||||
this.webview?.postMessage({ type: 'error', value: `LM Studio: ${err?.message ?? err}` });
|
||||
const msg = err?.message ?? String(err);
|
||||
if (/context\s*length|contextlengthreached|exceed|too\s*long/i.test(msg)) {
|
||||
finishStopReason = 'contextLengthReached';
|
||||
}
|
||||
logError('LM Studio SDK chat failed.', { engine, error: msg });
|
||||
this.webview?.postMessage({ type: 'error', value: `LM Studio: ${msg}` });
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -516,7 +662,9 @@ export class AgentExecutor {
|
||||
baseUrl: ollamaUrl,
|
||||
modelName: actualModel,
|
||||
reqMessages: messagesForRequest,
|
||||
temperature
|
||||
temperature,
|
||||
maxTokens: maxOutputTokens,
|
||||
contextLength: ctxLimits.contextLength
|
||||
});
|
||||
const { response, apiUrl: restApiUrl } = request;
|
||||
apiUrl = restApiUrl;
|
||||
@@ -551,6 +699,10 @@ export class AgentExecutor {
|
||||
if (token) {
|
||||
aiResponseText += token;
|
||||
}
|
||||
const fr = engine === 'lmstudio'
|
||||
? json.choices?.[0]?.finish_reason
|
||||
: (json.done_reason ?? (json.done === true ? 'stop' : undefined));
|
||||
if (fr) finishStopReason = fr;
|
||||
} catch (e: any) {
|
||||
logError('Failed to parse streaming chunk.', { engine, apiUrl, chunk: summarizeText(trimmed, 300), error: e?.message || String(e) });
|
||||
}
|
||||
@@ -578,6 +730,10 @@ export class AgentExecutor {
|
||||
if (token) {
|
||||
aiResponseText += token;
|
||||
}
|
||||
const fr = engine === 'lmstudio'
|
||||
? json.choices?.[0]?.finish_reason
|
||||
: (json.done_reason ?? (json.done === true ? 'stop' : undefined));
|
||||
if (fr) finishStopReason = fr;
|
||||
} catch (e: any) {
|
||||
logError('Failed to parse final streaming buffer.', { engine, apiUrl, buffer: summarizeText(buffer, 300), error: e?.message || String(e) });
|
||||
}
|
||||
@@ -617,12 +773,15 @@ export class AgentExecutor {
|
||||
modelName: actualModel,
|
||||
messages: messagesForRequest.map((m) => ({ role: m.role, content: m.content })),
|
||||
temperature,
|
||||
maxTokens: maxOutputTokens,
|
||||
contextOverflowPolicy: config.contextOverflowPolicy,
|
||||
signal: this.abortController.signal,
|
||||
});
|
||||
let retryText = '';
|
||||
for await (const { token } of retryStream) {
|
||||
for await (const { token, stopReason } of retryStream) {
|
||||
if (this.isStaleRun(runId)) return;
|
||||
if (token) retryText += token;
|
||||
if (stopReason) finishStopReason = stopReason;
|
||||
}
|
||||
if (retryText.trim()) {
|
||||
aiResponseText = retryText;
|
||||
@@ -642,11 +801,14 @@ export class AgentExecutor {
|
||||
engine,
|
||||
messages: messagesForRequest,
|
||||
temperature,
|
||||
maxTokens: maxOutputTokens,
|
||||
contextLength: ctxLimits.contextLength,
|
||||
signal: this.abortController?.signal,
|
||||
});
|
||||
if (fallback && fallback.trim()) {
|
||||
aiResponseText = fallback;
|
||||
logInfo('Non-streaming fallback recovered the answer.', { engine, model: actualModel, length: fallback.length });
|
||||
if (fallback.stopReason) finishStopReason = fallback.stopReason;
|
||||
if (fallback.text && fallback.text.trim()) {
|
||||
aiResponseText = fallback.text;
|
||||
logInfo('Non-streaming fallback recovered the answer.', { engine, model: actualModel, length: fallback.text.length });
|
||||
}
|
||||
} catch (recoverErr: any) {
|
||||
logError('Non-streaming fallback also failed.', {
|
||||
@@ -688,6 +850,18 @@ export class AgentExecutor {
|
||||
].join('\n');
|
||||
}
|
||||
}
|
||||
// Surface truncated/abnormal generation so the user knows the answer is incomplete.
|
||||
const stopKind = classifyStopReason(finishStopReason);
|
||||
if (stopKind === 'output-limit' || stopKind === 'context-overflow' || stopKind === 'error') {
|
||||
logError('Generation stopped abnormally.', {
|
||||
model: actualModel, engine, stopReason: finishStopReason, stopKind,
|
||||
inputTokens, maxOutputTokens, answerChars: assistantContent.length,
|
||||
});
|
||||
}
|
||||
const notice = truncationNotice(stopKind);
|
||||
if (notice && assistantContent.trim()) {
|
||||
assistantContent = assistantContent.trimEnd() + notice;
|
||||
}
|
||||
const finalAssistantContent = assistantContent;
|
||||
|
||||
const assistantMessage: ChatMessage = { role: 'assistant', content: finalAssistantContent, internal: false, rationale };
|
||||
@@ -700,7 +874,9 @@ export class AgentExecutor {
|
||||
const promptCharCount = messagesForRequest.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
|
||||
logError('Model returned an empty response without actions.', {
|
||||
model: actualModel, engine, apiUrl, loopDepth,
|
||||
promptCharCount, messageCount: messagesForRequest.length,
|
||||
promptCharCount, inputTokens, maxOutputTokens, contextLength: ctxLimits.contextLength,
|
||||
estimatedOverflow: outputBudget.tight, stopReason: finishStopReason,
|
||||
messageCount: messagesForRequest.length,
|
||||
fallbackTried: loopDepth === 0 ? 'yes' : 'no',
|
||||
});
|
||||
// Cheap heuristic: parse a parameter-count hint out of the
|
||||
@@ -717,20 +893,23 @@ export class AgentExecutor {
|
||||
const contextLimitHint =
|
||||
'LM Studio 로그에 `n_tokens = N, truncated = 0` 인데 `eval time` 이 0ms 라면 모델이 첫 토큰부터 EOS 를 뱉은 것입니다. 보통 컨텍스트 한계 초과 또는 모델 용량 부족입니다. 더 큰 모델(7B+)로 교체하거나 컨텍스트를 줄여 보세요.';
|
||||
|
||||
const looksOverflow = outputBudget.tight || inputTokens > ctxLimits.contextLength - ctxLimits.safetyMargin;
|
||||
this.webview.postMessage({
|
||||
type: 'error',
|
||||
value: [
|
||||
'AI 엔진이 빈 응답을 반환했습니다 (스트리밍 + non-streaming 폴백 모두 실패).',
|
||||
`Engine: ${engine}`,
|
||||
`Model: ${actualModel}`,
|
||||
`Prompt size: ${promptCharCount.toLocaleString()} chars across ${messagesForRequest.length} message(s)`,
|
||||
`Prompt: ~${inputTokens.toLocaleString()} tokens (${promptCharCount.toLocaleString()} chars, ${messagesForRequest.length} messages) / context window ${ctxLimits.contextLength.toLocaleString()} tokens`,
|
||||
`Output budget: ${maxOutputTokens.toLocaleString()} tokens`,
|
||||
...(finishStopReason ? [`Stop reason: ${finishStopReason}`] : []),
|
||||
'',
|
||||
'다음을 시도해보세요:',
|
||||
' • LM Studio에서 모델이 실제로 로드되어 있는지 확인',
|
||||
promptCharCount > 16000
|
||||
? ' • 프롬프트가 너무 큽니다 (16k chars 초과). Skill/Brain 컨텍스트를 좁혀 보세요.'
|
||||
looksOverflow
|
||||
? ' • 입력이 모델 context window 에 가깝습니다. `/newChat` 으로 대화를 새로 시작하거나, Skill/Brain 컨텍스트를 줄이거나, Settings 의 `g1nation.contextLength` 를 모델 실제 값으로 맞추세요.'
|
||||
: ' • 다른 모델로 전환하거나 LM Studio 서버를 재시작',
|
||||
' • Settings에서 maxContextSize 또는 memoryLongTermFiles 줄이기',
|
||||
' • Settings에서 maxContextSize / memoryLongTermFiles 줄이기',
|
||||
...(looksSmall || promptIsLarge ? [' • ' + contextLimitHint] : []),
|
||||
].join('\n')
|
||||
});
|
||||
@@ -765,6 +944,12 @@ export class AgentExecutor {
|
||||
}
|
||||
|
||||
this.statusBarManager.updateStatus(AgentStatus.Success);
|
||||
if (this._lastRetrievalInfo) {
|
||||
this.webview.postMessage({
|
||||
type: 'usedScope',
|
||||
value: { ...this._lastRetrievalInfo, hasAgentSelected: !!options.agentSkillFile },
|
||||
});
|
||||
}
|
||||
this.webview.postMessage({ type: 'streamChunk', value: finalAssistantContent });
|
||||
|
||||
} catch (error: any) {
|
||||
@@ -863,12 +1048,17 @@ export class AgentExecutor {
|
||||
|
||||
private async callAgent(role: AgentRole, prompt: string, modelName: string, options: any): Promise<string> {
|
||||
const persona = AGENT_PROMPTS[role];
|
||||
const { ollamaUrl } = getConfig();
|
||||
const { ollamaUrl, contextLength, maxOutputTokens, contextSafetyMargin, contextOverflowPolicy } = getConfig();
|
||||
|
||||
const messages: ChatMessage[] = [
|
||||
{ role: 'system', content: persona },
|
||||
{ role: 'user', content: prompt }
|
||||
];
|
||||
// Dynamic output cap so input + output stays within the context window.
|
||||
const inputTokens = estimateMessagesTokens(messages);
|
||||
const { maxOutputTokens: subMaxTokens } = computeOutputBudget(inputTokens, {
|
||||
contextLength, maxOutputTokens, safetyMargin: contextSafetyMargin, minOutputTokens: 512,
|
||||
});
|
||||
|
||||
const engine = resolveEngine(ollamaUrl);
|
||||
let responseText = '';
|
||||
@@ -879,6 +1069,8 @@ export class AgentExecutor {
|
||||
modelName,
|
||||
messages: messages.map((m) => ({ role: m.role, content: m.content })),
|
||||
temperature: 0.3,
|
||||
maxTokens: subMaxTokens,
|
||||
contextOverflowPolicy,
|
||||
signal: this.abortController?.signal,
|
||||
});
|
||||
for await (const { token } of stream) {
|
||||
@@ -896,7 +1088,9 @@ export class AgentExecutor {
|
||||
baseUrl: ollamaUrl,
|
||||
modelName: modelName,
|
||||
reqMessages: messages,
|
||||
temperature: 0.3 // Use lower temperature for planning and research
|
||||
temperature: 0.3, // Use lower temperature for planning and research
|
||||
maxTokens: subMaxTokens,
|
||||
contextLength
|
||||
});
|
||||
|
||||
const reader = request.response.body?.getReader();
|
||||
@@ -1218,16 +1412,17 @@ export class AgentExecutor {
|
||||
|
||||
/**
|
||||
* 프롬프트에 로컬 파일/디렉토리 경로가 포함되어 있는지 감지합니다.
|
||||
* 절대 경로: /Volumes/, /Users/, /home/, ~/
|
||||
* POSIX 절대 경로: /Volumes/, /Users/, /home/, /opt/, ~/
|
||||
* Windows 절대 경로: C:\..., D:/..., \\server\share\...
|
||||
* 상대 경로: src/..., lib/..., components/..., tests/... 등 + 파일 확장자
|
||||
*/
|
||||
private containsLocalFilePath(prompt: string): boolean {
|
||||
// 절대 경로
|
||||
if (/(?:\/Volumes\/|\/Users\/|\/home\/|~\/)[^\s`"'<>]+/i.test(prompt)) {
|
||||
// 절대 경로 (POSIX + Windows 드라이브 문자 + UNC)
|
||||
if (AgentExecutor.ABS_PATH_RE.test(prompt) || AgentExecutor.WIN_ABS_PATH_RE.test(prompt)) {
|
||||
return true;
|
||||
}
|
||||
// 상대 경로 패턴: 디렉토리/파일명.확장자 형태 (src/lib/engine.ts, components/App.tsx 등)
|
||||
if (/(?:^|[\s,])(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)\//i.test(prompt)
|
||||
// 상대 경로 패턴: 디렉토리/파일명.확장자 형태 (src/lib/engine.ts, components\App.tsx 등)
|
||||
if (/(?:^|[\s,])(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)[\\/]/i.test(prompt)
|
||||
&& /\.[a-z]{1,6}(?:[\s,;)\]]|$)/i.test(prompt)) {
|
||||
return true;
|
||||
}
|
||||
@@ -1551,15 +1746,21 @@ export class AgentExecutor {
|
||||
|
||||
private extractLocalProjectPaths(prompt: string, rootPath?: string): string[] {
|
||||
const results: string[] = [];
|
||||
const stripTrailingPunct = (s: string) => s.replace(/[),.;\]]+$/g, '');
|
||||
|
||||
// 1. 절대 경로 감지: /Volumes/, /Users/, /home/, ~/
|
||||
const absMatches = prompt.match(/(?:\/Volumes\/|\/Users\/|\/home\/|~\/)[^\s`"'<>]+/gi) || [];
|
||||
// 1a. POSIX 절대 경로: /Volumes/, /Users/, /home/, /opt/, ~/
|
||||
const absMatches = prompt.match(new RegExp(POSIX_ABS_PATH_SRC, 'gi')) || [];
|
||||
for (const m of absMatches) {
|
||||
results.push(m.replace(/[),.;\]]+$/g, ''));
|
||||
results.push(stripTrailingPunct(m));
|
||||
}
|
||||
// 1b. Windows 절대 경로: C:\..., D:/..., \\server\share\...
|
||||
const winMatches = prompt.match(new RegExp(WIN_ABS_PATH_SRC, 'gi')) || [];
|
||||
for (const m of winMatches) {
|
||||
results.push(stripTrailingPunct(m));
|
||||
}
|
||||
|
||||
// 2. 상대 경로 감지: src/lib/engine.ts, components/App.tsx 등
|
||||
const relMatches = prompt.match(/(?:^|[\s,])(?:(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)\/[^\s`"'<>]+\.[a-z]{1,6})/gi) || [];
|
||||
// 2. 상대 경로 감지: src/lib/engine.ts, components/App.tsx, src\lib\engine.ts 등
|
||||
const relMatches = prompt.match(/(?:^|[\s,])(?:(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)[\\/][^\s`"'<>]+\.[a-z]{1,6})/gi) || [];
|
||||
for (const m of relMatches) {
|
||||
const cleaned = m.trim().replace(/^,\s*/, '').replace(/[),.;\]]+$/g, '');
|
||||
if (rootPath) {
|
||||
@@ -1924,6 +2125,7 @@ export class AgentExecutor {
|
||||
|
||||
private buildMemoryContext(currentPrompt: string, activeBrain: BrainProfile, agentSkillFile?: string): string {
|
||||
const config = getConfig();
|
||||
this._lastRetrievalInfo = null;
|
||||
if (!config.memoryEnabled) return '';
|
||||
|
||||
// Update memory manager config in case settings changed
|
||||
@@ -1956,6 +2158,27 @@ export class AgentExecutor {
|
||||
scopeFolders: scope.folders
|
||||
});
|
||||
|
||||
// Stash what actually fed this turn so handlePrompt can show it under the answer.
|
||||
const brainRoot = activeBrain.localBrainPath;
|
||||
const rel = (p?: string) => (p ? (path.relative(brainRoot, p) || p) : '');
|
||||
this._lastRetrievalInfo = {
|
||||
agentName: scope.agent?.name ?? null,
|
||||
scoped: scope.folders.length > 0,
|
||||
source: String((scope as any).source ?? ''),
|
||||
configuredFolders: scope.folders.map((abs) => rel(abs)),
|
||||
usedBrainFiles: result.selectedChunks
|
||||
.filter((c) => c.source === 'brain-memory' && c.metadata.filePath)
|
||||
.map((c) => rel(c.metadata.filePath))
|
||||
.filter((p, i, arr) => p && arr.indexOf(p) === i),
|
||||
usedMemoryLayers: Array.from(new Set(
|
||||
result.selectedChunks
|
||||
.filter((c) => c.source !== 'brain-memory' && c.source !== 'brain-trace')
|
||||
.map((c) => c.source as string)
|
||||
)),
|
||||
totalChunks: result.totalChunks,
|
||||
selectedChunks: result.selectedChunks.length,
|
||||
};
|
||||
|
||||
return this.retrievalOrchestrator.buildContextString(result);
|
||||
}
|
||||
|
||||
@@ -1999,8 +2222,14 @@ export class AgentExecutor {
|
||||
modelName: string;
|
||||
reqMessages: ChatMessage[];
|
||||
temperature: number;
|
||||
/** Dynamic output-token cap computed from the remaining context budget. */
|
||||
maxTokens?: number;
|
||||
/** Model context window in tokens (used for Ollama's num_ctx). */
|
||||
contextLength?: number;
|
||||
}): Promise<{ response: Response; engine: 'lmstudio' | 'ollama'; apiUrl: string }> {
|
||||
const { baseUrl, modelName, reqMessages, temperature } = params;
|
||||
const maxTokens = Math.max(256, params.maxTokens ?? 4096);
|
||||
const numCtx = Math.max(2048, params.contextLength ?? 32768);
|
||||
const engine = resolveEngine(baseUrl); // 사용자가 설정한 엔진만 사용
|
||||
const apiUrl = buildApiUrl(baseUrl, engine, 'chat');
|
||||
const messageVariants = this.buildEngineMessageVariants(reqMessages, engine);
|
||||
@@ -2015,8 +2244,8 @@ export class AgentExecutor {
|
||||
messages: variant.messages,
|
||||
stream: true,
|
||||
...(engine === 'lmstudio'
|
||||
? { max_tokens: 4096, temperature }
|
||||
: { options: { num_ctx: 32768, num_predict: 4096, temperature } }),
|
||||
? { max_tokens: maxTokens, temperature }
|
||||
: { options: { num_ctx: numCtx, num_predict: maxTokens, temperature } }),
|
||||
};
|
||||
|
||||
// 일시적 네트워크 오류용 retry (최대 2회, 지수 backoff)
|
||||
@@ -2100,9 +2329,13 @@ export class AgentExecutor {
|
||||
engine: 'lmstudio' | 'ollama';
|
||||
messages: ChatMessage[];
|
||||
temperature: number;
|
||||
maxTokens?: number;
|
||||
contextLength?: number;
|
||||
signal?: AbortSignal;
|
||||
}): Promise<string> {
|
||||
}): Promise<{ text: string; stopReason?: string }> {
|
||||
const { baseUrl, modelName, engine, messages, temperature, signal } = params;
|
||||
const maxTokens = Math.max(256, params.maxTokens ?? 4096);
|
||||
const numCtx = Math.max(2048, params.contextLength ?? 32768);
|
||||
const apiUrl = buildApiUrl(baseUrl, engine, 'chat');
|
||||
const variants = this.buildEngineMessageVariants(messages, engine);
|
||||
const body = {
|
||||
@@ -2110,8 +2343,8 @@ export class AgentExecutor {
|
||||
messages: variants[0].messages,
|
||||
stream: false,
|
||||
...(engine === 'lmstudio'
|
||||
? { max_tokens: 4096, temperature }
|
||||
: { options: { num_ctx: 32768, num_predict: 4096, temperature } }),
|
||||
? { max_tokens: maxTokens, temperature }
|
||||
: { options: { num_ctx: numCtx, num_predict: maxTokens, temperature } }),
|
||||
};
|
||||
const response = await fetch(apiUrl, {
|
||||
method: 'POST',
|
||||
@@ -2127,11 +2360,17 @@ export class AgentExecutor {
|
||||
try {
|
||||
const json = JSON.parse(text);
|
||||
if (engine === 'lmstudio') {
|
||||
return json?.choices?.[0]?.message?.content ?? '';
|
||||
return {
|
||||
text: json?.choices?.[0]?.message?.content ?? '',
|
||||
stopReason: json?.choices?.[0]?.finish_reason,
|
||||
};
|
||||
}
|
||||
return json?.message?.content ?? json?.response ?? '';
|
||||
return {
|
||||
text: json?.message?.content ?? json?.response ?? '',
|
||||
stopReason: json?.done_reason ?? (json?.done === true ? 'stop' : undefined),
|
||||
};
|
||||
} catch {
|
||||
return '';
|
||||
return { text: '' };
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,11 @@ export abstract class BaseAgent {
|
||||
const isOllama = ollamaUrl.includes(':11434') || ollamaUrl.includes('ollama');
|
||||
const endpoint = isOllama ? `${ollamaUrl}/api/chat` : `${ollamaUrl}/v1/chat/completions`;
|
||||
|
||||
// 컨텍스트 초과 방지를 위해 출력 토큰 상한을 항상 명시한다 (서브에이전트 중간 산출물용).
|
||||
const { contextLength, maxOutputTokens } = getConfig();
|
||||
const numCtx = Math.max(2048, contextLength);
|
||||
const outCap = Math.max(256, maxOutputTokens);
|
||||
|
||||
let lastError: any;
|
||||
for (let attempt = 1; attempt <= 3; attempt++) {
|
||||
const controller = new AbortController();
|
||||
@@ -40,12 +45,13 @@ export abstract class BaseAgent {
|
||||
model: this.modelName,
|
||||
messages,
|
||||
stream: false,
|
||||
options: { temperature: 0.3 }
|
||||
options: { temperature: 0.3, num_ctx: numCtx, num_predict: outCap }
|
||||
} : {
|
||||
model: this.modelName,
|
||||
messages,
|
||||
stream: false,
|
||||
temperature: 0.3
|
||||
temperature: 0.3,
|
||||
max_tokens: outCap
|
||||
}),
|
||||
signal: combinedSignal
|
||||
});
|
||||
|
||||
+18
-1
@@ -30,6 +30,14 @@ export interface IAgentConfig {
|
||||
memoryShortTermMessages: number;
|
||||
memoryMediumTermSessions: number;
|
||||
memoryLongTermFiles: number;
|
||||
// ─── 컨텍스트 한계 관리 ───
|
||||
contextLength: number;
|
||||
maxOutputTokens: number;
|
||||
contextSafetyMargin: number;
|
||||
contextOverflowPolicy: 'stopAtLimit' | 'truncateMiddle' | 'rollingWindow';
|
||||
autoCompactHistory: boolean;
|
||||
/** 작은 모델(≤4B) 감지 시 예산 계산에 쓸 유효 context window 상한. 0 = 비활성화. */
|
||||
smallModelContextCap: number;
|
||||
}
|
||||
|
||||
// ─── 경로 정규화 유틸리티 ───
|
||||
@@ -98,7 +106,16 @@ export function getConfig(): IAgentConfig {
|
||||
memoryEnabled: cfg.get<boolean>('memoryEnabled', true),
|
||||
memoryShortTermMessages: Math.max(0, cfg.get<number>('memoryShortTermMessages', 8)),
|
||||
memoryMediumTermSessions: Math.max(0, cfg.get<number>('memoryMediumTermSessions', 5)),
|
||||
memoryLongTermFiles: Math.max(0, cfg.get<number>('memoryLongTermFiles', 6))
|
||||
memoryLongTermFiles: Math.max(0, cfg.get<number>('memoryLongTermFiles', 6)),
|
||||
contextLength: Math.max(2048, cfg.get<number>('contextLength', 32768)),
|
||||
maxOutputTokens: Math.max(256, cfg.get<number>('maxOutputTokens', 4096)),
|
||||
contextSafetyMargin: Math.max(0, cfg.get<number>('contextSafetyMargin', 2048)),
|
||||
contextOverflowPolicy: ((): IAgentConfig['contextOverflowPolicy'] => {
|
||||
const v = cfg.get<string>('contextOverflowPolicy', 'stopAtLimit');
|
||||
return v === 'truncateMiddle' || v === 'rollingWindow' ? v : 'stopAtLimit';
|
||||
})(),
|
||||
autoCompactHistory: cfg.get<boolean>('autoCompactHistory', true),
|
||||
smallModelContextCap: Math.max(0, cfg.get<number>('smallModelContextCap', 8192))
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,254 @@
|
||||
/**
|
||||
* ============================================================
|
||||
* Context Manager (컨텍스트 한계 관리)
|
||||
*
|
||||
* "context length = 132k" 는 "답변을 132k 토큰까지 생성해도 된다" 가 아닙니다.
|
||||
* 시스템 프롬프트 + 대화 기록 + 입력 문서 + 생성될 답변 + 여유분 ≤ context length
|
||||
*
|
||||
* 이 모듈은 요청을 보내기 *전에* 입력 토큰을 추정하고,
|
||||
* - 동적으로 출력 상한(maxTokens)을 계산하고,
|
||||
* - 대화 기록이 예산을 넘으면 오래된 메시지를 잘라내고,
|
||||
* - 그래도 넘으면 시스템 프롬프트의 [CONTEXT] 블록을 마지막 수단으로 줄이고,
|
||||
* - 생성 종료 사유(stopReason / finish_reason)를 "정상 / 출력한계 / 컨텍스트초과 / 사용자중단"
|
||||
* 으로 분류해 호출자가 잘린 응답을 감지할 수 있게 합니다.
|
||||
* ============================================================
|
||||
*/
|
||||
|
||||
export type ChatRole = 'user' | 'assistant' | 'system';
|
||||
|
||||
export interface BudgetMessage {
|
||||
role: ChatRole;
|
||||
content: string;
|
||||
/** internal/system bookkeeping messages that should be kept verbatim where possible */
|
||||
internal?: boolean;
|
||||
}
|
||||
|
||||
export interface ContextLimits {
|
||||
/** 모델의 context window (프롬프트 + 생성 합산 한계). */
|
||||
contextLength: number;
|
||||
/** 한 응답에서 생성할 토큰 수의 상한 (이 값을 넘기지 않음). */
|
||||
maxOutputTokens: number;
|
||||
/** 추정 오차를 흡수하기 위한 여유분. */
|
||||
safetyMargin: number;
|
||||
/** 출력에 항상 확보해 둘 최소 토큰 수. */
|
||||
minOutputTokens: number;
|
||||
}
|
||||
|
||||
export const DEFAULT_CONTEXT_LIMITS: ContextLimits = {
|
||||
contextLength: 32768,
|
||||
maxOutputTokens: 4096,
|
||||
safetyMargin: 2048,
|
||||
minOutputTokens: 512,
|
||||
};
|
||||
|
||||
/** LM Studio `contextOverflowPolicy` values — the safety net for when our own budget calculation fails. */
export type ContextOverflowPolicy = 'stopAtLimit' | 'truncateMiddle' | 'rollingWindow';
/** Policy used when none is configured. */
export const DEFAULT_OVERFLOW_POLICY: ContextOverflowPolicy = 'stopAtLimit';
|
||||
|
||||
/**
 * Roughly estimates the number of tokens in `text`.
 *
 * There is no real tokenizer available, so a per-character heuristic is used:
 *   - CJK (Korean/Chinese/Japanese) characters: ~1.6 tokens each (conservative for byte-level BPE)
 *   - everything else (Latin text, code, symbols): ~0.30 tokens each
 * The estimate deliberately leans high so that context overflow is avoided.
 *
 * @param text Text to measure; an empty string yields 0.
 * @returns The estimated token count, rounded up.
 */
export function estimateTokens(text: string): number {
    if (!text) return 0;
    // Ranges are written as escapes so the pattern survives copy/paste and re-encoding:
    // CJK symbols & punctuation, kana, CJK Ext-A, CJK unified ideographs, Hangul syllables,
    // half/full-width forms.
    const cjkPattern = /[\u3000-\u303f\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uac00-\ud7af\uff00-\uffef]/g;
    const cjkChars = (text.match(cjkPattern) || []).length;
    const otherChars = text.length - cjkChars;
    return Math.ceil(cjkChars * 1.6 + otherChars * 0.3);
}
|
||||
|
||||
/**
 * Roughly extracts the parameter count (in billions) from a model identifier; null when unknown.
 * Examples: "qwen2.5-7b" → 7, "llama-3.1-8b-instruct" → 8, "gemma-3n-e2b" / "gemma4:e2b" → 2,
 * "phi-3-mini" → null (no size token), "qwen3-30b-a3b" → 30. Quantization tags such as "4bit"
 * do not match.
 *
 * @param modelId Model identifier; null/undefined/empty yields null.
 * @returns The first size token found, if it is finite and in the open range (0, 2000); else null.
 */
export function estimateModelParamsB(modelId: string | null | undefined): number | null {
    if (!modelId) return null;
    // <start or separator> [e] <number> b, where the "b" is not followed by another letter or digit.
    const sizeToken = /(?:^|[-_/:.\s])e?(\d+(?:\.\d+)?)\s*b(?![a-z0-9])/i.exec(String(modelId));
    if (!sizeToken) return null;
    const billions = Number(sizeToken[1]);
    return Number.isFinite(billions) && billions > 0 && billions < 2000 ? billions : null;
}
|
||||
|
||||
/** Approximate fixed overhead (role, separators, …) incurred by each message. */
const PER_MESSAGE_TOKEN_OVERHEAD = 4;

/**
 * Estimates the tokens of a single message: its content estimate plus the fixed per-message overhead.
 *
 * @param msg Message to measure; missing content counts as empty.
 */
export function estimateMessageTokens(msg: BudgetMessage): number {
    return estimateTokens(msg.content || '') + PER_MESSAGE_TOKEN_OVERHEAD;
}
|
||||
|
||||
/**
 * Sums `estimateMessageTokens` over `messages`.
 *
 * @param messages Messages to measure (not modified); an empty list yields 0.
 */
export function estimateMessagesTokens(messages: readonly BudgetMessage[]): number {
    let total = 0;
    for (const message of messages) {
        total += estimateMessageTokens(message);
    }
    return total;
}
|
||||
|
||||
/**
 * Computes how many output tokens can safely be generated for a given input size.
 *
 *   available = contextLength - inputTokens - safetyMargin
 *   maxOutput = clamp(available, minOutputTokens, maxOutputTokens)
 *
 * When `available` is not larger than `minOutputTokens`, the input has already consumed almost
 * the whole context: `minOutputTokens` is returned with `tight: true`, and the caller is
 * expected to shrink the input further.
 *
 * @param inputTokens Estimated tokens of everything sent to the model.
 * @param limits Limits to budget against; defaults to `DEFAULT_CONTEXT_LIMITS`.
 * @returns The output cap, the raw `available` value (may be negative) and the `tight` flag.
 */
export function computeOutputBudget(
    inputTokens: number,
    limits: ContextLimits = DEFAULT_CONTEXT_LIMITS
): { maxOutputTokens: number; available: number; tight: boolean } {
    const available = limits.contextLength - inputTokens - limits.safetyMargin;
    const tight = available <= limits.minOutputTokens;
    const cap = tight
        ? limits.minOutputTokens
        : Math.max(limits.minOutputTokens, Math.min(available, limits.maxOutputTokens));
    return { maxOutputTokens: cap, available, tight };
}
|
||||
|
||||
/** Outcome of fitting a message list into a token budget. */
export interface TrimResult<M extends BudgetMessage> {
    /** Messages to send. */
    messages: M[];
    /** Number of messages that were cut (0 means nothing changed). */
    droppedCount: number;
    /** Estimated input tokens after trimming. */
    tokensAfter: number;
}
|
||||
|
||||
/**
 * Fits a conversation history into a token budget.
 *
 * Strategy:
 *   1. The last message (normally the current user question) is always kept.
 *   2. Walking backwards from the newest message, keep as many as the budget allows.
 *   3. If anything was cut, a marker built by `makeMarker` is placed at the front so the model
 *      knows earlier context is missing. Room for that marker is reserved up front, so the
 *      result stays within `budgetTokens` whenever the last message plus the marker fit.
 *
 * Only the returned array is affected; `messages` itself is not modified. When no trimming is
 * needed the same array instance is returned.
 *
 * @param messages History in chronological order.
 * @param budgetTokens Token budget for the whole returned list.
 * @param makeMarker Builds the "N earlier messages omitted" marker; may be called twice
 *                   (once to size the reservation, once with the real count).
 * @returns The messages to send, how many were dropped, and the resulting token estimate.
 */
export function trimHistoryToBudget<M extends BudgetMessage>(
    messages: M[],
    budgetTokens: number,
    makeMarker: (droppedCount: number) => M
): TrimResult<M> {
    if (messages.length === 0) {
        return { messages, droppedCount: 0, tokensAfter: 0 };
    }
    const total = estimateMessagesTokens(messages);
    if (total <= budgetTokens) {
        return { messages, droppedCount: 0, tokensAfter: total };
    }

    // With more than one message and an over-budget total, at least one message is always cut,
    // so a marker will be added. Reserve its cost using the largest possible dropped count.
    const markerReserve = messages.length > 1
        ? estimateMessageTokens(makeMarker(messages.length - 1))
        : 0;
    const historyBudget = budgetTokens - markerReserve;

    // Collect newest-first (push is O(1)); the newest message is kept unconditionally.
    const newestFirst: M[] = [];
    let used = 0;
    for (let i = messages.length - 1; i >= 0; i--) {
        const cost = estimateMessageTokens(messages[i]);
        if (newestFirst.length > 0 && used + cost > historyBudget) {
            break;
        }
        newestFirst.push(messages[i]);
        used += cost;
    }

    const kept = newestFirst.reverse();
    const droppedCount = messages.length - kept.length;
    if (droppedCount > 0) {
        const marker = makeMarker(droppedCount);
        kept.unshift(marker);
        used += estimateMessageTokens(marker);
    }
    return { messages: kept, droppedCount, tokensAfter: used };
}
|
||||
|
||||
/** Start/end markers of the auxiliary-context region of the system prompt that may be trimmed. */
export const CONTEXT_OPEN_MARKER = '\n\n[CONTEXT]\n';
export const CONTEXT_CLOSE_MARKER = '\n[/CONTEXT]\n';

/**
 * Length of the longest prefix of `text` whose `estimateTokens` value is at most `tokenBudget`.
 * Binary search is valid because appending characters never lowers the estimate.
 */
function prefixLengthWithinTokens(text: string, tokenBudget: number): number {
    if (tokenBudget <= 0) return 0;
    let lo = 0;
    let hi = text.length;
    while (lo < hi) {
        const mid = Math.ceil((lo + hi) / 2);
        if (estimateTokens(text.slice(0, mid)) <= tokenBudget) {
            lo = mid;
        } else {
            hi = mid - 1;
        }
    }
    return lo;
}

/** Returns the first `end` UTF-16 units of `text`, backing off one unit rather than ending on a lone high surrogate. */
function slicePrefixSafely(text: string, end: number): string {
    let cut = Math.max(0, Math.min(end, text.length));
    if (cut > 0 && cut < text.length) {
        const unit = text.charCodeAt(cut - 1);
        if (unit >= 0xd800 && unit <= 0xdbff) cut -= 1;
    }
    return text.slice(0, cut);
}

/**
 * Last-resort shrinking of an oversized system prompt: only the auxiliary context between
 * `[CONTEXT]` and `[/CONTEXT]` is cut. The text before the opening marker and everything from
 * the closing marker onwards are left untouched. Without a closing marker, everything after
 * `[CONTEXT]` is treated as trimmable. Without an opening marker, the end of the whole prompt is
 * cut, but never below the first 1000 characters.
 *
 * Cut points are chosen by measuring with `estimateTokens`, so CJK-heavy text (which costs far
 * more tokens per character than Latin text) is trimmed to the budget as well.
 *
 * @param systemPrompt The fully assembled system prompt.
 * @param maxTokens Token allowance for the system prompt.
 * @returns The (possibly shortened) prompt and whether the over-budget path was taken.
 */
export function truncateSystemPromptContext(
    systemPrompt: string,
    maxTokens: number
): { prompt: string; truncated: boolean } {
    if (estimateTokens(systemPrompt) <= maxTokens) {
        return { prompt: systemPrompt, truncated: false };
    }

    const openIdx = systemPrompt.indexOf(CONTEXT_OPEN_MARKER);
    if (openIdx < 0) {
        // No auxiliary region: the only option is to cut the end of the whole prompt.
        const notice = '\n\n[…시스템 프롬프트가 컨텍스트 한계로 잘렸습니다…]';
        const minKeptChars = 1000;
        const fitChars = prefixLengthWithinTokens(systemPrompt, maxTokens - estimateTokens(notice));
        return {
            prompt: slicePrefixSafely(systemPrompt, Math.max(minKeptChars, fitChars)) + notice,
            truncated: true,
        };
    }

    const bodyStart = openIdx + CONTEXT_OPEN_MARKER.length;
    const closeIdx = systemPrompt.indexOf(CONTEXT_CLOSE_MARKER, bodyStart);
    const hasClose = closeIdx >= 0;
    const head = systemPrompt.slice(0, bodyStart); // instructions + "[CONTEXT]\n"
    const body = hasClose ? systemPrompt.slice(bodyStart, closeIdx) : systemPrompt.slice(bodyStart);
    const tail = hasClose ? systemPrompt.slice(closeIdx) : ''; // "[/CONTEXT]" + trailing instructions

    // Tokens set aside for the omission notice appended to the body.
    const noticeReserveTokens = 64;
    const remainForBody = maxTokens - estimateTokens(head) - estimateTokens(tail) - noticeReserveTokens;
    if (remainForBody <= 0) {
        return {
            prompt: head + '[…보조 컨텍스트는 컨텍스트 한계로 모두 생략되었습니다…]' + tail,
            truncated: true,
        };
    }

    const keepChars = prefixLengthWithinTokens(body, remainForBody);
    const trimmedBody = keepChars >= body.length
        ? body
        : slicePrefixSafely(body, keepChars) + '\n\n[…이하 보조 컨텍스트는 컨텍스트 한계로 생략됨…]';
    return { prompt: head + trimmedBody + tail, truncated: true };
}
|
||||
|
||||
/** Engine-independent classification of why a generation ended. */
export type GenerationStopKind =
    | 'complete'         // normal end (EOS / stop string)
    | 'output-limit'     // maxTokens reached — the answer was cut mid-way
    | 'context-overflow' // input + output exceeded the context window
    | 'user-stopped'     // cancelled by the user
    | 'tool-calls'       // ended in order to call tools
    | 'error'            // model/runtime failure
    | 'unknown';
|
||||
|
||||
/**
 * Normalizes engine-specific stop-reason strings into a common classification.
 *   - LM Studio SDK: `stats.stopReason` — eosFound / stopStringFound / maxPredictedTokensReached /
 *     contextLengthReached / userStopped / toolCalls / failed / modelUnloaded
 *   - OpenAI-compatible REST: `choices[].finish_reason` — stop / length / tool_calls / content_filter
 *   - Ollama: `done_reason` — stop / length / load
 *   - Anthropic-style names: end_turn / max_tokens / stop_sequence / tool_use
 *
 * Matching is case-insensitive and ignores surrounding whitespace. Rules are tried in order, so
 * abort-like values (e.g. "AbortError") are classified as user-stopped before the generic
 * error rule can claim them.
 *
 * @param raw Stop reason as reported by the engine.
 * @returns The matching kind, or 'unknown' for empty or unrecognized input.
 */
export function classifyStopReason(raw: string | null | undefined): GenerationStopKind {
    if (!raw) return 'unknown';
    const reason = String(raw).trim().toLowerCase();
    const rules: ReadonlyArray<readonly [RegExp, GenerationStopKind]> = [
        [/(maxpredictedtokensreached|^length$|max_tokens)/, 'output-limit'],
        [/(contextlengthreached|context_length|context_overflow|contextoverflow)/, 'context-overflow'],
        [/(eosfound|stopstringfound|^stop$|^end$|^end_turn$|stop_sequence|content_filter)/, 'complete'],
        [/(userstopped|abort|cancel)/, 'user-stopped'],
        [/(toolcalls|tool_calls|tool_use)/, 'tool-calls'],
        [/(failed|error|modelunloaded)/, 'error'],
    ];
    for (const [pattern, kind] of rules) {
        if (pattern.test(reason)) return kind;
    }
    return 'unknown';
}
|
||||
|
||||
/**
 * One-line notice to append for the user when a response was cut short or failed.
 *
 * @param kind Classified stop reason.
 * @returns A markdown block-quote warning for 'output-limit', 'context-overflow' and 'error';
 *          an empty string for every other kind.
 */
export function truncationNotice(kind: GenerationStopKind): string {
    switch (kind) {
        case 'output-limit':
            return '\n\n> ⚠️ 답변이 출력 토큰 한계에 도달해 잘렸습니다. "이어서 작성해줘" 라고 요청하면 계속 생성합니다.';
        case 'context-overflow':
            return '\n\n> ⚠️ 입력 컨텍스트가 모델의 context window 를 초과했습니다. 대화를 새로 시작하거나(`/newChat`) Settings 에서 `g1nation.contextLength` 를 모델 실제 값으로 맞추고, Brain/Skill 컨텍스트를 줄여보세요.';
        case 'error':
            return '\n\n> ⚠️ 모델이 비정상 종료했습니다 (컨텍스트 초과 또는 모델 용량 부족 가능). 더 큰 모델로 바꾸거나 컨텍스트를 줄여보세요.';
        default:
            return '';
    }
}
|
||||
+16
-11
@@ -82,19 +82,20 @@ export function resolveBrainDirFromConfig(): string {
|
||||
* `_sendAgentsList` and `_createAgent` operate on).
|
||||
*
|
||||
* Resolution order:
|
||||
* 1. The first VS Code workspace folder + `/.agent/skills/` (creating the
|
||||
* 1. VS Code config `g1nation.agentSkillsPath` (after `~` + abs-path normalization),
|
||||
* if the user explicitly pointed at a folder.
|
||||
* 2. The first VS Code workspace folder + `/.agent/skills/` (creating the
|
||||
* folder is the caller's responsibility).
|
||||
* 2. Empty string when no workspace is open — callers must short-circuit.
|
||||
* 3. Empty string when no workspace is open — callers must short-circuit.
|
||||
*
|
||||
* The legacy default `E:\Wiki\Agent\.agent\skills` from sidebarProvider.ts is
|
||||
* preserved as a fall-through hint for the original author's machine.
|
||||
* Note: a previous version hard-coded `E:\Wiki\Agent\.agent\skills` as a
|
||||
* fall-through for the original author's Windows machine. That made behavior
|
||||
* differ between machines (and never matched anything on macOS/Linux), so it
|
||||
* was removed — use `g1nation.agentSkillsPath` for a non-workspace location.
|
||||
*/
|
||||
export function resolveAgentSkillsDir(): string {
|
||||
const legacy = 'E:\\Wiki\\Agent\\.agent\\skills';
|
||||
try {
|
||||
const fs = require('fs') as typeof import('fs');
|
||||
if (fs.existsSync(legacy)) return legacy;
|
||||
} catch { /* fs unavailable in some isolated tests */ }
|
||||
const configured = resolvePathInput(_safeGetConfigString('g1nation', 'agentSkillsPath'));
|
||||
if (configured) return configured;
|
||||
|
||||
const folders = vscode.workspace.workspaceFolders;
|
||||
if (folders && folders.length > 0) {
|
||||
@@ -111,8 +112,12 @@ export function resolveAgentSkillsDir(): string {
|
||||
*/
|
||||
export function isInside(parent: string, child: string): boolean {
    if (!parent || !child) return false;
    // Windows file systems are case-insensitive and path.resolve may emit a
    // mixed-case drive letter, so normalize case there before comparing —
    // otherwise legitimate writes get rejected just because of casing.
    const normalize = (input: string): string => {
        const resolved = path.resolve(input);
        return process.platform === 'win32' ? resolved.toLowerCase() : resolved;
    };
    const root = normalize(parent);
    const target = normalize(child);
    if (target === root) return true;
    // Compare via the relative path instead of a string prefix: appending a separator to a
    // filesystem root ("/" or "c:\") yields a doubled separator that no child path starts with.
    const rel = path.relative(root, target);
    if (rel === '' || path.isAbsolute(rel)) return false; // absolute => different drive on Windows
    // ".." or "../…" climbs out of the parent; a name that merely starts with ".." does not.
    return rel !== '..' && !rel.startsWith('..' + path.sep);
}
|
||||
|
||||
@@ -11,13 +11,26 @@ export interface ChatStreamRequest {
|
||||
modelName: string;
|
||||
messages: ChatStreamMessage[];
|
||||
temperature: number;
|
||||
/** Upper bound on tokens to generate. Omit to fall back to a conservative default. */
|
||||
maxTokens?: number;
|
||||
/** LM Studio context-overflow safety net used only if the prompt still exceeds the window. */
|
||||
contextOverflowPolicy?: 'stopAtLimit' | 'truncateMiddle' | 'rollingWindow';
|
||||
signal?: AbortSignal;
|
||||
}
|
||||
|
||||
/**
 * One stream event. `token` carries generated text (possibly empty for the final event);
 * `stopReason` is set on the *last* event only and is the SDK's `stats.stopReason`
 * (e.g. `eosFound`, `maxPredictedTokensReached`, `contextLengthReached`, `userStopped`).
 */
export interface ChatStreamEvent {
    token: string;
    stopReason?: string;
}
|
||||
|
||||
export interface IChatStreamer {
|
||||
/** Token-level streaming for an LM Studio chat completion via the WebSocket SDK. */
|
||||
stream(req: ChatStreamRequest): AsyncIterable<{ token: string }>;
|
||||
stream(req: ChatStreamRequest): AsyncIterable<ChatStreamEvent>;
|
||||
/**
|
||||
* Drop the SDK's cached handle for `modelName`. Callers invoke this when
|
||||
* the previous stream returned zero tokens with no error — a symptom of a
|
||||
@@ -39,7 +52,7 @@ export interface IChatStreamer {
|
||||
export class LMStudioStreamer implements IChatStreamer {
|
||||
constructor(private readonly client: ILMStudioClient) {}
|
||||
|
||||
async *stream(req: ChatStreamRequest): AsyncIterable<{ token: string }> {
|
||||
async *stream(req: ChatStreamRequest): AsyncIterable<ChatStreamEvent> {
|
||||
const trimmedModel = (req.modelName || '').trim();
|
||||
if (!trimmedModel) {
|
||||
throw new LMStudioLifecycleError('LMStudioStreamer.stream called without a model name.');
|
||||
@@ -62,6 +75,10 @@ export class LMStudioStreamer implements IChatStreamer {
|
||||
const prediction = (model as any).respond(req.messages, {
|
||||
temperature: req.temperature,
|
||||
maxTokens: req.maxTokens ?? 4096,
|
||||
// Safety net: if our own token budgeting still underestimated and the prompt
|
||||
// exceeds the model's context window, decide whether the SDK should fail
|
||||
// loudly (stopAtLimit — default) or silently drop content.
|
||||
contextOverflowPolicy: req.contextOverflowPolicy ?? 'stopAtLimit',
|
||||
signal: req.signal,
|
||||
});
|
||||
|
||||
@@ -98,7 +115,22 @@ export class LMStudioStreamer implements IChatStreamer {
|
||||
req.signal?.removeEventListener?.('abort', onAbort);
|
||||
}
|
||||
|
||||
if (!caught) return;
|
||||
if (!caught) {
|
||||
if (req.signal?.aborted) return;
|
||||
// The prediction object is also a Promise<PredictionResult>; awaiting it after
|
||||
// the stream drains gives us stats.stopReason so callers can tell a truncated
|
||||
// answer (maxPredictedTokensReached / contextLengthReached) from a normal one.
|
||||
let stopReason: string | undefined;
|
||||
try {
|
||||
const result: any = await prediction;
|
||||
stopReason = result?.stats?.stopReason;
|
||||
if (stopReason) {
|
||||
logInfo('LM Studio SDK chat stream finished.', { model: trimmedModel, stopReason, tokensYielded: yielded });
|
||||
}
|
||||
} catch { /* result unavailable on some SDK versions — non-fatal */ }
|
||||
yield { token: '', stopReason: stopReason ?? 'eosFound' };
|
||||
return;
|
||||
}
|
||||
|
||||
const errMsg = String(caught?.message ?? caught);
|
||||
const handleDead = /\bdisposed\b/i.test(errMsg)
|
||||
|
||||
@@ -0,0 +1,220 @@
|
||||
/**
|
||||
* ============================================================
|
||||
* Brain Index — persistent, mtime-keyed tokenized cache of the Second Brain
|
||||
*
|
||||
* RAG 검색은 매 질의마다 브레인의 모든 .md 파일을 읽고 토크나이즈해서 TF-IDF 점수를
|
||||
* 계산했습니다 — 파일 수가 많아지면 그게 병목입니다.
|
||||
*
|
||||
* 이 모듈은 `<brainPath>/.astra/brain-index.json` 에 파일별 토큰 배열을 (mtime+size 키로)
|
||||
* 저장해 두고, 다음 질의에서는 *변경된 파일만* 다시 읽어 토크나이즈합니다. 나머지는 디스크/메모리
|
||||
* 캐시에서 그대로 가져옵니다. 디스크 쓰기는 디바운스되고 실패해도 in-memory 로만 동작합니다.
|
||||
* ============================================================
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { tokenize, countConflictIndicators } from './scoring';
|
||||
import { logInfo } from '../utils';
|
||||
|
||||
/** Schema version of the persisted index; a file with a different version is rebuilt. */
const INDEX_VERSION = 2;
/** Cache directory, created inside the brain folder. */
const INDEX_DIR = '.astra';
/** File name of the persisted index inside `INDEX_DIR`. */
const INDEX_FILE = 'brain-index.json';
/** Above this many entries, entries not seen in the current scan are pruned (keeps deleted files from piling up). */
const MAX_INDEX_ENTRIES = 12000;
/** Debounce for disk writes, in milliseconds. */
const WRITE_DEBOUNCE_MS = 1500;
|
||||
|
||||
/** Cached tokenization of one brain file, valid while the file's mtime and size are unchanged. */
interface IndexEntry {
    mtimeMs: number;
    size: number;
    title: string;          // basename without .md
    relativePath: string;   // relative to brainPath
    tokens: string[];       // tokenize(`${title} ${content}`)
    titleTokens: string[];  // tokenize(title)
    conflictCount: number;  // countConflictIndicators(`${title} ${content}`)
}
|
||||
|
||||
/** On-disk (JSON) shape of the brain index. */
interface PersistedIndex {
    version: number;
    entries: Record<string, IndexEntry>; // keyed by absolute file path
}
|
||||
|
||||
/** A tokenized brain document as returned by `getBrainTokenIndex`. */
export interface IndexedBrainDoc {
    filePath: string;
    relativePath: string;
    title: string;
    tokens: string[];
    titleTokens: string[];
    conflictCount: number;
    mtimeMs: number;
}
|
||||
|
||||
/** In-memory state kept per brain path. */
interface BrainState {
    index: PersistedIndex;
    /** True when `index` has changes that are not yet on disk. */
    dirty: boolean;
    diskPath: string | null; // null if we can't determine a writable path
    /** Pending debounced write, if one is scheduled. */
    writeTimer?: ReturnType<typeof setTimeout>;
}
|
||||
|
||||
const _states = new Map<string, BrainState>();
|
||||
|
||||
/** Path of the persisted index file for `brainPath`: `<brainPath>/<INDEX_DIR>/<INDEX_FILE>`. */
function indexFileFor(brainPath: string): string {
    return path.join(brainPath, INDEX_DIR, INDEX_FILE);
}
|
||||
|
||||
/**
 * Returns the in-memory state for `brainPath`, creating it on first use.
 *
 * On first use the persisted index is read from disk. It is accepted only when it has the
 * current `INDEX_VERSION` and an `entries` value that is a plain (non-array) object — properties
 * added to an array would be dropped by `JSON.stringify` on the next write. Anything else, or
 * any read/parse error, results in an empty index; this function does not throw for those cases.
 *
 * @param brainPath Brain root; also the key under which the state is memoized.
 */
function loadState(brainPath: string): BrainState {
    const memoized = _states.get(brainPath);
    if (memoized) return memoized;

    const emptyIndex = (): PersistedIndex => ({ version: INDEX_VERSION, entries: {} });
    let index = emptyIndex();
    let diskPath: string | null = null;
    try {
        diskPath = indexFileFor(brainPath);
        if (fs.existsSync(diskPath)) {
            const raw = JSON.parse(fs.readFileSync(diskPath, 'utf8'));
            const usable = raw
                && raw.version === INDEX_VERSION
                && raw.entries
                && typeof raw.entries === 'object'
                && !Array.isArray(raw.entries);
            if (usable) {
                index = raw as PersistedIndex;
            } else {
                logInfo('Brain index is stale/unrecognized — rebuilding.', { brainPath });
            }
        }
    } catch (e: any) {
        logInfo('Brain index load failed — starting fresh.', { brainPath, error: e?.message || String(e) });
        index = emptyIndex();
    }

    const created: BrainState = { index, dirty: false, diskPath };
    _states.set(brainPath, created);
    return created;
}
|
||||
|
||||
/**
 * Schedules a debounced write of `st.index` to `st.diskPath`.
 *
 * Does nothing when the state is clean, has no disk path, or already has a write pending.
 * The write goes to a `.tmp` file that is then renamed over the target. On failure the
 * temporary file is removed, the state stays dirty, and the error is only logged.
 *
 * @param st State whose index should be persisted.
 * @param brainPath Used for log context only.
 */
function scheduleWrite(st: BrainState, brainPath: string): void {
    if (!st.dirty || !st.diskPath || st.writeTimer) return;
    const timer = setTimeout(() => {
        st.writeTimer = undefined;
        const target = st.diskPath;
        if (!st.dirty || !target) return;
        const tmp = `${target}.tmp`;
        try {
            const dir = path.dirname(target);
            fs.mkdirSync(dir, { recursive: true });
            // One-time .gitignore so the cache dir never gets committed into a Second Brain git repo.
            const gi = path.join(dir, '.gitignore');
            if (!fs.existsSync(gi)) {
                try { fs.writeFileSync(gi, '*\n', 'utf8'); } catch { /* non-fatal */ }
            }
            fs.writeFileSync(tmp, JSON.stringify(st.index), 'utf8');
            fs.renameSync(tmp, target);
            st.dirty = false;
        } catch (e: any) {
            // Do not leave a partial temp file behind next to the index.
            try { fs.unlinkSync(tmp); } catch { /* nothing to clean up */ }
            logInfo('Brain index write failed (continuing in-memory only).', { brainPath, error: e?.message || String(e) });
        }
    }, WRITE_DEBOUNCE_MS);
    // Where the timer handle supports it, do not let the pending write keep the process alive.
    if (typeof (timer as any).unref === 'function') (timer as any).unref();
    st.writeTimer = timer;
}
|
||||
|
||||
/**
 * Returns tokenized representations for `files` (absolute brain-file paths, already
 * scoped/filtered by the caller). Unchanged files (same mtime and size) are served from the
 * index; changed/new files are read & tokenized and the index is updated (debounced disk write).
 * Files that can no longer be stat'ed or read are skipped, and any index entry they still had
 * is dropped so that stale data does not linger.
 *
 * Safe to call with an empty/invalid `brainPath` or empty list — returns [].
 *
 * @param brainPath Brain root; selects the index state and is the base for relative paths.
 * @param files Absolute paths of the files to return documents for.
 * @returns One document per readable file, in input order.
 */
export function getBrainTokenIndex(brainPath: string, files: string[]): IndexedBrainDoc[] {
    if (!brainPath || !Array.isArray(files) || files.length === 0) return [];
    const st = loadState(brainPath);
    const entries = st.index.entries;
    const out: IndexedBrainDoc[] = [];
    const seen = new Set<string>();
    let reindexed = 0;

    const toDoc = (filePath: string, entry: IndexEntry): IndexedBrainDoc => ({
        filePath,
        relativePath: entry.relativePath,
        title: entry.title,
        tokens: entry.tokens,
        titleTokens: entry.titleTokens,
        conflictCount: entry.conflictCount || 0,
        mtimeMs: entry.mtimeMs,
    });
    // Removes the entry of a file that turned out to be missing/unreadable.
    const forget = (filePath: string): void => {
        if (Object.prototype.hasOwnProperty.call(entries, filePath)) {
            delete entries[filePath];
            st.dirty = true;
        }
    };

    for (const file of files) {
        seen.add(file);
        let stat: fs.Stats;
        try {
            stat = fs.statSync(file);
        } catch {
            forget(file); // listed but gone now
            continue;
        }

        const cached = entries[file];
        const fresh = cached
            && cached.mtimeMs === stat.mtimeMs
            && cached.size === stat.size
            && Array.isArray(cached.tokens)
            && Array.isArray(cached.titleTokens);
        if (fresh) {
            out.push(toDoc(file, cached));
            continue;
        }

        // (Re)index this file.
        let content: string;
        try {
            content = fs.readFileSync(file, 'utf8');
        } catch {
            forget(file); // whatever is cached no longer matches the file on disk
            continue;
        }
        const title = path.basename(file, '.md');
        const combined = `${title} ${content}`;
        const entry: IndexEntry = {
            mtimeMs: stat.mtimeMs,
            size: stat.size,
            title,
            relativePath: path.relative(brainPath, file),
            tokens: tokenize(combined),
            titleTokens: tokenize(title),
            conflictCount: countConflictIndicators(combined),
        };
        entries[file] = entry;
        st.dirty = true;
        reindexed++;
        out.push(toDoc(file, entry));
    }

    // Prune stale entries. We only prune when this looked like a (near-)full scan — i.e. we saw
    // most of the index — so an agent-scoped query doesn't evict cache for out-of-scope files.
    // (Falls back to a hard prune if the index has grown beyond MAX_INDEX_ENTRIES.)
    const entryKeys = Object.keys(entries);
    const looksFullScan = seen.size >= entryKeys.length * 0.8;
    if (looksFullScan || entryKeys.length > MAX_INDEX_ENTRIES) {
        for (const key of entryKeys) {
            if (!seen.has(key)) {
                delete entries[key];
                st.dirty = true;
            }
        }
    }

    if (reindexed > 0) {
        logInfo('Brain index updated.', { brainPath, files: files.length, reindexed, totalEntries: Object.keys(st.index.entries).length });
    }
    if (st.dirty) scheduleWrite(st, brainPath);
    return out;
}
|
||||
|
||||
/**
 * Drops the in-memory index for one brain, or for all brains when `brainPath` is omitted.
 * A pending debounced write is cancelled, so changes not yet written are discarded.
 * The disk file is left as-is.
 *
 * @param brainPath Brain to forget; omit to forget every brain.
 */
export function clearBrainTokenIndex(brainPath?: string): void {
    const targets = brainPath === undefined ? Array.from(_states.keys()) : [brainPath];
    for (const key of targets) {
        const st = _states.get(key);
        if (st?.writeTimer) {
            clearTimeout(st.writeTimer);
            st.writeTimer = undefined;
        }
        _states.delete(key);
    }
}
|
||||
+44
-46
@@ -19,11 +19,13 @@ import { findBrainFiles, summarizeText } from '../utils';
|
||||
import { isInside } from '../lib/paths';
|
||||
import { MemoryManager } from '../memory';
|
||||
import { RetrievalChunk, RetrievalResult, ContextBudgetConfig } from './types';
|
||||
import { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from './scoring';
|
||||
import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
|
||||
import { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
|
||||
import { getBrainTokenIndex } from './brainIndex';
|
||||
|
||||
export { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from './scoring';
|
||||
export { tokenize, expandQuery, scoreTfIdf, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
|
||||
export { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
|
||||
export { getBrainTokenIndex, clearBrainTokenIndex } from './brainIndex';
|
||||
export * from './types';
|
||||
|
||||
interface RetrievalOptions {
|
||||
@@ -133,52 +135,48 @@ export class RetrievalOrchestrator {
|
||||
|
||||
if (allFiles.length === 0) return [];
|
||||
|
||||
// Read all files for TF-IDF
|
||||
const documents = allFiles.map((file) => {
|
||||
// Tokenized docs from the persistent mtime-keyed index — unchanged files are not re-read
|
||||
// or re-tokenized, so per-query work over a large brain drops from O(total content) to O(files) stats.
|
||||
const indexed = getBrainTokenIndex(brain.localBrainPath, allFiles);
|
||||
if (indexed.length === 0) return [];
|
||||
|
||||
const scored = scoreTfIdfPreTokenized(
|
||||
expandedTokens,
|
||||
indexed.map((d) => ({
|
||||
tokens: d.tokens,
|
||||
titleTokens: d.titleTokens,
|
||||
lastModified: d.mtimeMs,
|
||||
conflictCount: d.conflictCount,
|
||||
}))
|
||||
);
|
||||
|
||||
const topResults: RetrievalChunk[] = [];
|
||||
for (const s of scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score).slice(0, limit)) {
|
||||
const doc = indexed[s.index];
|
||||
// Only the top `limit` files are actually read off disk (for excerpt extraction).
|
||||
let content = '';
|
||||
let lastModified = 0;
|
||||
try {
|
||||
content = fs.readFileSync(file, 'utf8');
|
||||
lastModified = fs.statSync(file).mtimeMs;
|
||||
} catch { /* skip */ }
|
||||
return {
|
||||
title: path.basename(file, '.md'),
|
||||
content,
|
||||
lastModified,
|
||||
filePath: file,
|
||||
relativePath: path.relative(brain.localBrainPath, file)
|
||||
};
|
||||
});
|
||||
|
||||
// TF-IDF scoring
|
||||
const scored = scoreTfIdf(expandedTokens, documents);
|
||||
|
||||
return scored
|
||||
.filter((s) => s.score > 0)
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, limit)
|
||||
.map((s) => {
|
||||
const doc = documents[s.index];
|
||||
const excerpt = extractBestExcerpt(doc.content, expandedTokens, 400);
|
||||
return {
|
||||
id: `brain-${s.index}`,
|
||||
source: 'brain-memory' as const,
|
||||
title: doc.relativePath,
|
||||
content: summarizeText(excerpt, 400),
|
||||
score: s.score,
|
||||
tokenEstimate: estimateTokens(excerpt),
|
||||
metadata: {
|
||||
filePath: doc.filePath,
|
||||
category: this.inferCategory(doc.relativePath),
|
||||
isProjectEvidence: this.isProjectEvidence(doc.relativePath, doc.content),
|
||||
lastUpdated: doc.lastModified,
|
||||
// Phase 5: Scoring Intelligence Integration
|
||||
conflictDetected: s.conflictDetected,
|
||||
conflictSeverity: s.conflictSeverity,
|
||||
informationDensity: s.informationDensity
|
||||
}
|
||||
};
|
||||
try { content = fs.readFileSync(doc.filePath, 'utf8'); } catch { /* deleted just now — skip */ continue; }
|
||||
const excerpt = extractBestExcerpt(content, expandedTokens, 400);
|
||||
topResults.push({
|
||||
id: `brain-${s.index}`,
|
||||
source: 'brain-memory' as const,
|
||||
title: doc.relativePath,
|
||||
content: summarizeText(excerpt, 400),
|
||||
score: s.score,
|
||||
tokenEstimate: estimateTokens(excerpt),
|
||||
metadata: {
|
||||
filePath: doc.filePath,
|
||||
category: this.inferCategory(doc.relativePath),
|
||||
isProjectEvidence: this.isProjectEvidence(doc.relativePath, content),
|
||||
lastUpdated: doc.mtimeMs,
|
||||
// Phase 5: Scoring Intelligence Integration
|
||||
conflictDetected: s.conflictDetected,
|
||||
conflictSeverity: s.conflictSeverity,
|
||||
informationDensity: s.informationDensity,
|
||||
},
|
||||
});
|
||||
}
|
||||
return topResults;
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
|
||||
+54
-15
@@ -160,6 +160,30 @@ function inverseDocumentFrequency(
|
||||
|
||||
export type ConflictSeverity = 'NONE' | 'LOW' | 'MEDIUM' | 'HIGH';
|
||||
|
||||
/**
 * Counts how many distinct conflict-indicator words are present (substring match) in `rawText`.
 * Matching is case-insensitive; each indicator counts at most once however often it occurs.
 * Exposed so the brain index can cache this per-file instead of re-scanning content every query.
 *
 * @param rawText Text to scan; empty or missing text yields 0.
 */
export function countConflictIndicators(rawText: string): number {
    const haystack = (rawText || '').toLowerCase();
    let hits = 0;
    for (const indicator of SCORING_CONFIG.CONFLICT_INDICATORS) {
        if (haystack.includes(indicator.toLowerCase())) hits += 1;
    }
    return hits;
}
|
||||
|
||||
/** A document whose tokens were already computed (e.g. from the persistent brain index). */
export interface PreTokenizedDoc {
    /** tokenize(`${title} ${content}`) */
    tokens: string[];
    /** tokenize(title) */
    titleTokens: string[];
    lastModified?: number;
    /** result of countConflictIndicators(`${title} ${content}`); 0 if unknown */
    conflictCount: number;
}
|
||||
|
||||
export interface ScoredDocument {
|
||||
index: number;
|
||||
score: number;
|
||||
@@ -173,6 +197,8 @@ export interface ScoredDocument {
|
||||
|
||||
/**
|
||||
* TF-IDF 기반으로 문서 집합을 스코어링합니다.
|
||||
* 문서 내용을 받아 즉석에서 토크나이즈합니다 — 이미 토큰화된 집합이 있다면
|
||||
* `scoreTfIdfPreTokenized` 를 직접 호출하면 토크나이즈를 건너뛸 수 있습니다.
|
||||
*/
|
||||
export function scoreTfIdf(
|
||||
queryTokens: string[],
|
||||
@@ -183,11 +209,28 @@ export function scoreTfIdf(
|
||||
}>
|
||||
): ScoredDocument[] {
|
||||
if (documents.length === 0 || queryTokens.length === 0) return [];
|
||||
return scoreTfIdfPreTokenized(queryTokens, documents.map((doc) => {
|
||||
const combined = `${doc.title} ${doc.content}`;
|
||||
return {
|
||||
tokens: tokenize(combined),
|
||||
titleTokens: tokenize(doc.title),
|
||||
lastModified: doc.lastModified,
|
||||
conflictCount: countConflictIndicators(combined),
|
||||
};
|
||||
}));
|
||||
}
|
||||
|
||||
// Pre-tokenize all documents
|
||||
const docTokenArrays = documents.map((doc) =>
|
||||
tokenize(`${doc.title} ${doc.content}`)
|
||||
);
|
||||
/**
|
||||
* TF-IDF 스코어링 — 이미 토큰화된 문서 집합 버전 (브레인 인덱스 등 캐시된 토큰을 그대로 사용).
|
||||
* `scoreTfIdf` 와 동일한 알고리즘이며 출력 형태도 같습니다.
|
||||
*/
|
||||
export function scoreTfIdfPreTokenized(
|
||||
queryTokens: string[],
|
||||
documents: PreTokenizedDoc[]
|
||||
): ScoredDocument[] {
|
||||
if (documents.length === 0 || queryTokens.length === 0) return [];
|
||||
|
||||
const docTokenArrays = documents.map((doc) => doc.tokens);
|
||||
const docTokenSets = docTokenArrays.map((tokens) => new Set(tokens));
|
||||
|
||||
// Expand query with synonyms
|
||||
@@ -205,22 +248,18 @@ export function scoreTfIdf(
|
||||
|
||||
return documents.map((doc, index) => {
|
||||
const docTokens = docTokenArrays[index];
|
||||
const titleTokens = new Set(tokenize(doc.title));
|
||||
const titleTokens = new Set(doc.titleTokens);
|
||||
let score = 0;
|
||||
const matchedTerms: string[] = [];
|
||||
|
||||
// Conflict Detection & Severity Analysis (Substring based for better recall with particles)
|
||||
const rawText = `${doc.title} ${doc.content}`.toLowerCase();
|
||||
const conflictMatches = [...SCORING_CONFIG.CONFLICT_INDICATORS].filter(indicator =>
|
||||
rawText.includes(indicator.toLowerCase())
|
||||
);
|
||||
|
||||
const conflictDetected = conflictMatches.length > 0;
|
||||
// Conflict Detection & Severity Analysis (pre-counted by caller / index)
|
||||
const conflictCount = doc.conflictCount || 0;
|
||||
const conflictDetected = conflictCount > 0;
|
||||
let conflictSeverity: ConflictSeverity = 'NONE';
|
||||
|
||||
if (conflictMatches.length >= SCORING_CONFIG.CONFLICT_THRESHOLDS.HIGH) conflictSeverity = 'HIGH';
|
||||
else if (conflictMatches.length >= SCORING_CONFIG.CONFLICT_THRESHOLDS.MEDIUM) conflictSeverity = 'MEDIUM';
|
||||
else if (conflictMatches.length >= SCORING_CONFIG.CONFLICT_THRESHOLDS.LOW) conflictSeverity = 'LOW';
|
||||
if (conflictCount >= SCORING_CONFIG.CONFLICT_THRESHOLDS.HIGH) conflictSeverity = 'HIGH';
|
||||
else if (conflictCount >= SCORING_CONFIG.CONFLICT_THRESHOLDS.MEDIUM) conflictSeverity = 'MEDIUM';
|
||||
else if (conflictCount >= SCORING_CONFIG.CONFLICT_THRESHOLDS.LOW) conflictSeverity = 'LOW';
|
||||
|
||||
for (const term of expandedQuery) {
|
||||
const tf = termFrequency(term, docTokens);
|
||||
|
||||
@@ -34,6 +34,7 @@ export async function handleAgentMessage(provider: SidebarChatProvider, data: an
|
||||
case 'saveAgentSelection':
|
||||
await provider._context.globalState.update(SidebarChatProvider.lastAgentStateKey, data.path || 'none');
|
||||
logInfo(`Agent selection saved: ${data.path}`);
|
||||
void provider._sendReadyStatus();
|
||||
return true;
|
||||
case 'getKnowledgeScope': {
|
||||
const view = (provider as any)._view as vscode.WebviewView | undefined;
|
||||
@@ -54,6 +55,7 @@ export async function handleAgentMessage(provider: SidebarChatProvider, data: an
|
||||
brainRoot,
|
||||
},
|
||||
});
|
||||
void provider._sendReadyStatus();
|
||||
return true;
|
||||
}
|
||||
case 'editKnowledgeMap':
|
||||
|
||||
@@ -32,6 +32,10 @@ export async function handleChatMessage(provider: SidebarChatProvider, data: any
|
||||
await provider._sendModels();
|
||||
await provider._sendChronicleProjects();
|
||||
await provider._restoreActiveSessionIntoView();
|
||||
await provider._sendReadyStatus();
|
||||
return true;
|
||||
case 'getReadyStatus':
|
||||
await provider._sendReadyStatus();
|
||||
return true;
|
||||
case 'getModels':
|
||||
await provider._sendModels();
|
||||
|
||||
+88
-5
@@ -1,6 +1,7 @@
|
||||
import * as vscode from 'vscode';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import {
|
||||
_getBrainDir,
|
||||
findBrainFiles,
|
||||
@@ -22,7 +23,8 @@ import { handleChatMessage } from './sidebar/chatHandlers';
|
||||
import { handleBrainMessage } from './sidebar/brainHandlers';
|
||||
import { handleChronicleMessage } from './sidebar/chronicleHandlers';
|
||||
import { handleAgentMessage } from './sidebar/agentHandlers';
|
||||
import { getOrCreateAgentEntry } from './skills/agentKnowledgeMap';
|
||||
import { getOrCreateAgentEntry, resolveScopeForAgent } from './skills/agentKnowledgeMap';
|
||||
import { estimateModelParamsB } from './lib/contextManager';
|
||||
import { loadExternalSkills, formatSkillsAsPromptBlock } from './skills/externalSkillLoader';
|
||||
|
||||
export interface SidebarLmStudioDeps {
|
||||
@@ -111,12 +113,14 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
|
||||
void this._sendModels();
|
||||
void this._sendBrainProfiles();
|
||||
void this._sendAgentsList();
|
||||
void this._sendReadyStatus();
|
||||
});
|
||||
|
||||
webviewView.webview.html = this._getHtml(webviewView.webview);
|
||||
this._agent.setWebview(webviewView.webview);
|
||||
|
||||
void this._restoreActiveSessionIntoView();
|
||||
void this._sendReadyStatus();
|
||||
|
||||
webviewView.webview.onDidReceiveMessage(async (data) => {
|
||||
if (await handleChatMessage(this, data)) return;
|
||||
@@ -393,6 +397,71 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Posts a one-line "current readiness" snapshot for the sidebar's status bar:
 * engine online?, model loaded?, Brain file count, active Agent + mapped knowledge
 * folder count, memory on/off, context window.
 *
 * The only awaited call is `loadedModels()` on the LM Studio client; the online flag
 * is read from `_modelsCache`. Gathering is best-effort: lookups for the Brain file
 * count, agent scope and loaded models fall back to neutral values on failure, and
 * any other failure is logged and nothing is posted.
 *
 * Callers invoke this fire-and-forget (`void this._sendReadyStatus()`), so it must
 * never reject: the view is re-read after the await because it may have gone away
 * while the snapshot was being built.
 *
 * @returns Resolves once the `readyStatus` message has been handed to the webview
 *          (or immediately when there is no view to post to).
 */
async _sendReadyStatus(): Promise<void> {
    if (!this._view) return;

    let payload: Record<string, unknown>;
    try {
        const config = getConfig();
        const engineKind = resolveEngine(config.ollamaUrl);
        const activeBrain = getActiveBrainProfile();

        let brainFiles = 0;
        try { brainFiles = findBrainFiles(activeBrain.localBrainPath).length; } catch { /* best-effort: keep 0 */ }

        const agentPath = this._context.globalState.get<string>(SidebarChatProvider.lastAgentStateKey, 'none');
        let agentName: string | null = null;
        let scopeFolders = 0;
        let mapped = false;
        if (agentPath && agentPath !== 'none') {
            // Fallback display name: the agent file name without its .md extension.
            agentName = path.basename(agentPath).replace(/\.md$/i, '');
            try {
                const scope = resolveScopeForAgent(agentPath, activeBrain.localBrainPath || '');
                scopeFolders = scope.folders.length;
                if (scope.agent?.name) agentName = scope.agent.name;
                mapped = scope.source !== 'none';
            } catch { /* best-effort: keep the file-name fallback */ }
        }

        // `null` means "unknown" (non-LM Studio engine, or the lookup failed).
        let modelLoaded: boolean | null = null;
        if (engineKind === 'lmstudio') {
            try {
                const loaded = (await this._lmStudio?.loadedModels()) || [];
                modelLoaded = loaded.includes(config.defaultModel);
            } catch { modelLoaded = null; }
        }

        // Models estimated at <= 4B parameters get their context clamped to the
        // configured cap when a positive cap is set and the nominal length exceeds it.
        const paramB = estimateModelParamsB(config.defaultModel);
        const cappedForSmallModel = config.smallModelContextCap > 0
            && paramB !== null && paramB <= 4
            && config.contextLength > config.smallModelContextCap;
        const effectiveContextLength = cappedForSmallModel ? config.smallModelContextCap : config.contextLength;

        payload = {
            engine: {
                kind: engineKind,
                label: engineKind === 'lmstudio' ? 'LM Studio' : 'Ollama',
                online: this._modelsCache?.online ?? null,
            },
            model: { name: config.defaultModel, loaded: modelLoaded, paramB },
            brain: { name: activeBrain.name, files: brainFiles },
            agent: { name: agentName, scopeFolders, mapped },
            memory: config.memoryEnabled,
            multiAgent: config.multiAgentEnabled,
            contextLength: effectiveContextLength,
            nominalContextLength: config.contextLength,
            cappedForSmallModel,
        };
    } catch (err: unknown) {
        const message = (err instanceof Error && err.message) || String(err);
        logError('Failed to build ready status.', { error: message });
        return;
    }

    // Re-check: the guard at the top ran before the await above, and the view may
    // have been cleared in the meantime. Dereferencing it blindly would throw inside
    // a fire-and-forget promise.
    const view = this._view;
    if (!view) return;
    void view.webview.postMessage({ type: 'readyStatus', value: payload });
}
|
||||
|
||||
async _sendBrainProfiles() {
|
||||
if (!this._view) return;
|
||||
const activeBrain = getActiveBrainProfile();
|
||||
@@ -411,6 +480,7 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
|
||||
profiles
|
||||
}
|
||||
});
|
||||
void this._sendReadyStatus();
|
||||
}
|
||||
|
||||
_postBrainProfiles(profiles: any[], activeBrainId: string) {
|
||||
@@ -1558,9 +1628,18 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
|
||||
}
|
||||
|
||||
_getAgentsDir(): string {
|
||||
const defaultPath = 'E:\\Wiki\\Agent\\.agent\\skills';
|
||||
if (fs.existsSync(defaultPath)) return defaultPath;
|
||||
|
||||
// 1) Explicit config override (works on any OS — useful on Windows or for skills outside the workspace).
|
||||
const configured = (vscode.workspace.getConfiguration('g1nation').get<string>('agentSkillsPath', '') || '').trim();
|
||||
const expanded = configured.startsWith('~/') || configured === '~'
|
||||
? path.join(os.homedir(), configured.slice(1).replace(/^[\\/]/, ''))
|
||||
: configured;
|
||||
if (expanded && path.isAbsolute(expanded)) {
|
||||
if (!fs.existsSync(expanded)) {
|
||||
try { fs.mkdirSync(expanded, { recursive: true }); } catch { /* fall through to workspace */ }
|
||||
}
|
||||
if (fs.existsSync(expanded)) return expanded;
|
||||
}
|
||||
// 2) Default: <workspace>/.agent/skills
|
||||
const workspaceFolders = vscode.workspace.workspaceFolders;
|
||||
if (workspaceFolders) {
|
||||
const localPath = path.join(workspaceFolders[0].uri.fsPath, '.agent', 'skills');
|
||||
@@ -1586,6 +1665,7 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
|
||||
}
|
||||
const lastPath = this._context.globalState.get<string>(SidebarChatProvider.lastAgentStateKey, 'none');
|
||||
this._view.webview.postMessage({ type: 'agentsList', value: agents, selected: lastPath });
|
||||
void this._sendReadyStatus();
|
||||
}
|
||||
|
||||
async _handleProactiveSuggestion(context: string) {
|
||||
@@ -1629,7 +1709,7 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
|
||||
|
||||
const filePath = path.join(dir, `${safeName}.md`);
|
||||
if (!fs.existsSync(filePath)) {
|
||||
fs.writeFileSync(filePath, `# Agent Persona: ${safeName}\\n\\nAdd your instructions here...\\n`, 'utf8');
|
||||
fs.writeFileSync(filePath, `# Agent Persona: ${safeName}\n\nAdd your instructions here...\n`, 'utf8');
|
||||
}
|
||||
|
||||
const doc = await vscode.workspace.openTextDocument(filePath);
|
||||
@@ -1834,6 +1914,8 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
|
||||
} catch (error: any) {
|
||||
logError('Prompt handling failed in sidebar provider.', { error: error?.message || String(error), promptPreview: summarizeText(value || '', 200) });
|
||||
this._view.webview.postMessage({ type: 'error', value: error.message });
|
||||
} finally {
|
||||
void this._sendReadyStatus();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1934,6 +2016,7 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
|
||||
} finally {
|
||||
this._modelDiscoveryInFlight = false;
|
||||
}
|
||||
void this._sendReadyStatus();
|
||||
}
|
||||
|
||||
static _htmlTemplateCache: string | undefined;
|
||||
|
||||
Reference in New Issue
Block a user