chore: version up to 2.80.34 and package
This commit is contained in:
+276
-37
@@ -40,6 +40,17 @@ import {
|
||||
import { MemoryManager } from './memory';
|
||||
import { RetrievalOrchestrator } from './retrieval';
|
||||
import { resolveScopeForAgent } from './skills/agentKnowledgeMap';
|
||||
import {
|
||||
estimateTokens,
|
||||
estimateMessagesTokens,
|
||||
computeOutputBudget,
|
||||
trimHistoryToBudget,
|
||||
truncateSystemPromptContext,
|
||||
classifyStopReason,
|
||||
truncationNotice,
|
||||
estimateModelParamsB,
|
||||
type ContextLimits,
|
||||
} from './lib/contextManager';
|
||||
|
||||
export interface ChatMessage {
|
||||
role: 'user' | 'assistant' | 'system';
|
||||
@@ -94,7 +105,17 @@ const AGENT_PROMPTS: Record<AgentRole, string> = {
|
||||
3. Deliver a logical, consistent, and polished response.`
|
||||
};
|
||||
|
||||
// Local-path detectors used to decide whether a user prompt refers to a file/dir on disk.
|
||||
// POSIX: /Volumes/, /Users/, /home/, /opt/, ... or ~/ — backtick excluded (markdown code spans).
|
||||
// The leading lookbehind rejects a root that directly follows a word character, so URL path
// segments such as "example.com/home/page" are not mistaken for local filesystem paths.
const POSIX_ABS_PATH_SRC = "(?<![A-Za-z0-9_])(?:\\/(?:Volumes|Users|home|opt|srv|mnt|data|workspace)\\/|~\\/)[^\\s`\"'<>|*?]+";
|
||||
// Windows: drive letter (C:\ or C:/) or UNC (\\server\share). Backslash IS allowed as a separator here.
|
||||
// The drive-letter branch must not directly follow a letter or digit; otherwise the "s:/" in
// "https://…" (or "p:/" in "http://…") is misread as a Windows drive root.
const WIN_ABS_PATH_SRC = "(?:(?<![A-Za-z0-9])[A-Za-z]:[\\\\/]|\\\\\\\\[^\\s\\\\/]+\\\\[^\\s\\\\/]+)[^\\s`\"'<>|*?]*";
|
||||
|
||||
export class AgentExecutor {
|
||||
/** Non-global instances — safe for repeated `.test()` (a shared /g/ regex's lastIndex would corrupt results). */
|
||||
static readonly ABS_PATH_RE = new RegExp(POSIX_ABS_PATH_SRC, 'i');
|
||||
static readonly WIN_ABS_PATH_RE = new RegExp(WIN_ABS_PATH_SRC, 'i');
|
||||
|
||||
private chatHistory: ChatMessage[] = [];
|
||||
private abortController: AbortController | null = null;
|
||||
private webview: vscode.Webview | undefined;
|
||||
@@ -107,6 +128,17 @@ export class AgentExecutor {
|
||||
private memoryManager: MemoryManager;
|
||||
private retrievalOrchestrator: RetrievalOrchestrator;
|
||||
private currentTaskId: string = 'default_session';
|
||||
/** Set by buildMemoryContext after each retrieval — fed to the webview's per-answer "scope used" footer. */
|
||||
private _lastRetrievalInfo: {
|
||||
agentName: string | null;
|
||||
scoped: boolean;
|
||||
source: string;
|
||||
configuredFolders: string[]; // relative to brain root
|
||||
usedBrainFiles: string[]; // relative to brain root
|
||||
usedMemoryLayers: string[]; // raw RetrievalSource ids
|
||||
totalChunks: number;
|
||||
selectedChunks: number;
|
||||
} | null = null;
|
||||
|
||||
private readonly options: AgentExecutorOptions;
|
||||
|
||||
@@ -445,7 +477,8 @@ export class AgentExecutor {
|
||||
].join('\n');
|
||||
|
||||
// 3. 조립: 기본(축소) → 유틸리티 컨텍스트 → 에이전트 프롬프트(최후단)
|
||||
fullSystemPrompt = `${strippedSystemPrompt}${internetCtx}${memoryCtx}${designerCtx}${secondBrainTraceCtx}\n\n[CONTEXT]\n${brainContext}${brainInventoryCtx}\n${contextBlock}${negativeCtx}${agentDirective}`;
|
||||
// [CONTEXT] … [/CONTEXT] 사이만 컨텍스트 초과 시 trim 대상 — agentDirective/negative 는 보호.
|
||||
fullSystemPrompt = `${strippedSystemPrompt}${internetCtx}${memoryCtx}${designerCtx}${secondBrainTraceCtx}\n\n[CONTEXT]\n${brainContext}${brainInventoryCtx}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}${agentDirective}`;
|
||||
} else {
|
||||
// 기존 Astra 모드 (에이전트 미선택)
|
||||
const localProjectKnowledgeCtx = prompt && localPathContext && this.isProjectKnowledgeCreationRequest(prompt)
|
||||
@@ -464,13 +497,95 @@ export class AgentExecutor {
|
||||
"- [충돌] 지식 간 충돌 발생 시 시스템이 독단적으로 판단하지 말고, 반드시 [CONFLICT WARNING] 플래그와 함께 상충되는 두 관점을 모두 명시하여 사용자에게 판단을 위임할 것."
|
||||
].join('\n');
|
||||
|
||||
fullSystemPrompt = `${systemPrompt}${internetCtx}${memoryCtx}${designerCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}\n\n[CONTEXT]\n${brainContext}${brainInventoryCtx}\n${contextBlock}${negativeCtx}`;
|
||||
// [CONTEXT] … [/CONTEXT] 사이만 컨텍스트 초과 시 trim 대상 — negative constraints 는 보호.
|
||||
fullSystemPrompt = `${systemPrompt}${internetCtx}${memoryCtx}${designerCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}\n\n[CONTEXT]\n${brainContext}${brainInventoryCtx}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}`;
|
||||
}
|
||||
// ──────────────────────────────────────────────────────────────────
|
||||
// [Context Limit Manager] context length 는 "답변을 그만큼 길게 써도 된다"
|
||||
// 는 뜻이 아니다: 시스템 프롬프트 + 대화 기록 + 입력 + 생성될 답변 + 여유분 ≤ context length.
|
||||
// 요청을 보내기 전에 입력 토큰을 추정해서
|
||||
// (1) 시스템 프롬프트가 과하면 [CONTEXT] 블록을 마지막 수단으로 줄이고
|
||||
// (2) 대화 기록을 남은 예산에 맞게 압축하고 (UI 표시용 chatHistory 는 건드리지 않음)
|
||||
// (3) 동적으로 출력 상한(maxOutputTokens)을 계산한다.
|
||||
// ──────────────────────────────────────────────────────────────────
|
||||
// Small models (≤4B) routinely fail on prompts that fit their *nominal* context but
|
||||
// exceed their *effective* capability (server log shows truncated=0 yet eval time≈0ms —
|
||||
// the model emitted EOS as the first token). When detected, budget against a smaller
|
||||
// effective window so the system prompt / RAG / history get shrunk proactively.
|
||||
const modelParamB = estimateModelParamsB(actualModel);
|
||||
const smallModelCap = config.smallModelContextCap; // 0 disables this guard
|
||||
const cappedForSmallModel = smallModelCap > 0
|
||||
&& modelParamB !== null && modelParamB <= 4
|
||||
&& config.contextLength > smallModelCap;
|
||||
const effectiveContextLength = cappedForSmallModel ? smallModelCap : config.contextLength;
|
||||
if (cappedForSmallModel) {
|
||||
logInfo('Small model detected — capping effective context window for budgeting.', {
|
||||
model: actualModel, paramB: modelParamB,
|
||||
nominalContext: config.contextLength, effectiveContext: effectiveContextLength,
|
||||
});
|
||||
}
|
||||
const ctxLimits: ContextLimits = {
|
||||
contextLength: effectiveContextLength,
|
||||
maxOutputTokens: config.maxOutputTokens,
|
||||
safetyMargin: config.contextSafetyMargin,
|
||||
minOutputTokens: 512,
|
||||
};
|
||||
const imageCount = (reqMessages as any[])
|
||||
.reduce((n, m) => n + (Array.isArray(m?.images) ? m.images.length : 0), 0);
|
||||
const imageTokenReserve = imageCount * 1024;
|
||||
|
||||
// (1) 시스템 프롬프트는 예산의 ~65%까지만 허용 — 그 이상이면 [CONTEXT] 블록부터 잘라낸다.
|
||||
const systemCapTokens = Math.max(
|
||||
1024,
|
||||
Math.floor((ctxLimits.contextLength - ctxLimits.safetyMargin - ctxLimits.minOutputTokens - imageTokenReserve) * 0.65)
|
||||
);
|
||||
const { prompt: budgetedSystemPrompt, truncated: systemTruncated } =
|
||||
truncateSystemPromptContext(fullSystemPrompt, systemCapTokens);
|
||||
if (systemTruncated) {
|
||||
logInfo('System prompt context truncated to fit the context window.', { model: actualModel, systemCapTokens });
|
||||
}
|
||||
const systemTokens = estimateTokens(budgetedSystemPrompt) + 4;
|
||||
|
||||
// (2) 대화 기록 압축.
|
||||
const historyBudget = Math.max(
|
||||
256,
|
||||
ctxLimits.contextLength - systemTokens - ctxLimits.safetyMargin - ctxLimits.minOutputTokens - imageTokenReserve
|
||||
);
|
||||
let budgetedHistory: ChatMessage[] = reqMessages;
|
||||
if (config.autoCompactHistory) {
|
||||
const trim = trimHistoryToBudget<ChatMessage>(reqMessages, historyBudget, (n) => ({
|
||||
role: 'system',
|
||||
content: `[이전 대화 ${n}개 메시지는 컨텍스트 한계 때문에 이번 요청에서 생략되었습니다. 필요하면 사용자에게 다시 확인하세요.]`,
|
||||
internal: true,
|
||||
}));
|
||||
budgetedHistory = trim.messages;
|
||||
if (trim.droppedCount > 0) {
|
||||
logInfo('Conversation history compacted to fit the context window.', {
|
||||
model: actualModel, droppedCount: trim.droppedCount, historyBudget,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const messagesForRequest: ChatMessage[] = [
|
||||
{ role: 'system', content: fullSystemPrompt, internal: true },
|
||||
...reqMessages
|
||||
{ role: 'system', content: budgetedSystemPrompt, internal: true },
|
||||
...budgetedHistory
|
||||
];
|
||||
|
||||
// (3) 동적 출력 상한.
|
||||
const inputTokens = estimateMessagesTokens(messagesForRequest) + imageTokenReserve;
|
||||
const outputBudget = computeOutputBudget(inputTokens, ctxLimits);
|
||||
const maxOutputTokens = outputBudget.maxOutputTokens;
|
||||
if (outputBudget.tight) {
|
||||
logError('Prompt nearly fills the context window — output budget is at the minimum.', {
|
||||
model: actualModel, contextLength: ctxLimits.contextLength, inputTokens, maxOutputTokens,
|
||||
});
|
||||
}
|
||||
logInfo('Context budget computed.', {
|
||||
model: actualModel, contextLength: ctxLimits.contextLength,
|
||||
inputTokens, maxOutputTokens, droppedHistory: reqMessages.length - budgetedHistory.length,
|
||||
});
|
||||
let finishStopReason: string | undefined;
|
||||
|
||||
// 4. Call AI Engine
|
||||
this.abortController = new AbortController();
|
||||
requestTimeoutHandle = setTimeout(() => {
|
||||
@@ -485,6 +600,30 @@ export class AgentExecutor {
|
||||
let buffer = '';
|
||||
|
||||
if (loopDepth === 0) {
|
||||
// Context-budget preview so the UI can show what actually went into this turn
|
||||
// (≈N tokens, Brain N files, open file included?, history compacted?, small-model warning).
|
||||
this.webview.postMessage({
|
||||
type: 'contextBudget',
|
||||
value: {
|
||||
model: actualModel,
|
||||
engine,
|
||||
paramB: modelParamB,
|
||||
contextLength: ctxLimits.contextLength,
|
||||
nominalContextLength: config.contextLength,
|
||||
cappedForSmallModel,
|
||||
inputTokens,
|
||||
maxOutputTokens,
|
||||
systemTokens,
|
||||
historyKept: budgetedHistory.length,
|
||||
droppedHistory: reqMessages.length - budgetedHistory.length,
|
||||
systemTruncated,
|
||||
includesOpenFile: !!contextBlock && contextBlock.includes('[Currently open file:'),
|
||||
brainFiles: brainFiles.length,
|
||||
imageCount,
|
||||
tight: outputBudget.tight,
|
||||
smallModel: cappedForSmallModel || (modelParamB !== null && modelParamB <= 3 && inputTokens > 8000),
|
||||
},
|
||||
});
|
||||
this.webview.postMessage({ type: 'streamStart' });
|
||||
this.options.onStreamLifecycle?.start();
|
||||
}
|
||||
@@ -497,18 +636,25 @@ export class AgentExecutor {
|
||||
modelName: actualModel,
|
||||
messages: messagesForRequest.map((m) => ({ role: m.role, content: m.content })),
|
||||
temperature,
|
||||
maxTokens: maxOutputTokens,
|
||||
contextOverflowPolicy: config.contextOverflowPolicy,
|
||||
signal: this.abortController.signal,
|
||||
});
|
||||
for await (const { token } of stream) {
|
||||
for await (const { token, stopReason } of stream) {
|
||||
if (this.isStaleRun(runId)) return;
|
||||
if (token) aiResponseText += token;
|
||||
if (stopReason) finishStopReason = stopReason;
|
||||
}
|
||||
} catch (err: any) {
|
||||
if (err?.name === 'AbortError' || this.abortController.signal.aborted) {
|
||||
logInfo('Generation aborted by user.');
|
||||
} else {
|
||||
logError('LM Studio SDK chat failed.', { engine, error: err?.message ?? String(err) });
|
||||
this.webview?.postMessage({ type: 'error', value: `LM Studio: ${err?.message ?? err}` });
|
||||
const msg = err?.message ?? String(err);
|
||||
if (/context\s*length|contextlengthreached|exceed|too\s*long/i.test(msg)) {
|
||||
finishStopReason = 'contextLengthReached';
|
||||
}
|
||||
logError('LM Studio SDK chat failed.', { engine, error: msg });
|
||||
this.webview?.postMessage({ type: 'error', value: `LM Studio: ${msg}` });
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -516,7 +662,9 @@ export class AgentExecutor {
|
||||
baseUrl: ollamaUrl,
|
||||
modelName: actualModel,
|
||||
reqMessages: messagesForRequest,
|
||||
temperature
|
||||
temperature,
|
||||
maxTokens: maxOutputTokens,
|
||||
contextLength: ctxLimits.contextLength
|
||||
});
|
||||
const { response, apiUrl: restApiUrl } = request;
|
||||
apiUrl = restApiUrl;
|
||||
@@ -551,6 +699,10 @@ export class AgentExecutor {
|
||||
if (token) {
|
||||
aiResponseText += token;
|
||||
}
|
||||
const fr = engine === 'lmstudio'
|
||||
? json.choices?.[0]?.finish_reason
|
||||
: (json.done_reason ?? (json.done === true ? 'stop' : undefined));
|
||||
if (fr) finishStopReason = fr;
|
||||
} catch (e: any) {
|
||||
logError('Failed to parse streaming chunk.', { engine, apiUrl, chunk: summarizeText(trimmed, 300), error: e?.message || String(e) });
|
||||
}
|
||||
@@ -578,6 +730,10 @@ export class AgentExecutor {
|
||||
if (token) {
|
||||
aiResponseText += token;
|
||||
}
|
||||
const fr = engine === 'lmstudio'
|
||||
? json.choices?.[0]?.finish_reason
|
||||
: (json.done_reason ?? (json.done === true ? 'stop' : undefined));
|
||||
if (fr) finishStopReason = fr;
|
||||
} catch (e: any) {
|
||||
logError('Failed to parse final streaming buffer.', { engine, apiUrl, buffer: summarizeText(buffer, 300), error: e?.message || String(e) });
|
||||
}
|
||||
@@ -617,12 +773,15 @@ export class AgentExecutor {
|
||||
modelName: actualModel,
|
||||
messages: messagesForRequest.map((m) => ({ role: m.role, content: m.content })),
|
||||
temperature,
|
||||
maxTokens: maxOutputTokens,
|
||||
contextOverflowPolicy: config.contextOverflowPolicy,
|
||||
signal: this.abortController.signal,
|
||||
});
|
||||
let retryText = '';
|
||||
for await (const { token } of retryStream) {
|
||||
for await (const { token, stopReason } of retryStream) {
|
||||
if (this.isStaleRun(runId)) return;
|
||||
if (token) retryText += token;
|
||||
if (stopReason) finishStopReason = stopReason;
|
||||
}
|
||||
if (retryText.trim()) {
|
||||
aiResponseText = retryText;
|
||||
@@ -642,11 +801,14 @@ export class AgentExecutor {
|
||||
engine,
|
||||
messages: messagesForRequest,
|
||||
temperature,
|
||||
maxTokens: maxOutputTokens,
|
||||
contextLength: ctxLimits.contextLength,
|
||||
signal: this.abortController?.signal,
|
||||
});
|
||||
if (fallback && fallback.trim()) {
|
||||
aiResponseText = fallback;
|
||||
logInfo('Non-streaming fallback recovered the answer.', { engine, model: actualModel, length: fallback.length });
|
||||
if (fallback.stopReason) finishStopReason = fallback.stopReason;
|
||||
if (fallback.text && fallback.text.trim()) {
|
||||
aiResponseText = fallback.text;
|
||||
logInfo('Non-streaming fallback recovered the answer.', { engine, model: actualModel, length: fallback.text.length });
|
||||
}
|
||||
} catch (recoverErr: any) {
|
||||
logError('Non-streaming fallback also failed.', {
|
||||
@@ -688,6 +850,18 @@ export class AgentExecutor {
|
||||
].join('\n');
|
||||
}
|
||||
}
|
||||
// Surface truncated/abnormal generation so the user knows the answer is incomplete.
|
||||
const stopKind = classifyStopReason(finishStopReason);
|
||||
if (stopKind === 'output-limit' || stopKind === 'context-overflow' || stopKind === 'error') {
|
||||
logError('Generation stopped abnormally.', {
|
||||
model: actualModel, engine, stopReason: finishStopReason, stopKind,
|
||||
inputTokens, maxOutputTokens, answerChars: assistantContent.length,
|
||||
});
|
||||
}
|
||||
const notice = truncationNotice(stopKind);
|
||||
if (notice && assistantContent.trim()) {
|
||||
assistantContent = assistantContent.trimEnd() + notice;
|
||||
}
|
||||
const finalAssistantContent = assistantContent;
|
||||
|
||||
const assistantMessage: ChatMessage = { role: 'assistant', content: finalAssistantContent, internal: false, rationale };
|
||||
@@ -700,7 +874,9 @@ export class AgentExecutor {
|
||||
const promptCharCount = messagesForRequest.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
|
||||
logError('Model returned an empty response without actions.', {
|
||||
model: actualModel, engine, apiUrl, loopDepth,
|
||||
promptCharCount, messageCount: messagesForRequest.length,
|
||||
promptCharCount, inputTokens, maxOutputTokens, contextLength: ctxLimits.contextLength,
|
||||
estimatedOverflow: outputBudget.tight, stopReason: finishStopReason,
|
||||
messageCount: messagesForRequest.length,
|
||||
fallbackTried: loopDepth === 0 ? 'yes' : 'no',
|
||||
});
|
||||
// Cheap heuristic: parse a parameter-count hint out of the
|
||||
@@ -717,20 +893,23 @@ export class AgentExecutor {
|
||||
const contextLimitHint =
|
||||
'LM Studio 로그에 `n_tokens = N, truncated = 0` 인데 `eval time` 이 0ms 라면 모델이 첫 토큰부터 EOS 를 뱉은 것입니다. 보통 컨텍스트 한계 초과 또는 모델 용량 부족입니다. 더 큰 모델(7B+)로 교체하거나 컨텍스트를 줄여 보세요.';
|
||||
|
||||
const looksOverflow = outputBudget.tight || inputTokens > ctxLimits.contextLength - ctxLimits.safetyMargin;
|
||||
this.webview.postMessage({
|
||||
type: 'error',
|
||||
value: [
|
||||
'AI 엔진이 빈 응답을 반환했습니다 (스트리밍 + non-streaming 폴백 모두 실패).',
|
||||
`Engine: ${engine}`,
|
||||
`Model: ${actualModel}`,
|
||||
`Prompt size: ${promptCharCount.toLocaleString()} chars across ${messagesForRequest.length} message(s)`,
|
||||
`Prompt: ~${inputTokens.toLocaleString()} tokens (${promptCharCount.toLocaleString()} chars, ${messagesForRequest.length} messages) / context window ${ctxLimits.contextLength.toLocaleString()} tokens`,
|
||||
`Output budget: ${maxOutputTokens.toLocaleString()} tokens`,
|
||||
...(finishStopReason ? [`Stop reason: ${finishStopReason}`] : []),
|
||||
'',
|
||||
'다음을 시도해보세요:',
|
||||
' • LM Studio에서 모델이 실제로 로드되어 있는지 확인',
|
||||
promptCharCount > 16000
|
||||
? ' • 프롬프트가 너무 큽니다 (16k chars 초과). Skill/Brain 컨텍스트를 좁혀 보세요.'
|
||||
looksOverflow
|
||||
? ' • 입력이 모델 context window 에 가깝습니다. `/newChat` 으로 대화를 새로 시작하거나, Skill/Brain 컨텍스트를 줄이거나, Settings 의 `g1nation.contextLength` 를 모델 실제 값으로 맞추세요.'
|
||||
: ' • 다른 모델로 전환하거나 LM Studio 서버를 재시작',
|
||||
' • Settings에서 maxContextSize 또는 memoryLongTermFiles 줄이기',
|
||||
' • Settings에서 maxContextSize / memoryLongTermFiles 줄이기',
|
||||
...(looksSmall || promptIsLarge ? [' • ' + contextLimitHint] : []),
|
||||
].join('\n')
|
||||
});
|
||||
@@ -765,6 +944,12 @@ export class AgentExecutor {
|
||||
}
|
||||
|
||||
this.statusBarManager.updateStatus(AgentStatus.Success);
|
||||
if (this._lastRetrievalInfo) {
|
||||
this.webview.postMessage({
|
||||
type: 'usedScope',
|
||||
value: { ...this._lastRetrievalInfo, hasAgentSelected: !!options.agentSkillFile },
|
||||
});
|
||||
}
|
||||
this.webview.postMessage({ type: 'streamChunk', value: finalAssistantContent });
|
||||
|
||||
} catch (error: any) {
|
||||
@@ -863,12 +1048,17 @@ export class AgentExecutor {
|
||||
|
||||
private async callAgent(role: AgentRole, prompt: string, modelName: string, options: any): Promise<string> {
|
||||
const persona = AGENT_PROMPTS[role];
|
||||
const { ollamaUrl } = getConfig();
|
||||
const { ollamaUrl, contextLength, maxOutputTokens, contextSafetyMargin, contextOverflowPolicy } = getConfig();
|
||||
|
||||
const messages: ChatMessage[] = [
|
||||
{ role: 'system', content: persona },
|
||||
{ role: 'user', content: prompt }
|
||||
];
|
||||
// Dynamic output cap so input + output stays within the context window.
|
||||
const inputTokens = estimateMessagesTokens(messages);
|
||||
const { maxOutputTokens: subMaxTokens } = computeOutputBudget(inputTokens, {
|
||||
contextLength, maxOutputTokens, safetyMargin: contextSafetyMargin, minOutputTokens: 512,
|
||||
});
|
||||
|
||||
const engine = resolveEngine(ollamaUrl);
|
||||
let responseText = '';
|
||||
@@ -879,6 +1069,8 @@ export class AgentExecutor {
|
||||
modelName,
|
||||
messages: messages.map((m) => ({ role: m.role, content: m.content })),
|
||||
temperature: 0.3,
|
||||
maxTokens: subMaxTokens,
|
||||
contextOverflowPolicy,
|
||||
signal: this.abortController?.signal,
|
||||
});
|
||||
for await (const { token } of stream) {
|
||||
@@ -896,7 +1088,9 @@ export class AgentExecutor {
|
||||
baseUrl: ollamaUrl,
|
||||
modelName: modelName,
|
||||
reqMessages: messages,
|
||||
temperature: 0.3 // Use lower temperature for planning and research
|
||||
temperature: 0.3, // Use lower temperature for planning and research
|
||||
maxTokens: subMaxTokens,
|
||||
contextLength
|
||||
});
|
||||
|
||||
const reader = request.response.body?.getReader();
|
||||
@@ -1218,16 +1412,17 @@ export class AgentExecutor {
|
||||
|
||||
/**
|
||||
* 프롬프트에 로컬 파일/디렉토리 경로가 포함되어 있는지 감지합니다.
|
||||
* 절대 경로: /Volumes/, /Users/, /home/, ~/
|
||||
* POSIX 절대 경로: /Volumes/, /Users/, /home/, /opt/, ~/
|
||||
* Windows 절대 경로: C:\..., D:/..., \\server\share\...
|
||||
* 상대 경로: src/..., lib/..., components/..., tests/... 등 + 파일 확장자
|
||||
*/
|
||||
private containsLocalFilePath(prompt: string): boolean {
|
||||
// 절대 경로
|
||||
if (/(?:\/Volumes\/|\/Users\/|\/home\/|~\/)[^\s`"'<>]+/i.test(prompt)) {
|
||||
// 절대 경로 (POSIX + Windows 드라이브 문자 + UNC)
|
||||
if (AgentExecutor.ABS_PATH_RE.test(prompt) || AgentExecutor.WIN_ABS_PATH_RE.test(prompt)) {
|
||||
return true;
|
||||
}
|
||||
// 상대 경로 패턴: 디렉토리/파일명.확장자 형태 (src/lib/engine.ts, components/App.tsx 등)
|
||||
if (/(?:^|[\s,])(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)\//i.test(prompt)
|
||||
// 상대 경로 패턴: 디렉토리/파일명.확장자 형태 (src/lib/engine.ts, components\App.tsx 등)
|
||||
if (/(?:^|[\s,])(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)[\\/]/i.test(prompt)
|
||||
&& /\.[a-z]{1,6}(?:[\s,;)\]]|$)/i.test(prompt)) {
|
||||
return true;
|
||||
}
|
||||
@@ -1551,15 +1746,21 @@ export class AgentExecutor {
|
||||
|
||||
private extractLocalProjectPaths(prompt: string, rootPath?: string): string[] {
|
||||
const results: string[] = [];
|
||||
const stripTrailingPunct = (s: string) => s.replace(/[),.;\]]+$/g, '');
|
||||
|
||||
// 1. 절대 경로 감지: /Volumes/, /Users/, /home/, ~/
|
||||
const absMatches = prompt.match(/(?:\/Volumes\/|\/Users\/|\/home\/|~\/)[^\s`"'<>]+/gi) || [];
|
||||
// 1a. POSIX 절대 경로: /Volumes/, /Users/, /home/, /opt/, ~/
|
||||
const absMatches = prompt.match(new RegExp(POSIX_ABS_PATH_SRC, 'gi')) || [];
|
||||
for (const m of absMatches) {
|
||||
results.push(m.replace(/[),.;\]]+$/g, ''));
|
||||
results.push(stripTrailingPunct(m));
|
||||
}
|
||||
// 1b. Windows 절대 경로: C:\..., D:/..., \\server\share\...
|
||||
const winMatches = prompt.match(new RegExp(WIN_ABS_PATH_SRC, 'gi')) || [];
|
||||
for (const m of winMatches) {
|
||||
results.push(stripTrailingPunct(m));
|
||||
}
|
||||
|
||||
// 2. 상대 경로 감지: src/lib/engine.ts, components/App.tsx 등
|
||||
const relMatches = prompt.match(/(?:^|[\s,])(?:(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)\/[^\s`"'<>]+\.[a-z]{1,6})/gi) || [];
|
||||
// 2. 상대 경로 감지: src/lib/engine.ts, components/App.tsx, src\lib\engine.ts 등
|
||||
const relMatches = prompt.match(/(?:^|[\s,])(?:(?:src|lib|components|pages|app|tests|test|utils|core|features|hooks|services|config|public|assets|docs|scripts)[\\/][^\s`"'<>]+\.[a-z]{1,6})/gi) || [];
|
||||
for (const m of relMatches) {
|
||||
const cleaned = m.trim().replace(/^,\s*/, '').replace(/[),.;\]]+$/g, '');
|
||||
if (rootPath) {
|
||||
@@ -1924,6 +2125,7 @@ export class AgentExecutor {
|
||||
|
||||
private buildMemoryContext(currentPrompt: string, activeBrain: BrainProfile, agentSkillFile?: string): string {
|
||||
const config = getConfig();
|
||||
this._lastRetrievalInfo = null;
|
||||
if (!config.memoryEnabled) return '';
|
||||
|
||||
// Update memory manager config in case settings changed
|
||||
@@ -1956,6 +2158,27 @@ export class AgentExecutor {
|
||||
scopeFolders: scope.folders
|
||||
});
|
||||
|
||||
// Stash what actually fed this turn so handlePrompt can show it under the answer.
|
||||
const brainRoot = activeBrain.localBrainPath;
|
||||
const rel = (p?: string) => (p ? (path.relative(brainRoot, p) || p) : '');
|
||||
this._lastRetrievalInfo = {
|
||||
agentName: scope.agent?.name ?? null,
|
||||
scoped: scope.folders.length > 0,
|
||||
source: String((scope as any).source ?? ''),
|
||||
configuredFolders: scope.folders.map((abs) => rel(abs)),
|
||||
usedBrainFiles: result.selectedChunks
|
||||
.filter((c) => c.source === 'brain-memory' && c.metadata.filePath)
|
||||
.map((c) => rel(c.metadata.filePath))
|
||||
.filter((p, i, arr) => p && arr.indexOf(p) === i),
|
||||
usedMemoryLayers: Array.from(new Set(
|
||||
result.selectedChunks
|
||||
.filter((c) => c.source !== 'brain-memory' && c.source !== 'brain-trace')
|
||||
.map((c) => c.source as string)
|
||||
)),
|
||||
totalChunks: result.totalChunks,
|
||||
selectedChunks: result.selectedChunks.length,
|
||||
};
|
||||
|
||||
return this.retrievalOrchestrator.buildContextString(result);
|
||||
}
|
||||
|
||||
@@ -1999,8 +2222,14 @@ export class AgentExecutor {
|
||||
modelName: string;
|
||||
reqMessages: ChatMessage[];
|
||||
temperature: number;
|
||||
/** Dynamic output-token cap computed from the remaining context budget. */
|
||||
maxTokens?: number;
|
||||
/** Model context window in tokens (used for Ollama's num_ctx). */
|
||||
contextLength?: number;
|
||||
}): Promise<{ response: Response; engine: 'lmstudio' | 'ollama'; apiUrl: string }> {
|
||||
const { baseUrl, modelName, reqMessages, temperature } = params;
|
||||
const maxTokens = Math.max(256, params.maxTokens ?? 4096);
|
||||
const numCtx = Math.max(2048, params.contextLength ?? 32768);
|
||||
const engine = resolveEngine(baseUrl); // 사용자가 설정한 엔진만 사용
|
||||
const apiUrl = buildApiUrl(baseUrl, engine, 'chat');
|
||||
const messageVariants = this.buildEngineMessageVariants(reqMessages, engine);
|
||||
@@ -2015,8 +2244,8 @@ export class AgentExecutor {
|
||||
messages: variant.messages,
|
||||
stream: true,
|
||||
...(engine === 'lmstudio'
|
||||
? { max_tokens: 4096, temperature }
|
||||
: { options: { num_ctx: 32768, num_predict: 4096, temperature } }),
|
||||
? { max_tokens: maxTokens, temperature }
|
||||
: { options: { num_ctx: numCtx, num_predict: maxTokens, temperature } }),
|
||||
};
|
||||
|
||||
// 일시적 네트워크 오류용 retry (최대 2회, 지수 backoff)
|
||||
@@ -2100,9 +2329,13 @@ export class AgentExecutor {
|
||||
engine: 'lmstudio' | 'ollama';
|
||||
messages: ChatMessage[];
|
||||
temperature: number;
|
||||
maxTokens?: number;
|
||||
contextLength?: number;
|
||||
signal?: AbortSignal;
|
||||
}): Promise<string> {
|
||||
}): Promise<{ text: string; stopReason?: string }> {
|
||||
const { baseUrl, modelName, engine, messages, temperature, signal } = params;
|
||||
const maxTokens = Math.max(256, params.maxTokens ?? 4096);
|
||||
const numCtx = Math.max(2048, params.contextLength ?? 32768);
|
||||
const apiUrl = buildApiUrl(baseUrl, engine, 'chat');
|
||||
const variants = this.buildEngineMessageVariants(messages, engine);
|
||||
const body = {
|
||||
@@ -2110,8 +2343,8 @@ export class AgentExecutor {
|
||||
messages: variants[0].messages,
|
||||
stream: false,
|
||||
...(engine === 'lmstudio'
|
||||
? { max_tokens: 4096, temperature }
|
||||
: { options: { num_ctx: 32768, num_predict: 4096, temperature } }),
|
||||
? { max_tokens: maxTokens, temperature }
|
||||
: { options: { num_ctx: numCtx, num_predict: maxTokens, temperature } }),
|
||||
};
|
||||
const response = await fetch(apiUrl, {
|
||||
method: 'POST',
|
||||
@@ -2127,11 +2360,17 @@ export class AgentExecutor {
|
||||
try {
|
||||
const json = JSON.parse(text);
|
||||
if (engine === 'lmstudio') {
|
||||
return json?.choices?.[0]?.message?.content ?? '';
|
||||
return {
|
||||
text: json?.choices?.[0]?.message?.content ?? '',
|
||||
stopReason: json?.choices?.[0]?.finish_reason,
|
||||
};
|
||||
}
|
||||
return json?.message?.content ?? json?.response ?? '';
|
||||
return {
|
||||
text: json?.message?.content ?? json?.response ?? '',
|
||||
stopReason: json?.done_reason ?? (json?.done === true ? 'stop' : undefined),
|
||||
};
|
||||
} catch {
|
||||
return '';
|
||||
return { text: '' };
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user