Build: Release v2.80.28

This commit is contained in:
g1nation
2026-05-10 22:41:35 +09:00
parent d899daa118
commit ec71014481
11 changed files with 200 additions and 39 deletions
+135 -19
View File
@@ -39,6 +39,7 @@ import {
} from './features/secondBrainTrace';
import { MemoryManager } from './memory';
import { RetrievalOrchestrator } from './retrieval';
import { resolveScopeForAgent } from './skills/agentKnowledgeMap';
export interface ChatMessage {
role: 'user' | 'assistant' | 'system';
@@ -245,6 +246,7 @@ export class AgentExecutor {
systemPrompt?: string,
runId?: number,
agentSkillContext?: string,
agentSkillFile?: string,
negativePrompt?: string,
designerContext?: string,
secondBrainTraceEnabled?: boolean,
@@ -414,7 +416,7 @@ export class AgentExecutor {
const secondBrainTraceCtx = secondBrainTrace
? `\n\n${renderSecondBrainTraceContext(secondBrainTrace)}`
: '';
const memoryCtx = this.buildMemoryContext(prompt || '', activeBrain);
const memoryCtx = this.buildMemoryContext(prompt || '', activeBrain, options.agentSkillFile);
// ──────────────────────────────────────────────────────────────────
// [Agent Mode v3] 에이전트가 선택된 경우, Astra 기본 포맷/페르소나 섹션을
@@ -524,6 +526,12 @@ export class AgentExecutor {
if (!reader) throw new Error("Response body is not readable.");
const decoder = new TextDecoder();
// try/finally guarantees the reader's lock is released on every
// exit path (normal end, AbortError, parse exception, stale-run
// early return). Without this, downstream consumers — including
// any retry path that wants to drain the same body — fail with
// "lock() request could not be registered" because the previous
// reader still holds the stream lock.
try {
while (true) {
const { done, value } = await reader.read();
@@ -555,6 +563,8 @@ export class AgentExecutor {
logError('Stream reading error.', { engine, apiUrl, error: err?.message || String(err) });
this.webview?.postMessage({ type: 'error', value: `Connection lost: ${err.message}` });
}
} finally {
try { reader.releaseLock(); } catch { /* reader may already be released on AbortError */ }
}
}
@@ -579,6 +589,38 @@ export class AgentExecutor {
requestTimeoutHandle = undefined;
}
// ── Empty-response auto-recovery ──
// Streaming failed silently (network blip, model cold-start, context
// overflow, etc.). Before surfacing the error to the user, try one
// non-streaming retry: many LM Studio failures are streaming-only
// (the SSE channel drops mid-token while a single POST returns the
// whole answer fine). This covers the most common "empty response"
// pattern users hit without the user having to click anything.
//
// Only attempts recovery on loopDepth === 0 — we don't want to
// ping-pong inside the autonomous action loop.
if (!aiResponseText.trim() && !this.abortController?.signal.aborted && loopDepth === 0) {
try {
logInfo('Empty stream — trying non-streaming fallback.', { engine, model: actualModel, apiUrl });
const fallback = await this.callNonStreaming({
baseUrl: ollamaUrl,
modelName: actualModel,
engine,
messages: messagesForRequest,
temperature,
signal: this.abortController?.signal,
});
if (fallback && fallback.trim()) {
aiResponseText = fallback;
logInfo('Non-streaming fallback recovered the answer.', { engine, model: actualModel, length: fallback.length });
}
} catch (recoverErr: any) {
logError('Non-streaming fallback also failed.', {
engine, model: actualModel, error: recoverErr?.message ?? String(recoverErr),
});
}
}
// 5. Execute Actions
const rationale = this.parseRationale(aiResponseText);
let assistantContent = this.enforceLocalPathReviewAnswer(
@@ -630,14 +672,26 @@ export class AgentExecutor {
this.statusBarManager.updateStatus(AgentStatus.Executing);
const report = await this.executeActions(aiResponseText, rootPath, activeBrain);
if (!assistantContent.trim() && report.length === 0) {
logError('Model returned an empty response without actions.', { model: actualModel, engine, apiUrl, loopDepth });
const promptCharCount = messagesForRequest.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
logError('Model returned an empty response without actions.', {
model: actualModel, engine, apiUrl, loopDepth,
promptCharCount, messageCount: messagesForRequest.length,
fallbackTried: loopDepth === 0 ? 'yes' : 'no',
});
this.webview.postMessage({
type: 'error',
value: [
'AI engine returned an empty response.',
'AI 엔진이 빈 응답을 반환했습니다 (스트리밍 + non-streaming 폴백 모두 실패).',
`Engine: ${engine}`,
`Model: ${actualModel}`,
'The request reached the local LLM server, but no usable content was returned. Try another model, restart the local server, or reduce the prompt/context size.'
`Prompt size: ${promptCharCount.toLocaleString()} chars across ${messagesForRequest.length} message(s)`,
'',
'다음을 시도해보세요:',
' • LM Studio에서 모델이 실제로 로드되어 있는지 확인',
promptCharCount > 16000
? ' • 프롬프트가 너무 큽니다 (16k chars 초과). Skill/Brain 컨텍스트를 좁혀 보세요.'
: ' • 다른 모델로 전환하거나 LM Studio 서버를 재시작',
' • Settings에서 maxContextSize 또는 memoryLongTermFiles 줄이기',
].join('\n')
});
return;
@@ -809,20 +863,24 @@ export class AgentExecutor {
if (!reader) throw new Error("Agent response body is not readable.");
const decoder = new TextDecoder();
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = decoder.decode(value, { stream: true });
const lines = chunk.split('\n');
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed || trimmed === 'data: [DONE]') continue;
try {
const json = JSON.parse(trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed);
const content = json.choices?.[0]?.delta?.content || json.message?.content || '';
responseText += content;
} catch (e) { }
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = decoder.decode(value, { stream: true });
const lines = chunk.split('\n');
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed || trimmed === 'data: [DONE]') continue;
try {
const json = JSON.parse(trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed);
const content = json.choices?.[0]?.delta?.content || json.message?.content || '';
responseText += content;
} catch (e) { }
}
}
} finally {
try { reader.releaseLock(); } catch { /* already released */ }
}
return responseText;
}
@@ -1962,7 +2020,7 @@ export class AgentExecutor {
});
}
private buildMemoryContext(currentPrompt: string, activeBrain: BrainProfile): string {
private buildMemoryContext(currentPrompt: string, activeBrain: BrainProfile, agentSkillFile?: string): string {
const config = getConfig();
if (!config.memoryEnabled) return '';
@@ -1976,6 +2034,12 @@ export class AgentExecutor {
const workspaceFolders = vscode.workspace.workspaceFolders;
const workspacePath = workspaceFolders ? workspaceFolders[0].uri.fsPath : undefined;
// Resolve scope folders from the agent ↔ knowledge map. When the user
// hasn't selected an agent (or the selection has no mapping), `folders`
// is empty and the orchestrator falls back to whole-brain search —
// keeping the legacy behavior intact.
const scope = resolveScopeForAgent(agentSkillFile, activeBrain.localBrainPath);
// Use the Unified RAG Pipeline
const result = this.retrievalOrchestrator.retrieve(currentPrompt, {
brain: activeBrain,
@@ -1986,7 +2050,8 @@ export class AgentExecutor {
totalBudget: 8000,
retrievalRatio: 0.4
},
brainFileLimit: config.memoryLongTermFiles
brainFileLimit: config.memoryLongTermFiles,
scopeFolders: scope.folders
});
return this.retrievalOrchestrator.buildContextString(result);
@@ -2117,6 +2182,57 @@ export class AgentExecutor {
);
}
/**
 * Non-streaming chat completion. Used as a recovery path when the
 * streaming endpoint returns an empty response — common with LM Studio
 * when a model is mid-load or the SSE channel drops.
 *
 * The body is consumed via `await response.text()` (single read), so this
 * method never acquires a stream reader and has no reader lock to release.
 *
 * @param params.baseUrl Server base URL; combined with `engine` and the
 *   `'chat'` endpoint kind by `buildApiUrl`.
 * @param params.modelName Sent as the request's `model` field.
 * @param params.engine Selects both the request option layout and the
 *   response field that is read.
 * @param params.messages Conversation to send; passed through
 *   `buildEngineMessageVariants`, and the first variant is used.
 * @param params.temperature Sampling temperature (top-level for LM Studio,
 *   under `options` for Ollama).
 * @param params.signal Optional abort signal forwarded to `fetch`.
 * @returns The assistant text, or `''` when the body is not valid JSON or
 *   does not carry string content in the expected field.
 * @throws Error when the server answers with a non-2xx status. Rejections
 *   from `fetch` itself (network failure, abort) propagate unchanged.
 */
private async callNonStreaming(params: {
    baseUrl: string;
    modelName: string;
    engine: 'lmstudio' | 'ollama';
    messages: ChatMessage[];
    temperature: number;
    signal?: AbortSignal;
}): Promise<string> {
    const { baseUrl, modelName, engine, messages, temperature, signal } = params;
    const apiUrl = buildApiUrl(baseUrl, engine, 'chat');
    const variants = this.buildEngineMessageVariants(messages, engine);
    const body = {
        model: modelName,
        messages: variants[0].messages,
        stream: false,
        ...(engine === 'lmstudio'
            ? { max_tokens: 4096, temperature }
            : { options: { num_ctx: 32768, num_predict: 4096, temperature } }),
    };
    const response = await fetch(apiUrl, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(body),
        signal,
    });
    if (!response.ok) {
        const errText = await response.text().catch(() => '');
        throw new Error(`Non-streaming fallback returned ${response.status}: ${summarizeText(errText, 200)}`);
    }
    const text = await response.text();

    let json: any;
    try {
        json = JSON.parse(text);
    } catch {
        // Still best-effort (the caller treats '' as "no recovery"), but leave
        // a trace so a proxy/HTML error page is distinguishable from a model
        // that genuinely produced nothing.
        logError('Non-streaming fallback returned a non-JSON body.', {
            engine, model: modelName, preview: summarizeText(text, 200),
        });
        return '';
    }

    const content: unknown = engine === 'lmstudio'
        ? json?.choices?.[0]?.message?.content
        : (json?.message?.content ?? json?.response);

    // JSON.parse yields `any`, so the field may be an array, object or number.
    // Callers invoke string methods on the result; honor the declared
    // Promise<string> contract instead of leaking a non-string value.
    return typeof content === 'string' ? content : '';
}
private normalizeMessages(messages: ChatMessage[]) {
return messages.map((message) => {
const normalizedContent = typeof message.content === 'string'
+15
View File
@@ -48,6 +48,19 @@ export class LMStudioStreamer implements IChatStreamer {
signal: req.signal,
});
// Bridge AbortSignal → prediction.cancel(): without this, an aborted
// request keeps generating on the LM Studio server. The orphaned
// prediction holds locks on the model handle, which is a known cause
// of "lock() request could not be registered" on the very next
// request — the reused handle is still bound to a dead prediction.
const onAbort = () => {
try { (prediction as any)?.cancel?.(); } catch { /* swallow — best effort */ }
};
if (req.signal) {
if (req.signal.aborted) onAbort();
else req.signal.addEventListener('abort', onAbort, { once: true });
}
try {
for await (const fragment of prediction as AsyncIterable<{ content: string }>) {
if (req.signal?.aborted) return;
@@ -59,6 +72,8 @@ export class LMStudioStreamer implements IChatStreamer {
if (err?.name === 'AbortError') return;
logError('LM Studio SDK chat stream failed.', { model: trimmedModel, error: err?.message ?? String(err) });
throw err;
} finally {
req.signal?.removeEventListener?.('abort', onAbort);
}
}
}
+22 -3
View File
@@ -16,6 +16,7 @@ import * as fs from 'fs';
import * as path from 'path';
import { BrainProfile } from '../config';
import { findBrainFiles, summarizeText } from '../utils';
import { isInside } from '../lib/paths';
import { MemoryManager } from '../memory';
import { RetrievalChunk, RetrievalResult, ContextBudgetConfig } from './types';
import { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from './scoring';
@@ -33,6 +34,14 @@ interface RetrievalOptions {
contextBudget?: Partial<ContextBudgetConfig>;
brainFileLimit?: number;
includeRawConversations?: boolean;
/**
* Optional absolute folder paths constraining brain-file search to those
* subtrees. When provided and non-empty, only brain files inside one of
* the folders are considered. Empty / undefined preserves whole-brain
* search (legacy behavior). Folders that escape the brain root are
* silently dropped by the caller (see `agentKnowledgeMap.resolveScopeForAgent`).
*/
scopeFolders?: string[];
}
export class RetrievalOrchestrator {
@@ -50,15 +59,21 @@ export class RetrievalOrchestrator {
fusionLog.push(`Expanded tokens: [${expandedTokens.slice(0, 15).join(', ')}]`);
// ── ① Brain File Search (TF-IDF enhanced) ──
const scopeFolders = options.scopeFolders ?? [];
const brainChunks = this.searchBrainFiles(
query,
expandedTokens,
options.brain,
options.brainFileLimit || 8,
options.includeRawConversations || false
options.includeRawConversations || false,
scopeFolders
);
allChunks.push(...brainChunks);
fusionLog.push(`Brain search: ${brainChunks.length} chunks found`);
fusionLog.push(
scopeFolders.length > 0
? `Brain search (scoped to ${scopeFolders.length} folder(s)): ${brainChunks.length} chunks`
: `Brain search: ${brainChunks.length} chunks found`
);
// ── ② Memory Layers ──
const memoryChunks = this.searchMemoryLayers(
@@ -106,10 +121,14 @@ export class RetrievalOrchestrator {
expandedTokens: string[],
brain: BrainProfile,
limit: number,
includeRaw: boolean
includeRaw: boolean,
scopeFolders: string[] = []
): RetrievalChunk[] {
try {
const scoped = (file: string) => scopeFolders.length === 0
|| scopeFolders.some((folder) => isInside(folder, file));
const allFiles = findBrainFiles(brain.localBrainPath)
.filter(scoped)
.filter((file) => includeRaw || !this.isRawConversation(path.relative(brain.localBrainPath, file)));
if (allFiles.length === 0) return [];
+1
View File
@@ -1807,6 +1807,7 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
internetEnabled: internet,
visionContent: imageFiles,
agentSkillContext,
agentSkillFile: typeof agentFile === 'string' ? agentFile : undefined,
negativePrompt,
designerContext,
secondBrainTraceEnabled: secondBrainTrace !== false,