feat: Ghost Response retry and PID logging for bridge (v2.80.16)
This commit is contained in:
+1
-1
@@ -2,7 +2,7 @@
|
||||
"name": "astra",
|
||||
"displayName": "Astra",
|
||||
"description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.",
|
||||
"version": "2.80.15",
|
||||
"version": "2.80.16",
|
||||
"publisher": "g1nation",
|
||||
"license": "MIT",
|
||||
"icon": "assets/icon.png",
|
||||
|
||||
+71
-10
@@ -565,6 +565,57 @@ export class AgentExecutor {
|
||||
}
|
||||
}
|
||||
|
||||
// 4.1 Check for Ghost Response (Empty response from LM Studio/Ollama despite 200 OK)
|
||||
if (!aiResponseText.trim() && request.engine === 'lmstudio' && loopDepth === 0) {
|
||||
logInfo('Empty response detected from LM Studio. Retrying with extreme compression...', { model: actualModel });
|
||||
|
||||
// Force extreme compression: system + last user only
|
||||
const sysMsg = messagesForRequest.find(m => m.role === 'system');
|
||||
const lastUserMsg = [...messagesForRequest].reverse().find(m => m.role === 'user');
|
||||
const extremeMessages = [
|
||||
...(sysMsg ? [sysMsg] : []),
|
||||
...(lastUserMsg ? [lastUserMsg] : [])
|
||||
];
|
||||
|
||||
const retryRequest = await this.createStreamingRequest({
|
||||
baseUrl: ollamaUrl,
|
||||
modelName: actualModel,
|
||||
reqMessages: extremeMessages,
|
||||
temperature
|
||||
});
|
||||
|
||||
if (retryRequest.response.ok) {
|
||||
const retryBody = retryRequest.response.body as any;
|
||||
const retryDecoder = new TextDecoder();
|
||||
let retryBuffer = '';
|
||||
|
||||
// Simple stream reader for retry
|
||||
const reader = retryBody.getReader();
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
retryBuffer += retryDecoder.decode(value, { stream: true });
|
||||
// ... simplified parsing for retry ...
|
||||
const lines = retryBuffer.split('\n');
|
||||
retryBuffer = lines.pop() || '';
|
||||
for (const line of lines) {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed || trimmed === 'data: [DONE]') continue;
|
||||
try {
|
||||
const raw = trimmed.startsWith('data:') ? trimmed.replace(/^data:\s*/, '') : trimmed;
|
||||
if (!raw || raw === '[DONE]') continue;
|
||||
const json = JSON.parse(raw);
|
||||
const token = json.choices?.[0]?.delta?.content || json.message?.content || json.response || '';
|
||||
if (token) {
|
||||
aiResponseText += token;
|
||||
this.webview?.postMessage({ type: 'streamUpdate', value: token });
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (this.isStaleRun(runId)) return;
|
||||
if (requestTimeoutHandle) {
|
||||
clearTimeout(requestTimeoutHandle);
|
||||
@@ -622,17 +673,27 @@ export class AgentExecutor {
|
||||
this.statusBarManager.updateStatus(AgentStatus.Executing);
|
||||
const report = await this.executeActions(aiResponseText, rootPath, activeBrain);
|
||||
if (!assistantContent.trim() && report.length === 0) {
|
||||
const totalChars2 = messagesForRequest.reduce((acc, m) => acc + String(m.content || '').length, 0);
|
||||
// 실제 전송에 사용된 메시지(request.finalMessages)를 기준으로 토큰 재계산
|
||||
const usedMessages = request.finalMessages || messagesForRequest;
|
||||
const totalChars2 = usedMessages.reduce((acc, m) => acc + String(m.content || '').length, 0);
|
||||
const estimatedTokens2 = Math.ceil(totalChars2 / 4);
|
||||
const isContextOverflow = estimatedTokens2 > 5000;
|
||||
logError('Model returned an empty response without actions.', { model: actualModel, engine, apiUrl, loopDepth, estimatedTokens: estimatedTokens2 });
|
||||
const isContextOverflow = estimatedTokens2 > 2500; // 3000 한도에 근접하면 오버플로우로 간주
|
||||
|
||||
logError('Model returned an empty response without actions.', {
|
||||
model: actualModel,
|
||||
engine: request.engine,
|
||||
apiUrl: request.apiUrl,
|
||||
loopDepth,
|
||||
estimatedTokens: estimatedTokens2,
|
||||
wasCompressed: usedMessages.length !== messagesForRequest.length || totalChars2 !== (messagesForRequest.reduce((a, m) => a + String(m.content || '').length, 0))
|
||||
});
|
||||
this.webview.postMessage({
|
||||
type: 'error',
|
||||
value: [
|
||||
'AI engine returned an empty response.',
|
||||
`Engine: ${engine} | Model: ${actualModel}`,
|
||||
`Engine: ${request.engine} | Model: ${actualModel}`,
|
||||
isContextOverflow
|
||||
? `Context overflow: ~${estimatedTokens2.toLocaleString()} tokens estimated. This model likely has a smaller context window.`
|
||||
? `Context overflow: ~${estimatedTokens2.toLocaleString()} tokens (actually sent). The model context window was likely exceeded even after compression.`
|
||||
: 'The request reached the LLM server, but no content was returned.',
|
||||
'',
|
||||
'**해결 방법:**',
|
||||
@@ -2008,7 +2069,7 @@ export class AgentExecutor {
|
||||
modelName: string;
|
||||
reqMessages: ChatMessage[];
|
||||
temperature: number;
|
||||
}): Promise<{ response: Response; engine: 'lmstudio' | 'ollama'; apiUrl: string }> {
|
||||
}): Promise<{ response: Response; engine: 'lmstudio' | 'ollama'; apiUrl: string; finalMessages: ChatMessage[] }> {
|
||||
const { baseUrl, modelName, reqMessages, temperature } = params;
|
||||
const primaryEngine = resolveEngine(baseUrl);
|
||||
const engines = primaryEngine === 'lmstudio' ? ['lmstudio', 'ollama'] as const : ['ollama', 'lmstudio'] as const;
|
||||
@@ -2032,7 +2093,7 @@ export class AgentExecutor {
|
||||
if (engine === 'lmstudio') {
|
||||
const totalCharsRaw = finalMessages.reduce((acc, m) => acc + String(m.content || '').length, 0);
|
||||
const estimatedTokensRaw = Math.ceil(totalCharsRaw / 4);
|
||||
const LM_CTX_SAFE_LIMIT = 3500; // 4096 n_ctx 기준 안전 마진
|
||||
const LM_CTX_SAFE_LIMIT = 3000; // 4096 n_ctx 기준 더 보수적인 안전 마진
|
||||
|
||||
if (estimatedTokensRaw > LM_CTX_SAFE_LIMIT) {
|
||||
logInfo('LM Studio proactive compression triggered.', {
|
||||
@@ -2098,7 +2159,7 @@ export class AgentExecutor {
|
||||
messages: finalMessages.map(m => ({ role: m.role, content: m.content })),
|
||||
stream: true,
|
||||
...(engine === 'lmstudio'
|
||||
? { max_tokens: Math.min(4096, Math.max(256, 3500 - estimatedTokens)), temperature }
|
||||
? { max_tokens: Math.min(4096, Math.max(256, 3000 - estimatedTokens)), temperature }
|
||||
: { options: { num_ctx: 32768, num_predict: 4096, temperature } }),
|
||||
};
|
||||
logInfo('AI streaming request started.', {
|
||||
@@ -2182,7 +2243,7 @@ export class AgentExecutor {
|
||||
|
||||
if (retryResponse.ok) {
|
||||
logInfo('n_ctx retry succeeded.', { apiUrl });
|
||||
return { response: retryResponse, engine, apiUrl };
|
||||
return { response: retryResponse, engine, apiUrl, finalMessages: compressedMessages };
|
||||
}
|
||||
logError('n_ctx retry also failed.', { status: retryResponse.status });
|
||||
}
|
||||
@@ -2193,7 +2254,7 @@ export class AgentExecutor {
|
||||
}
|
||||
|
||||
logInfo('AI streaming request connected.', { engine, variant: variant.name, apiUrl });
|
||||
return { response, engine, apiUrl };
|
||||
return { response, engine, apiUrl, finalMessages };
|
||||
} catch (error: any) {
|
||||
lastError = error instanceof Error ? error : new Error(String(error));
|
||||
logError('AI streaming request failed.', { engine, variant: variant.name, apiUrl, model: candidateModel, error: lastError.message });
|
||||
|
||||
+3
-3
@@ -74,7 +74,7 @@ export class BridgeServer {
|
||||
server.once('error', (err: any) => {
|
||||
if (err.code === 'EADDRINUSE') {
|
||||
// INFO 레벨: ERR 콘솔 오염 방지 (Extension Host가 console.error를 ERR로 표시)
|
||||
logInfo(`Bridge Port ${port} already in use. Trying port ${port + 1}...`);
|
||||
logInfo(`Bridge Port ${port} already in use. Trying port ${port + 1}... (Current PID: ${process.pid})`);
|
||||
server.close();
|
||||
if (this.server === server) {
|
||||
this.server = null;
|
||||
@@ -82,14 +82,14 @@ export class BridgeServer {
|
||||
this.start(port + 1);
|
||||
} else {
|
||||
// EADDRINUSE 외 진짜 에러만 logError
|
||||
logInfo(`Bridge server non-fatal error on port ${port}: ${err.code || err.message}`);
|
||||
logInfo(`Bridge server non-fatal error on port ${port}: ${err.code || err.message} (PID: ${process.pid})`);
|
||||
}
|
||||
});
|
||||
|
||||
// 성공 시 서버 참조 저장
|
||||
server.listen(port, '127.0.0.1', () => {
|
||||
this.server = server;
|
||||
logInfo(`Bridge server active on 127.0.0.1:${port}.`);
|
||||
logInfo(`Bridge server active on 127.0.0.1:${port} (PID: ${process.pid}).`);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user