diff --git a/package.json b/package.json index 4e8b2e9..9c39d10 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "astra", "displayName": "Astra", "description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.", - "version": "2.80.15", + "version": "2.80.16", "publisher": "g1nation", "license": "MIT", "icon": "assets/icon.png", diff --git a/src/agent.ts b/src/agent.ts index a07189b..d09277e 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -565,6 +565,57 @@ export class AgentExecutor { } } + // 4.1 Check for Ghost Response (Empty response from LM Studio/Ollama despite 200 OK) + if (!aiResponseText.trim() && request.engine === 'lmstudio' && loopDepth === 0) { + logInfo('Empty response detected from LM Studio. Retrying with extreme compression...', { model: actualModel }); + + // Force extreme compression: system + last user only + const sysMsg = messagesForRequest.find(m => m.role === 'system'); + const lastUserMsg = [...messagesForRequest].reverse().find(m => m.role === 'user'); + const extremeMessages = [ + ...(sysMsg ? [sysMsg] : []), + ...(lastUserMsg ? [lastUserMsg] : []) + ]; + + const retryRequest = await this.createStreamingRequest({ + baseUrl: ollamaUrl, + modelName: actualModel, + reqMessages: extremeMessages, + temperature + }); + + if (retryRequest.response.ok) { + const retryBody = retryRequest.response.body as any; + const retryDecoder = new TextDecoder(); + let retryBuffer = ''; + + // Simple stream reader for retry + const reader = retryBody.getReader(); + while (true) { + const { done, value } = await reader.read(); + if (done) break; + retryBuffer += retryDecoder.decode(value, { stream: true }); + // ... simplified parsing for retry ... + const lines = retryBuffer.split('\n'); + retryBuffer = lines.pop() || ''; + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed || trimmed === 'data: [DONE]') continue; + try { + const raw = trimmed.startsWith('data:') ? trimmed.replace(/^data:\s*/, '') : trimmed; + if (!raw || raw === '[DONE]') continue; + const json = JSON.parse(raw); + const token = json.choices?.[0]?.delta?.content || json.message?.content || json.response || ''; + if (token) { + aiResponseText += token; + this.webview?.postMessage({ type: 'streamUpdate', value: token }); + } + } catch {} + } + } + } + } + if (this.isStaleRun(runId)) return; if (requestTimeoutHandle) { clearTimeout(requestTimeoutHandle); @@ -622,17 +673,27 @@ export class AgentExecutor { this.statusBarManager.updateStatus(AgentStatus.Executing); const report = await this.executeActions(aiResponseText, rootPath, activeBrain); if (!assistantContent.trim() && report.length === 0) { - const totalChars2 = messagesForRequest.reduce((acc, m) => acc + String(m.content || '').length, 0); + // 실제 전송에 사용된 메시지(request.finalMessages)를 기준으로 토큰 재계산 + const usedMessages = request.finalMessages || messagesForRequest; + const totalChars2 = usedMessages.reduce((acc, m) => acc + String(m.content || '').length, 0); const estimatedTokens2 = Math.ceil(totalChars2 / 4); - const isContextOverflow = estimatedTokens2 > 5000; - logError('Model returned an empty response without actions.', { model: actualModel, engine, apiUrl, loopDepth, estimatedTokens: estimatedTokens2 }); + const isContextOverflow = estimatedTokens2 > 2500; // 3000 한도에 근접하면 오버플로우로 간주 + + logError('Model returned an empty response without actions.', { + model: actualModel, + engine: request.engine, + apiUrl: request.apiUrl, + loopDepth, + estimatedTokens: estimatedTokens2, + wasCompressed: usedMessages.length !== messagesForRequest.length || totalChars2 !== (messagesForRequest.reduce((a, m) => a + String(m.content || '').length, 0)) + }); this.webview.postMessage({ type: 'error', value: [ 'AI engine returned an empty response.', - `Engine: ${engine} | Model: ${actualModel}`, + `Engine: ${request.engine} | Model: ${actualModel}`, isContextOverflow - ? `Context overflow: ~${estimatedTokens2.toLocaleString()} tokens estimated. This model likely has a smaller context window.` + ? `Context overflow: ~${estimatedTokens2.toLocaleString()} tokens (actually sent). The model context window was likely exceeded even after compression.` : 'The request reached the LLM server, but no content was returned.', '', '**해결 방법:**', @@ -2008,7 +2069,7 @@ export class AgentExecutor { modelName: string; reqMessages: ChatMessage[]; temperature: number; - }): Promise<{ response: Response; engine: 'lmstudio' | 'ollama'; apiUrl: string }> { + }): Promise<{ response: Response; engine: 'lmstudio' | 'ollama'; apiUrl: string; finalMessages: ChatMessage[] }> { const { baseUrl, modelName, reqMessages, temperature } = params; const primaryEngine = resolveEngine(baseUrl); const engines = primaryEngine === 'lmstudio' ? ['lmstudio', 'ollama'] as const : ['ollama', 'lmstudio'] as const; @@ -2032,7 +2093,7 @@ export class AgentExecutor { if (engine === 'lmstudio') { const totalCharsRaw = finalMessages.reduce((acc, m) => acc + String(m.content || '').length, 0); const estimatedTokensRaw = Math.ceil(totalCharsRaw / 4); - const LM_CTX_SAFE_LIMIT = 3500; // 4096 n_ctx 기준 안전 마진 + const LM_CTX_SAFE_LIMIT = 3000; // 4096 n_ctx 기준 더 보수적인 안전 마진 if (estimatedTokensRaw > LM_CTX_SAFE_LIMIT) { logInfo('LM Studio proactive compression triggered.', { @@ -2098,7 +2159,7 @@ export class AgentExecutor { messages: finalMessages.map(m => ({ role: m.role, content: m.content })), stream: true, ...(engine === 'lmstudio' - ? { max_tokens: Math.min(4096, Math.max(256, 3500 - estimatedTokens)), temperature } + ? { max_tokens: Math.min(4096, Math.max(256, 3000 - estimatedTokens)), temperature } : { options: { num_ctx: 32768, num_predict: 4096, temperature } }), }; logInfo('AI streaming request started.', { @@ -2182,7 +2243,7 @@ export class AgentExecutor { if (retryResponse.ok) { logInfo('n_ctx retry succeeded.', { apiUrl }); - return { response: retryResponse, engine, apiUrl }; + return { response: retryResponse, engine, apiUrl, finalMessages: compressedMessages }; } logError('n_ctx retry also failed.', { status: retryResponse.status }); } @@ -2193,7 +2254,7 @@ export class AgentExecutor { } logInfo('AI streaming request connected.', { engine, variant: variant.name, apiUrl }); - return { response, engine, apiUrl }; + return { response, engine, apiUrl, finalMessages }; } catch (error: any) { lastError = error instanceof Error ? error : new Error(String(error)); logError('AI streaming request failed.', { engine, variant: variant.name, apiUrl, model: candidateModel, error: lastError.message }); diff --git a/src/bridge.ts b/src/bridge.ts index e5f0972..78855d4 100644 --- a/src/bridge.ts +++ b/src/bridge.ts @@ -74,7 +74,7 @@ export class BridgeServer { server.once('error', (err: any) => { if (err.code === 'EADDRINUSE') { // INFO 레벨: ERR 콘솔 오염 방지 (Extension Host가 console.error를 ERR로 표시) - logInfo(`Bridge Port ${port} already in use. Trying port ${port + 1}...`); + logInfo(`Bridge Port ${port} already in use. Trying port ${port + 1}... (Current PID: ${process.pid})`); server.close(); if (this.server === server) { this.server = null; @@ -82,14 +82,14 @@ export class BridgeServer { this.start(port + 1); } else { // EADDRINUSE 외 진짜 에러만 logError - logInfo(`Bridge server non-fatal error on port ${port}: ${err.code || err.message}`); + logInfo(`Bridge server non-fatal error on port ${port}: ${err.code || err.message} (PID: ${process.pid})`); } }); // 성공 시 서버 참조 저장 server.listen(port, '127.0.0.1', () => { this.server = server; - logInfo(`Bridge server active on 127.0.0.1:${port}.`); + logInfo(`Bridge server active on 127.0.0.1:${port} (PID: ${process.pid}).`); }); }