chore: version bump to 2.80.9 and fix bridge/lmstudio issues
This commit is contained in:
+121
-40
@@ -451,35 +451,74 @@ export class AgentExecutor {
|
||||
if (this.isStaleRun(runId)) return;
|
||||
|
||||
let aiResponseText = '';
|
||||
const reader = response.body?.getReader();
|
||||
if (!reader) throw new Error("Response body is not readable.");
|
||||
const body = response.body as any;
|
||||
if (!body) throw new Error("Response body is null.");
|
||||
|
||||
if (loopDepth === 0) this.webview.postMessage({ type: 'streamStart' });
|
||||
if (loopDepth === 0) this.webview?.postMessage({ type: 'streamStart' });
|
||||
|
||||
let buffer = '';
|
||||
const decoder = new TextDecoder();
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
if (this.isStaleRun(runId)) return;
|
||||
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
const lines = buffer.split('\n');
|
||||
buffer = lines.pop() || '';
|
||||
for (const line of lines) {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed || trimmed === 'data: [DONE]') continue;
|
||||
try {
|
||||
const raw = trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed;
|
||||
const json = JSON.parse(raw);
|
||||
const token = engine === 'lmstudio' ? json.choices?.[0]?.delta?.content || '' : json.message?.content || json.response || '';
|
||||
if (token) {
|
||||
aiResponseText += token;
|
||||
}
|
||||
} catch (e: any) {
|
||||
logError('Failed to parse streaming chunk.', { engine, apiUrl, chunk: summarizeText(trimmed, 300), error: e?.message || String(e) });
|
||||
/**
 * Consumes one raw chunk of the streaming response body.
 *
 * The chunk is decoded and appended to the shared `buffer`, which is then
 * split on newlines. Every complete line is parsed as JSON (after stripping
 * an optional `data:` prefix) and any text token it carries is appended to
 * `aiResponseText`. When `loopDepth === 0` the token is also forwarded to the
 * webview as a `streamUpdate` message. The trailing partial line stays in
 * `buffer` for the next call.
 *
 * @param value One chunk read from the response body. Anything that is not
 *   already a string is passed to `decoder.decode` in streaming mode.
 * @returns `false` when the run has gone stale and the caller should stop
 *   reading; `true` otherwise.
 * @throws Error whose message starts with `AI Engine Error:` when a parsed
 *   line contains an `error` field.
 */
const processChunk = (value: any) => {
  if (this.isStaleRun(runId)) return false;

  // Some stream implementations may yield already-decoded strings; passing
  // those to TextDecoder.decode would throw, so append them as-is.
  buffer += typeof value === 'string' ? value : decoder.decode(value, { stream: true });

  const lines = buffer.split('\n');
  // The last element is an incomplete line (or '' if the chunk ended on '\n').
  buffer = lines.pop() || '';

  for (const line of lines) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    try {
      // Accept both `data: {...}` and `data:{...}` as well as bare JSON lines.
      const raw = trimmed.startsWith('data:') ? trimmed.replace(/^data:\s*/, '') : trimmed;

      // `[DONE]` is the end-of-stream sentinel, not a JSON payload.
      if (!raw || raw === '[DONE]') continue;

      const json = JSON.parse(raw);
      if (json.error) {
        const errMsg = typeof json.error === 'string'
          ? json.error
          : (json.error.message || JSON.stringify(json.error));
        throw new Error(`AI Engine Error: ${errMsg}`);
      }

      // Token location depends on the payload shape: a `choices[0]` entry
      // (delta / message / text), a top-level `message.content`, or a
      // top-level `response` string.
      let token = '';
      if (json.choices?.[0]) {
        const choice = json.choices[0];
        token = choice.delta?.content || choice.message?.content || choice.text || '';
      } else if (json.message?.content) {
        token = json.message.content;
      } else if (json.response) {
        token = json.response;
      }

      if (token) {
        aiResponseText += token;
        // Only the outermost loop iteration streams tokens live to the UI.
        if (loopDepth === 0) {
          this.webview?.postMessage({ type: 'streamUpdate', value: token });
        }
      }
    } catch (e: unknown) {
      // Lines that are not valid JSON are ignored on purpose. Only the engine
      // error raised above is propagated. The instanceof guard keeps a thrown
      // non-Error value from causing a secondary TypeError here.
      if (e instanceof Error && e.message.startsWith('AI Engine Error:')) throw e;
    }
  }

  return true;
};
|
||||
|
||||
try {
|
||||
if (typeof body[Symbol.asyncIterator] === 'function') {
|
||||
for await (const chunk of body) {
|
||||
if (!processChunk(chunk)) break;
|
||||
}
|
||||
} else {
|
||||
const reader = body.getReader();
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
if (!processChunk(value)) break;
|
||||
}
|
||||
}
|
||||
} catch (err: any) {
|
||||
@@ -495,11 +534,30 @@ export class AgentExecutor {
|
||||
if (buffer.trim() && buffer.trim() !== 'data: [DONE]') {
|
||||
try {
|
||||
const trimmed = buffer.trim();
|
||||
const raw = trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed;
|
||||
const json = JSON.parse(raw);
|
||||
const token = engine === 'lmstudio' ? json.choices?.[0]?.delta?.content || '' : json.message?.content || json.response || '';
|
||||
if (token) {
|
||||
aiResponseText += token;
|
||||
let raw = trimmed;
|
||||
if (trimmed.startsWith('data:')) {
|
||||
raw = trimmed.replace(/^data:\s*/, '');
|
||||
}
|
||||
|
||||
if (raw && raw !== '[DONE]') {
|
||||
const json = JSON.parse(raw);
|
||||
if (json.error) {
|
||||
const errMsg = typeof json.error === 'string' ? json.error : (json.error.message || JSON.stringify(json.error));
|
||||
throw new Error(`AI Engine Error: ${errMsg}`);
|
||||
}
|
||||
let token = '';
|
||||
if (json.choices?.[0]) {
|
||||
const choice = json.choices[0];
|
||||
token = choice.delta?.content || choice.message?.content || choice.text || '';
|
||||
} else if (json.message?.content) {
|
||||
token = json.message.content;
|
||||
} else if (json.response) {
|
||||
token = json.response;
|
||||
}
|
||||
|
||||
if (token) {
|
||||
aiResponseText += token;
|
||||
}
|
||||
}
|
||||
} catch (e: any) {
|
||||
logError('Failed to parse final streaming buffer.', { engine, apiUrl, buffer: summarizeText(buffer, 300), error: e?.message || String(e) });
|
||||
@@ -556,24 +614,36 @@ export class AgentExecutor {
|
||||
*/
|
||||
const finalAssistantContent = assistantContent;
|
||||
|
||||
const assistantMessage: ChatMessage = { role: 'assistant', content: finalAssistantContent, internal: false, rationale };
|
||||
this.chatHistory.push(assistantMessage);
|
||||
this.emitHistoryChanged();
|
||||
|
||||
this.statusBarManager.updateStatus(AgentStatus.Executing);
|
||||
const report = await this.executeActions(aiResponseText, rootPath, activeBrain);
|
||||
if (!assistantContent.trim() && report.length === 0) {
|
||||
logError('Model returned an empty response without actions.', { model: actualModel, engine, apiUrl, loopDepth });
|
||||
const totalChars2 = messagesForRequest.reduce((acc, m) => acc + String(m.content || '').length, 0);
|
||||
const estimatedTokens2 = Math.ceil(totalChars2 / 4);
|
||||
const isContextOverflow = estimatedTokens2 > 5000;
|
||||
logError('Model returned an empty response without actions.', { model: actualModel, engine, apiUrl, loopDepth, estimatedTokens: estimatedTokens2 });
|
||||
this.webview.postMessage({
|
||||
type: 'error',
|
||||
value: [
|
||||
'AI engine returned an empty response.',
|
||||
`Engine: ${engine}`,
|
||||
`Model: ${actualModel}`,
|
||||
'The request reached the local LLM server, but no usable content was returned. Try another model, restart the local server, or reduce the prompt/context size.'
|
||||
`Engine: ${engine} | Model: ${actualModel}`,
|
||||
isContextOverflow
|
||||
? `Context overflow: ~${estimatedTokens2.toLocaleString()} tokens estimated. This model likely has a smaller context window.`
|
||||
: 'The request reached the LLM server, but no content was returned.',
|
||||
'',
|
||||
'**해결 방법:**',
|
||||
isContextOverflow
|
||||
? '1. Brain 비활성화 후 재시도 2. 더 큰 모델(7B+) 사용 3. 대화 기록 초기화 후 재시도'
|
||||
: '1. LM Studio에서 해당 모델이 로드되어 있는지 확인 2. 모델 재시작 후 재시도 3. 다른 모델로 전환'
|
||||
].join('\n')
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (report.length > 0) {
|
||||
this.emitHistoryChanged();
|
||||
logInfo('Agent actions executed.', { loopDepth: loopDepth + 1, report });
|
||||
|
||||
// Continue loop if needed
|
||||
@@ -590,7 +660,7 @@ export class AgentExecutor {
|
||||
logInfo('Autonomous loop continuing after actions.', { loopDepth: loopDepth + 1, actions: report });
|
||||
|
||||
// Explicitly tell the AI to look at the results and continue
|
||||
const continuationPrompt = "The requested local action has been executed. Use the action result messages already in the conversation to answer the user's original request directly, in the user's language. Do not say you are waiting for the next instruction.";
|
||||
const continuationPrompt = `The requested local action has been executed.\nAction report:\n${report.join('\n')}\nUse the action result messages already in the conversation to answer the user's original request directly, in the user's language. Do not say you are waiting for the next instruction.`;
|
||||
|
||||
this.webview.postMessage({ type: 'autoContinue', value: `자료를 확인하고 답변을 정리하는 중입니다... (${loopDepth + 1}/${config.maxAutoSteps})` });
|
||||
await new Promise(r => setTimeout(r, 800));
|
||||
@@ -600,9 +670,6 @@ export class AgentExecutor {
|
||||
return;
|
||||
}
|
||||
|
||||
const assistantMessage: ChatMessage = { role: 'assistant', content: finalAssistantContent, internal: false, rationale };
|
||||
this.chatHistory.push(assistantMessage);
|
||||
this.emitHistoryChanged();
|
||||
this.statusBarManager.updateStatus(AgentStatus.Success);
|
||||
this.webview.postMessage({ type: 'streamChunk', value: finalAssistantContent });
|
||||
|
||||
@@ -1953,14 +2020,29 @@ export class AgentExecutor {
|
||||
|
||||
for (const candidateModel of modelCandidates) {
|
||||
for (const variant of messageVariants) {
|
||||
// LM Studio: context_length를 명시적으로 제한하여 컨텍스트 초과 방지
|
||||
// 총 메시지 토큰 추정: 문자 수 / 4 (rough estimate)
|
||||
const totalChars = variant.messages.reduce((acc, m) => acc + String(m.content || '').length, 0);
|
||||
const estimatedTokens = Math.ceil(totalChars / 4);
|
||||
// LM Studio 소형 모델(4B~8B)은 4096~8192 context 제한
|
||||
// 컨텍스트 초과 시 max_tokens을 줄여서 모델이 응답할 공간 확보
|
||||
const lmStudioMaxTokens = Math.max(512, Math.min(4096, 8192 - estimatedTokens));
|
||||
const streamBody = {
|
||||
model: candidateModel,
|
||||
messages: variant.messages,
|
||||
stream: true,
|
||||
...(engine === 'lmstudio'
|
||||
? { max_tokens: 4096, temperature }
|
||||
? {
|
||||
max_tokens: lmStudioMaxTokens,
|
||||
temperature,
|
||||
// LM Studio: context_length로 컨텍스트 창 명시 설정
|
||||
context_length: 8192
|
||||
}
|
||||
: { options: { num_ctx: 32768, num_predict: 4096, temperature } }),
|
||||
};
|
||||
if (engine === 'lmstudio' && estimatedTokens > 6000) {
|
||||
logError('LM Studio context may be too large for small models.', { estimatedTokens, lmStudioMaxTokens, model: candidateModel });
|
||||
}
|
||||
|
||||
try {
|
||||
logInfo('AI streaming request started.', {
|
||||
@@ -1981,8 +2063,7 @@ export class AgentExecutor {
|
||||
'Connection': 'keep-alive'
|
||||
},
|
||||
body: JSON.stringify(streamBody),
|
||||
signal: this.abortController?.signal,
|
||||
keepalive: true
|
||||
signal: this.abortController?.signal
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
|
||||
+14
-5
@@ -39,7 +39,7 @@ export class BridgeServer {
|
||||
}
|
||||
|
||||
public start(port: number = 4825) {
|
||||
this.server = http.createServer((req, res) => {
|
||||
const server = http.createServer((req, res) => {
|
||||
res.setHeader('Access-Control-Allow-Origin', '*');
|
||||
res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
|
||||
res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
|
||||
@@ -70,15 +70,24 @@ export class BridgeServer {
|
||||
}
|
||||
});
|
||||
|
||||
this.server.on('error', (err: any) => {
|
||||
// once() 사용: 중복 에러 이벤트 방지
|
||||
server.once('error', (err: any) => {
|
||||
if (err.code === 'EADDRINUSE') {
|
||||
logError(`🚫 Bridge Port ${port} in use. Connection with EZER/A.U might fail.`);
|
||||
logInfo(`Bridge Port ${port} already in use. Trying port ${port + 1}...`);
|
||||
// 기존 서버 참조 정리 후 다음 포트 시도
|
||||
server.close();
|
||||
if (this.server === server) {
|
||||
this.server = null;
|
||||
}
|
||||
this.start(port + 1);
|
||||
} else {
|
||||
logError(`Bridge server error:`, err);
|
||||
logError(`Bridge server error on port ${port}:`, err);
|
||||
}
|
||||
});
|
||||
|
||||
this.server.listen(port, '127.0.0.1', () => {
|
||||
// 성공 시 서버 참조 저장
|
||||
server.listen(port, '127.0.0.1', () => {
|
||||
this.server = server;
|
||||
logInfo(`Bridge server active on 127.0.0.1:${port}.`);
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user