Fix token limitation (n_ctx) error for Ollama models by expanding num_ctx to 16384

This commit is contained in:
Jay
2026-04-14 12:53:16 +09:00
parent 0c217e3275
commit bb33c6f78e
+3 -3
View File
@@ -759,7 +759,7 @@ class SidebarChatProvider implements vscode.WebviewViewProvider {
model: modelName || defaultModel,
messages: reqMessages,
stream: true,
options: { num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK }
options: { num_ctx: 16384, num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK }
};
// Attach images to the last user message for Ollama
if (images.length > 0) {
@@ -901,7 +901,7 @@ class SidebarChatProvider implements vscode.WebviewViewProvider {
stream: true,
...(isLMStudio
? { max_tokens: 4096, temperature: this._temperature, top_p: this._topP }
: { options: { num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK } }),
: { options: { num_ctx: 16384, num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK } }),
};
// 스트리밍: 웹뷰에 'streamStart' 로 빈 메시지 생성 후 'streamChunk'로 실시간 업데이트
@@ -970,7 +970,7 @@ class SidebarChatProvider implements vscode.WebviewViewProvider {
stream: false,
...(isLMStudio
? { max_tokens: 4096, temperature: this._temperature, top_p: this._topP }
: { options: { num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK } }),
: { options: { num_ctx: 16384, num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK } }),
}, { timeout });
aiMessage = isLMStudio