Fix the context-length limit (n_ctx) error for Ollama models by raising num_ctx to 16384
This commit is contained in:
+3
-3
@@ -759,7 +759,7 @@ class SidebarChatProvider implements vscode.WebviewViewProvider {
|
||||
model: modelName || defaultModel,
|
||||
messages: reqMessages,
|
||||
stream: true,
|
||||
- options: { num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK }
|
||||
+ options: { num_ctx: 16384, num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK }
|
||||
};
|
||||
// Attach images to the last user message for Ollama
|
||||
if (images.length > 0) {
|
||||
@@ -901,7 +901,7 @@ class SidebarChatProvider implements vscode.WebviewViewProvider {
|
||||
stream: true,
|
||||
...(isLMStudio
|
||||
? { max_tokens: 4096, temperature: this._temperature, top_p: this._topP }
|
||||
- : { options: { num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK } }),
|
||||
+ : { options: { num_ctx: 16384, num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK } }),
|
||||
};
|
||||
|
||||
// 스트리밍: 웹뷰에 'streamStart' 로 빈 메시지 생성 후 'streamChunk'로 실시간 업데이트
|
||||
@@ -970,7 +970,7 @@ class SidebarChatProvider implements vscode.WebviewViewProvider {
|
||||
stream: false,
|
||||
...(isLMStudio
|
||||
? { max_tokens: 4096, temperature: this._temperature, top_p: this._topP }
|
||||
- : { options: { num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK } }),
|
||||
+ : { options: { num_ctx: 16384, num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK } }),
|
||||
}, { timeout });
|
||||
|
||||
aiMessage = isLMStudio
|
||||
|
||||
Reference in New Issue
Block a user