Fix context-window (n_ctx) limit errors for Ollama models by raising num_ctx to 16384

2026-04-14 12:53:16 +09:00
parent 0c217e3275
commit bb33c6f78e
1 changed file with 3 additions and 3 deletions
@@ -759,7 +759,7 @@ class SidebarChatProvider implements vscode.WebviewViewProvider {
                    model: modelName || defaultModel,
                    messages: reqMessages,
                    stream: true,
-                    options: { num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK }
+                    options: { num_ctx: 16384, num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK }
                };
                // Attach images to the last user message for Ollama
                if (images.length > 0) {
@@ -901,7 +901,7 @@ class SidebarChatProvider implements vscode.WebviewViewProvider {
                stream: true,
                ...(isLMStudio 
                    ? { max_tokens: 4096, temperature: this._temperature, top_p: this._topP } 
-                    : { options: { num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK } }),
+                    : { options: { num_ctx: 16384, num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK } }),
            };

            // 스트리밍: 웹뷰에 'streamStart' 로 빈 메시지 생성 후 'streamChunk'로 실시간 업데이트
@@ -970,7 +970,7 @@ class SidebarChatProvider implements vscode.WebviewViewProvider {
                    stream: false,
                    ...(isLMStudio 
                        ? { max_tokens: 4096, temperature: this._temperature, top_p: this._topP } 
-                        : { options: { num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK } }),
+                        : { options: { num_ctx: 16384, num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK } }),
                }, { timeout });

                aiMessage = isLMStudio