From 51c92a4693957deca56ff514e2d917d4ceaacc85 Mon Sep 17 00:00:00 2001
From: Jay
Date: Tue, 14 Apr 2026 10:42:37 +0900
Subject: [PATCH] =?UTF-8?q?feat:=20multimodal=20file=20attachment=20(+)=20?=
 =?UTF-8?q?button=20=E2=80=94=20send=20images,=20audio,=20documents=20to?=
 =?UTF-8?q?=20AI=20vision=20models=20(v2.0.0)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 package.json     |   2 +-
 src/extension.ts | 225 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 222 insertions(+), 5 deletions(-)

diff --git a/package.json b/package.json
index 249b11d..e70c77e 100644
--- a/package.json
+++ b/package.json
@@ -2,7 +2,7 @@
   "name": "connect-ai-lab",
   "displayName": "Connect AI",
   "description": "100% 로컬 AI 코딩 에이전트 — 파일 생성, 코드 편집, 터미널 실행을 오프라인으로. Ollama + Gemma/Llama/DeepSeek 지원.",
-  "version": "1.0.32",
+  "version": "2.0.0",
   "publisher": "connectailab",
   "license": "MIT",
   "icon": "assets/icon.png",
diff --git a/src/extension.ts b/src/extension.ts
index 3b3d09f..23d67e5 100644
--- a/src/extension.ts
+++ b/src/extension.ts
@@ -246,6 +246,9 @@ class SidebarChatProvider implements vscode.WebviewViewProvider {
                 case 'prompt':
                     await this._handlePrompt(msg.value, msg.model);
                     break;
+                case 'promptWithFile':
+                    await this._handlePromptWithFile(msg.value, msg.model, msg.files);
+                    break;
                 case 'newChat':
                     this.resetChat();
                     break;
@@ -633,6 +636,155 @@ class SidebarChatProvider implements vscode.WebviewViewProvider {
         return result;
     }
 
+    // --------------------------------------------------------
+    // Handle prompt with file attachments (multimodal)
+    // --------------------------------------------------------
+    /**
+     * Sends a prompt plus attachments to the local model and streams the reply to the webview.
+     * Image attachments are forwarded as vision input; every other attachment is decoded as
+     * UTF-8 text and appended to the prompt (at most 20,000 characters per file).
+     *
+     * @param prompt    Text typed by the user.
+     * @param modelName Model to query; the configured default is used when empty.
+     * @param files     Attachments; `data` is treated as raw base64 (no data-URL prefix).
+     */
+    private async _handlePromptWithFile(prompt: string, modelName: string, files: {name: string, type: string, data: string}[]) {
+        if (!this._view) { return; }
+        const attachments = files ?? [];
+
+        try {
+            const { ollamaBase, defaultModel, timeout } = getConfig();
+            let isLMStudio = ollamaBase.includes('1234') || ollamaBase.includes('v1');
+            let apiUrl = isLMStudio ? `${ollamaBase}/v1/chat/completions` : `${ollamaBase}/api/chat`;
+
+            if (!isLMStudio) {
+                // Probe Ollama first; if it does not answer, fall back to the LM Studio endpoint.
+                try { await axios.get(`${ollamaBase}/api/tags`, { timeout: 1000 }); }
+                catch { apiUrl = 'http://127.0.0.1:1234/v1/chat/completions'; isLMStudio = true; }
+            }
+
+            // Separate images from text files
+            const imageFiles = attachments.filter(f => f.type.startsWith('image/'));
+            const textFiles = attachments.filter(f => !f.type.startsWith('image/'));
+
+            // Build text context from non-image files
+            let fileContext = '';
+            for (const f of textFiles) {
+                // data is base64 encoded, decode to utf-8 text
+                // NOTE(review): binary attachments (audio, PDF) decode to garbage here - confirm intended handling.
+                const decoded = Buffer.from(f.data, 'base64').toString('utf-8');
+                fileContext += `\n\n[첨부 파일: ${f.name}]\n\`\`\`\n${decoded.slice(0, 20000)}\n\`\`\``;
+            }
+
+            const userContent = prompt + fileContext;
+            this._chatHistory.push({ role: 'user', content: userContent });
+            this._displayMessages.push({ text: prompt + (attachments.length > 0 ? `\n📎 ${attachments.map(f=>f.name).join(', ')}` : ''), role: 'user' });
+
+            // Build messages
+            const reqMessages = [...this._chatHistory];
+            if (reqMessages.length > 0 && reqMessages[0].role === 'system') {
+                const editor = vscode.window.activeTextEditor;
+                let contextBlock = '';
+                if (editor && editor.document.uri.scheme === 'file') {
+                    const text = editor.document.getText();
+                    const name = path.basename(editor.document.fileName);
+                    if (text.trim().length > 0 && text.length < MAX_CONTEXT_SIZE) {
+                        contextBlock = `\n\n[Currently open file: ${name}]\n\`\`\`\n${text}\n\`\`\``;
+                    }
+                }
+                const workspaceCtx = this._getWorkspaceContext();
+                const brainCtx = this._brainEnabled ? this._getSecondBrainContext() : '';
+                reqMessages[0] = {
+                    role: 'system',
+                    content: `${this._systemPrompt}\n\n[BACKGROUND CONTEXT]\n${contextBlock}\n${workspaceCtx}\n${brainCtx}`
+                };
+            }
+
+            let aiMessage = '';
+            // Reads a newline-delimited JSON/SSE stream and forwards each token to the webview.
+            const pump = (stream: any, pickToken: (json: any) => string) => new Promise<void>((resolve, reject) => {
+                let buffer = '';
+                const consume = (line: string) => {
+                    const trimmed = line.trim();
+                    if (!trimmed || trimmed === 'data: [DONE]') { return; }
+                    try {
+                        const json = JSON.parse(trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed);
+                        const token = pickToken(json) || '';
+                        if (token) { aiMessage += token; this._view?.webview.postMessage({ type: 'streamChunk', value: token }); }
+                    } catch { /* best effort: skip keep-alives and malformed lines */ }
+                };
+                // Decode inside the stream so multi-byte UTF-8 characters split across chunks stay intact.
+                stream.setEncoding('utf8');
+                stream.on('data', (chunk: string) => {
+                    buffer += chunk;
+                    const lines = buffer.split('\n'); buffer = lines.pop() || '';
+                    lines.forEach(consume);
+                });
+                // Flush the final line, which may arrive without a trailing newline.
+                stream.on('end', () => { consume(buffer); resolve(); });
+                stream.on('error', (err: any) => reject(err));
+            });
+
+            const model = modelName || defaultModel;
+            this._view.webview.postMessage({ type: 'streamStart' });
+            if (isLMStudio) {
+                // OpenAI-compatible format with image_url
+                const lastUserMsg = reqMessages[reqMessages.length - 1];
+                const contentParts: any[] = [{ type: 'text', text: lastUserMsg.content }];
+                for (const img of imageFiles) {
+                    // Use the attachment's own MIME type; a hard-coded image/png mislabels JPEG/WebP/GIF data.
+                    contentParts.push({ type: 'image_url', image_url: { url: `data:${img.type};base64,${img.data}` } });
+                }
+                reqMessages[reqMessages.length - 1] = { role: 'user', content: contentParts as any };
+
+                const streamBody = {
+                    model,
+                    messages: reqMessages,
+                    stream: true,
+                    max_tokens: 4096, temperature: this._temperature, top_p: this._topP
+                };
+                const response = await axios.post(apiUrl, streamBody, { timeout, responseType: 'stream' });
+                await pump(response.data, json => json.choices?.[0]?.delta?.content);
+            } else {
+                // Ollama native format with images array
+                const images = imageFiles.map(f => f.data); // already base64
+                const streamBody: any = {
+                    model,
+                    messages: reqMessages,
+                    stream: true,
+                    options: { num_predict: 4096, temperature: this._temperature, top_p: this._topP, top_k: this._topK }
+                };
+                // Attach images to the last user message for Ollama
+                if (images.length > 0) {
+                    streamBody.messages = reqMessages.map((m: any, i: number) =>
+                        i === reqMessages.length - 1 ? { ...m, images } : m
+                    );
+                }
+                const response = await axios.post(apiUrl, streamBody, { timeout, responseType: 'stream' });
+                await pump(response.data, json => json.message?.content);
+            }
+
+            this._view.webview.postMessage({ type: 'streamEnd' });
+            this._chatHistory.push({ role: 'assistant', content: aiMessage });
+
+            const report = this._executeActions(aiMessage);
+            if (report.length > 0) {
+                const reportMsg = `\n\n---\n**에이전트 작업 결과**\n${report.join('\n')}`;
+                this._view.webview.postMessage({ type: 'streamChunk', value: reportMsg });
+                this._view.webview.postMessage({ type: 'streamEnd' });
+                aiMessage += reportMsg;
+            }
+            this._displayMessages.push({ text: aiMessage, role: 'ai' });
+            this._saveHistory();
+
+        } catch (error: any) {
+            const errMsg = error.code === 'ECONNREFUSED'
+                ? '⚠️ AI 서버에 연결할 수 없습니다. 로컬 서버를 켜주세요.'
+                : `⚠️ 오류: ${error.message}`;
+            this._view.webview.postMessage({ type: 'error', value: errMsg });
+        }
+    }
+
     // --------------------------------------------------------
     // Handle user prompt → Ollama → agent actions → response
     // --------------------------------------------------------
@@ -1077,6 +1229,18 @@ textarea::placeholder{color:var(--text-dim)}
 body.init .main-view{justify-content:center;margin-top:-6vh}
 body.init .chat{flex:0 0 auto;overflow:visible;padding-bottom:15px}
 body.init .input-wrap{max-width:680px;width:100%;margin:0 auto;transform:none;transition:all .5s cubic-bezier(.16,1,.3,1)}
+
+/* ATTACHMENT */
+.attach-btn{background:transparent;border:1px solid var(--border2);color:var(--text-dim);width:32px;height:32px;border-radius:10px;cursor:pointer;display:flex;align-items:center;justify-content:center;font-size:16px;transition:all .3s;flex-shrink:0}
+.attach-btn:hover{color:var(--accent);border-color:var(--accent);box-shadow:0 0 12px var(--accent-glow);transform:translateY(-1px)}
+.attach-preview{display:none;gap:6px;padding:0 0 6px;flex-wrap:wrap}
+.attach-preview.visible{display:flex}
+.attach-chip{display:flex;align-items:center;gap:5px;background:var(--surface2);border:1px solid var(--border2);border-radius:8px;padding:4px 10px;font-size:10px;color:var(--text);animation:msgIn .3s ease}
+.attach-chip .chip-icon{font-size:12px}
+.attach-chip .chip-name{max-width:100px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
+.attach-chip .chip-remove{cursor:pointer;color:var(--text-dim);font-size:12px;margin-left:2px;transition:color .2s}
+.attach-chip .chip-remove:hover{color:var(--red)}
+.attach-thumb{width:28px;height:28px;border-radius:5px;object-fit:cover;border:1px solid var(--border2)}
Connect AI
@@ -1087,16 +1251,19 @@ body.init .input-wrap{max-width:680px;width:100%;margin:0 auto;transform:none;tr
\ubcf4\uc548 \u00b7 \ube44\uc6a9\ucd5c\uc801\ud654 \u00b7 \uc9c0\uc2dd\uc5f0\uacb0
\ud504\ub85c\uc81d\ud2b8\ub97c \uc774\ud574\ud558\uace0, \ucf54\ub4dc\ub97c \uc791\uc131\ud558\uace0, \uc2e4\ud589\ud569\ub2c8\ub2e4.
+
+
+