From 8ece47f9616246c0dcf0980a825245d73cf924a3 Mon Sep 17 00:00:00 2001 From: g1nation Date: Wed, 6 May 2026 11:55:45 +0900 Subject: [PATCH] PDFVisionFallback --- ...d46d2ca2057b05c488be1dcf439166ac5a9a1.json | 2 +- ...9f4f39d2bc368f77456c37b5eef9a94a66b5c.json | 2 +- ...5c7a44d7661af673b24e3f49551a7a2e50280.json | 2 +- ...adc543795e4b427b64540a49c9ab27c7fe213.json | 4 +- ...son => stress_conflict_1778035649232.json} | 16 +++---- src/agent.ts | 29 +++++++++---- src/sidebarProvider.ts | 42 +++++++++++++++---- 7 files changed, 68 insertions(+), 29 deletions(-) rename .astra/tests/stress/.astra/missions/{stress_conflict_1778033752447.json => stress_conflict_1778035649232.json} (82%) diff --git a/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json b/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json index 9e57e5b..6e10851 100644 --- a/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json +++ b/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json @@ -1,5 +1,5 @@ { "result": "Final report with inconsistencies. This should be long enough to pass validation.", - "createdAt": 1778033752470, + "createdAt": 1778035649255, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json b/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json index ef1ec7f..9b940ca 100644 --- a/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json +++ b/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json @@ -1,5 +1,5 @@ { "result": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.", - "createdAt": 1778033752468, + "createdAt": 1778035649254, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json b/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json index 1bcd65a..7da05a9 100644 --- a/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json +++ b/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json @@ -1,5 +1,5 @@ { "result": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.", - "createdAt": 1778033752466, + "createdAt": 1778035649252, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json b/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json index d2c00bf..0c244d7 100644 --- a/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json +++ b/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json @@ -1,5 +1,5 @@ { - "result": "---\nid: stress_conflict_1778033752447\ndate: 2026-05-06T02:15:52.471Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (18ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (2ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (2ms)\n", - "createdAt": 1778033752471, + "result": "---\nid: stress_conflict_1778035649232\ndate: 2026-05-06T02:47:29.256Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (19ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (2ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (2ms)\n", + "createdAt": 1778035649257, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/missions/stress_conflict_1778033752447.json b/.astra/tests/stress/.astra/missions/stress_conflict_1778035649232.json similarity index 82% rename from .astra/tests/stress/.astra/missions/stress_conflict_1778033752447.json rename to .astra/tests/stress/.astra/missions/stress_conflict_1778035649232.json index 1c472f4..5a0eb03 100644 --- a/.astra/tests/stress/.astra/missions/stress_conflict_1778033752447.json +++ b/.astra/tests/stress/.astra/missions/stress_conflict_1778035649232.json @@ -1,7 +1,7 @@ { - "missionId": "stress_conflict_1778033752447", + "missionId": "stress_conflict_1778035649232", "status": "completed", - "startTime": "2026-05-06T02:15:52.447Z", + "startTime": "2026-05-06T02:47:29.232Z", "totalElapsedMs": 25, "results": { "planner": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.", @@ -16,30 +16,30 @@ { "from": "idle", "to": "planner", - "durationMs": 18, + "durationMs": 19, "message": "전략 수립 중...", - "ts": "2026-05-06T02:15:52.465Z" + "ts": "2026-05-06T02:47:29.251Z" }, { "from": "planner", "to": "researcher", "durationMs": 2, "message": "핵심 정보 수집 및 분석 중...", - "ts": "2026-05-06T02:15:52.467Z" + "ts": "2026-05-06T02:47:29.253Z" }, { "from": "researcher", "to": "writer", "durationMs": 2, "message": "최종 리포트 작성 및 편집 중...", - "ts": "2026-05-06T02:15:52.469Z" + "ts": "2026-05-06T02:47:29.255Z" }, { "from": "writer", "to": "completed", - "durationMs": 3, + "durationMs": 2, "message": "미션 완료", - "ts": "2026-05-06T02:15:52.472Z" + "ts": "2026-05-06T02:47:29.257Z" } ], "resilienceMetrics": { diff --git a/src/agent.ts b/src/agent.ts index bc6d369..63cb943 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -338,18 +338,28 @@ export class AgentExecutor { const reqMessages = this.buildRequestHistory(this.chatHistory); // Handle Vision Content Injection - // Merge text prompt with file content instead of replacing, so the user's message is never lost + // visionContent 배열에서 이미지 base64 데이터를 추출하여 엔진에 맞는 형식으로 주입 if (hasVisionContent && reqMessages.length > 0) { const lastUserIdx = reqMessages.map(m => m.role).lastIndexOf('user'); if (lastUserIdx >= 0) { const existingContent = reqMessages[lastUserIdx].content; - const textParts: any[] = (typeof existingContent === 'string' && existingContent.trim()) - ? [{ type: 'text', text: existingContent }] - : []; + const textContent = (typeof existingContent === 'string' && existingContent.trim()) ? existingContent : ''; + + // base64 이미지 데이터 추출 + const imageBase64List: string[] = []; + for (const vc of (visionContent || [])) { + if (vc && vc.data) { + imageBase64List.push(vc.data); + } + } + + // Ollama 호환: images 배열 필드에 base64 데이터 직접 주입 + // LM Studio 호환: content 배열에 image_url 객체 주입 reqMessages[lastUserIdx] = { role: 'user', - content: JSON.stringify([...textParts, ...(visionContent || [])]) - }; + content: textContent, + images: imageBase64List // Ollama native format + } as any; } } @@ -1925,10 +1935,15 @@ export class AgentExecutor { ? message.content : JSON.stringify(message.content); - return { + const result: any = { role: message.role, content: normalizedContent }; + // Ollama Vision: images 필드 보존 + if ((message as any).images) { + result.images = (message as any).images; + } + return result; }); } diff --git a/src/sidebarProvider.ts b/src/sidebarProvider.ts index db2c213..6fb8335 100644 --- a/src/sidebarProvider.ts +++ b/src/sidebarProvider.ts @@ -1851,7 +1851,8 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn const type = file.type || ''; if (name.endsWith('.pdf') || type === 'application/pdf') { - // PDF: 서버사이드 텍스트 추출 (pdf-parse v2 API) + // PDF: 서버사이드 텍스트 추출 (pdf-parse v2 API) + Vision 폴백 + let pdfTextOk = false; try { const { PDFParse } = require('pdf-parse'); const rawBuffer = Buffer.from(file.data, 'base64'); @@ -1859,20 +1860,43 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn const parser = new PDFParse(uint8); await parser.load(); const textResult = await parser.getText(); - // pdf-parse v2: getText() returns {pages: [{text, num}], text: string, total: number} const extracted = (typeof textResult === 'string' ? textResult : (textResult?.text || '')).trim(); - // 페이지 구분 마커 제거하여 깔끔한 텍스트 추출 const cleanText = extracted.replace(/\n*-- \d+ of \d+ --\n*/g, '\n').trim(); - if (cleanText && cleanText.length > 10) { + if (cleanText && cleanText.length > 30) { textContents.push(`\n[PDF: ${file.name}]\n${cleanText}`); logInfo(`PDF text extracted successfully.`, { fileName: file.name, chars: cleanText.length }); - } else { - textContents.push(`\n[PDF: ${file.name}]\n(텍스트 추출 결과 없음 - 이미지 기반 PDF일 수 있습니다. 텍스트 레이어가 없는 스캔 문서는 OCR 변환 후 재시도하세요.)`); - logInfo(`PDF text extraction returned empty/minimal result.`, { fileName: file.name, rawLength: extracted.length }); + pdfTextOk = true; + } + + // [Vision Fallback] 텍스트가 비어있으면 페이지 이미지 추출 -> Vision 모델에 전달 + if (!pdfTextOk) { + logInfo(`PDF has no text layer. Extracting page screenshots for vision analysis.`, { fileName: file.name }); + const screenshots = await parser.getScreenshot({ page: 1 }); + if (screenshots?.pages && screenshots.pages.length > 0) { + const maxPages = Math.min(screenshots.pages.length, 8); // 메모리 보호: 최대 8페이지 + for (let i = 0; i < maxPages; i++) { + const page = screenshots.pages[i]; + if (page?.data) { + const pageBase64 = Buffer.from(page.data).toString('base64'); + images.push({ + name: `${file.name}_page${i + 1}.png`, + type: 'image/png', + data: pageBase64 + }); + } + } + textContents.push(`\n[PDF: ${file.name}]\n(이미지 기반 PDF ${screenshots.total}페이지 중 ${maxPages}페이지를 이미지로 추출하여 Vision 분석합니다. 각 페이지 이미지를 참조하여 문서의 내용을 상세히 분석하고 한국어로 정리하세요.)`); + logInfo(`PDF vision fallback: extracted ${maxPages} page screenshots.`, { fileName: file.name, totalPages: screenshots.total }); + pdfTextOk = true; // Vision 분석으로 처리 완료 + } } } catch (pdfError: any) { - logError(`PDF parsing failed.`, { fileName: file.name, error: pdfError?.message || String(pdfError) }); - textContents.push(`\n[PDF: ${file.name}]\n(PDF 파싱 오류: ${pdfError?.message || '알 수 없는 오류'})`); + logError(`PDF processing failed.`, { fileName: file.name, error: pdfError?.message || String(pdfError) }); + } + + // 최종 폴백: 텍스트도 없고 이미지 추출도 실패한 경우 + if (!pdfTextOk) { + textContents.push(`\n[PDF: ${file.name}]\n(PDF 분석에 실패했습니다. 이 파일을 텍스트로 변환하여 다시 시도해주세요.)`); } } else if ( type.startsWith('text/') ||