feat: v2.2.74 → v2.2.82 — chunked writer + 코드 리뷰 패치 + /youtube 확장

주요 변경: [chunked writer 아키텍처 (v2.2.74~v2.2.75)] - 5-stage 다중 에이전트(planner/researcher/reflector/writer/synthesizer) 파이프라인 제거 → 단일 ChunkedWriter 의 outline → section[N] → polish 3-step 으로 교체. 본문 분석에서 추상화 손실 / 토큰 폭증 문제 해소 - 답변 길이 자동 분기: 짧은 prompt 는 fast-path direct 1회 호출, 본문 분석은 chunked. outline 빈 배열도 direct 폴백 [코드 리뷰 9개 항목 일괄 패치 (v2.2.76)] - /research polling hang 방어 (heartbeat + status 정규화 + 연속 실패 abort) - 회사 모드 dispatcher abort 신호를 AIService.chat 까지 전달 - bridgeFetch 에 onHeartbeat 콜백 도입 (slow endpoint 사용자 친화적) - dead code 정리: reflectionPersister.ts 제거 + enableReflection 등 좀비 config 키 - parseOutline 의 empty vs fallback reason 명시적 분리 - chatHandlers 의 회사 모드 케이스 ~325줄을 src/sidebar/companyHandlers.ts 로 분리 - Intent Alignment 라운드 한도 도달 시 smart 모드 자동 진행 - LM Studio doSwitch unload 실패 시 currentModel 정리 + load 강행 - retrieval informationDensity → queryCoverage 정합화 [/youtube 채널 지원 (v2.2.77~v2.2.82)] - 채널/플레이리스트 URL 자동 감지 + n:N 으로 영상 개수 지정 (최대 50) - 채널 루트 URL 에 /videos 탭 자동 append (yt-dlp enumeration 정상화) - 영상별 순차 처리 (queue 패턴) + i/N 진행 표시 + 마지막 통계 요약 - mode:info / mode:benchmark / mode:both 분석 모드 분기 - info: 영상 내용을 지식 카드로 추출 (튜토리얼·강의·뉴스용) - benchmark: 4-렌즈 대본 역기획서 (콘텐츠 제작 벤치마크용) - both: 둘 다 (기본) - bare keyword 도 허용: /youtube <url> n:1 info - bridge 에러 메시지 [object Object] 깨짐 수정 (구조화 에러 추출) - "패키지 없음" 등 환경 의존성 에러에 자동 가이드 첨부 [Astra: Setup Datacollect Dependencies 명령 추가 (v2.2.80)] - Python 자동 감지 + yt-dlp / youtube-transcript-api 자동 설치 - macOS PEP 668 환경 자동 폴백 (--user --break-system-packages) - /youtube 등에서 패키지 미설치 감지 시 "Install Now" 버튼 notification [테스트] - tests/agentEngine.test.ts 를 chunked flow 에 맞춰 전체 재작성 - tests/resilience_stress.test.ts Scenario B/D 를 role-aware mock 으로 갱신 - 399/399 통과 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-23 23:13:21 +09:00
parent 0712014fcb
commit ded3eea7ce
39 changed files with 2098 additions and 1820 deletions
@@ -227,7 +227,7 @@ export async function runCeoPlanner(
    ai: IAIService,
    userPrompt: string,
    state: CompanyState,
-    options: { model?: string; timeoutMs?: number; contractBlock?: string } = {},
+    options: { model?: string; timeoutMs?: number; contractBlock?: string; signal?: AbortSignal } = {},
 ): Promise<PlannerResult> {
    const baseSystem = applyPromptVars(CEO_PLANNER_PROMPT, { company: state.companyName });
    // Contract가 있으면 planner 시스템 프롬프트 끝에 prepend. planner는 task
@@ -243,6 +243,7 @@ export async function runCeoPlanner(
            user: userPrompt,
            model: options.model,
            timeoutMs: options.timeoutMs,
+            signal: options.signal,
        });
        raw = result.content || '';
    } catch (e: any) {
@@ -99,7 +99,7 @@ export async function runCeoReporter(
    plan: CompanyTaskPlan,
    outputs: AgentTurnOutput[],
    state: CompanyState,
-    options: { model?: string; timeoutMs?: number } = {},
+    options: { model?: string; timeoutMs?: number; signal?: AbortSignal } = {},
 ): Promise<ReportResult> {
    const system = applyPromptVars(CEO_REPORT_PROMPT, { company: state.companyName });
    const user = _buildReportUserMessage(plan, outputs, state);
@@ -109,6 +109,7 @@ export async function runCeoReporter(
            user,
            model: options.model,
            timeoutMs: options.timeoutMs,
+            signal: options.signal,
        });
        const text = (result.content || '').trim();
        if (!text) {
@@ -337,6 +337,7 @@ export async function runCompanyTurn(
                contractBlock: deps.requirementContract
                    ? formatContractForPrompt(deps.requirementContract)
                    : undefined,
+                signal: deps.signal,
            });
            plan = plannerResult.plan;
            plannerRaw = plannerResult.raw;
@@ -445,7 +446,7 @@ export async function runCompanyTurn(
        plan,
        outputs,
        state,
-        { model: reportModel },
+        { model: reportModel, signal: deps.signal },
    );
    writeReport(sessionDir, reportResult.report);
    emit({ phase: 'report-done', report: reportResult.report, ok: reportResult.ok });
@@ -660,6 +661,7 @@ async function _dispatchOne(
            system,
            user: task,
            model,
+            signal: deps.signal,
        });
        let rawResponse = (result.content || '').trim();

@@ -697,6 +699,7 @@ async function _dispatchOne(
                    try {
                        const retryRes = await deps.ai.chat({
                            system, user: retryTask, model,
+                            signal: deps.signal,
                        });
                        const retried = (retryRes.content || '').trim();
                        if (retried) {
@@ -777,7 +780,7 @@ async function _dispatchOne(
                                    try {
                                        const { formatIssuesForRetry } = await import('../selfReflector/selfReflectorVerifier');
                                        const retryTask = `${formatIssuesForRetry(verifierIssues)}\n\n[원래 지시]\n${task}`;
-                                        const retryRes = await deps.ai.chat({ system, user: retryTask, model });
+                                        const retryRes = await deps.ai.chat({ system, user: retryTask, model, signal: deps.signal });
                                        const retried = (retryRes.content || '').trim();
                                        if (retried) {
                                            // 재작업 결과로 본문 갱신 + action-tag 다시 실행.
@@ -937,6 +940,7 @@ async function _resolveStageAgent(
        const result = await deps.ai.chat({
            system, user,
            model: modelForAgent(state, 'ceo', deps.defaultModel),
+            signal: deps.signal,
        });
        const raw = (result.content || '').trim();
        // 가벼운 파서 — 코드펜스 / 잡문 제거 후 첫 {…} 추출.
@@ -1085,6 +1089,7 @@ async function _runReviewCycle(args: {
                system: inspectorSystem,
                user: inspectorUser,
                model: modelForAgent(state, inspector.agentId, deps.defaultModel),
+                signal: deps.signal,
            });
            inspectorText = (res.content || '').trim();
        } catch (e: any) {
@@ -1108,6 +1113,7 @@ async function _runReviewCycle(args: {
                system: ceoSystem,
                user: ceoUser,
                model: modelForAgent(state, 'ceo', deps.defaultModel),
+                signal: deps.signal,
            });
            ceoText = (res.content || '').trim();
        } catch (e: any) {
@@ -19,6 +19,15 @@ import { IAIService } from '../../core/services';
 import { logError, logInfo } from '../../utils';
 import { RequirementContract } from './types';

+/**
+ * Default upper bound on intent-alignment rounds. Used as the fallback when
+ * the `company.intentAlignmentMaxRounds` config key is not set; a configured
+ * value is clamped to the [1, 5] range.
+ *
+ * Intent: guarantee at the code level that the user is never questioned
+ * indefinitely, even without an explicit setting. When the round limit is
+ * reached, smart mode proceeds automatically and strict mode shows a
+ * confirmation card.
+ */
+export const ALIGNMENT_DEFAULT_MAX_ROUNDS = 3;
+
 /**
 * 분석 한 회차의 결과. contract는 항상 채워서 돌아오고, 추가 정보가 필요한
 * 경우만 confidence가 medium/low이고 openQuestions가 비어 있지 않다. 호출자가
@@ -22,6 +22,15 @@ export function getBridgeBaseUrl(): string {
 export interface BridgeFetchOptions {
+    /** Request timeout in milliseconds; when it elapses the call fails with a timeout error. */
    timeoutMs?: number;
+    /** Caller-supplied abort signal; aborting it cancels the request and is reported as a user cancel. */
    signal?: AbortSignal;
+    /**
+     * Callback invoked once per heartbeat interval for as long as the call is
+     * still in flight. Introduced so long-running calls (synthesize / scan /
+     * import) can show the user that the request is still alive. It is called
+     * fire-and-forget and any exception it throws is silently swallowed.
+     * When omitted, no heartbeat timer is started.
+     */
+    onHeartbeat?: (elapsedMs: number) => void;
+    /** Heartbeat interval in ms. Defaults to 30 s; values below 5 s are raised to 5 s. */
+    heartbeatMs?: number;
 }

 /**
@@ -48,6 +57,17 @@ export async function bridgeFetch<T = any>(
        else opts.signal.addEventListener('abort', () => controller.abort(), { once: true });
    }

+    // Heartbeat — 긴 LLM synthesize / Playwright scan 도중에도 사용자에게
+    // "살아있다" 신호. 호출자가 onHeartbeat 안 줬으면 비활성.
+    const heartbeatStartedAt = Date.now();
+    let heartbeatInterval: NodeJS.Timeout | undefined;
+    if (opts.onHeartbeat) {
+        const intervalMs = Math.max(5_000, opts.heartbeatMs ?? 30_000);
+        heartbeatInterval = setInterval(() => {
+            try { opts.onHeartbeat!(Date.now() - heartbeatStartedAt); } catch { /* noop */ }
+        }, intervalMs);
+    }
+
    try {
        const res = await fetch(url, {
            ...init,
@@ -63,13 +83,35 @@ export async function bridgeFetch<T = any>(

        if (!res.ok) {
            const stage = body?.stage ? `[${body.stage}] ` : '';
-            const errMsg = body?.error || body?.message || (typeof body === 'string' ? body : `HTTP ${res.status}`);
-            throw new Error(`Datacollect ${path} 실패: ${stage}${errMsg}`);
+            // Bridge 가 에러 body 를 객체로 보낼 때 (e.g. `{error: {message, code, details}}`)
+            // 옛 포맷터는 `body.error` 가 객체면 `${}` 보간이 `[object Object]` 로 깨져
+            // 사용자가 실제 원인 메시지를 못 봄. 문자열 추출을 우선순위대로 시도:
+            //   1) body.error.message  (구조화된 에러)
+            //   2) body.error          (문자열일 때)
+            //   3) body.message        (외곽 message)
+            //   4) body 가 통째로 문자열
+            //   5) JSON.stringify(body.error)  (최후 — 구조 그대로 노출)
+            //   6) HTTP status 만
+            const extractErr = (): string => {
+                if (body?.error?.message && typeof body.error.message === 'string') return body.error.message;
+                if (typeof body?.error === 'string') return body.error;
+                if (typeof body?.message === 'string') return body.message;
+                if (typeof body === 'string') return body;
+                if (body?.error) {
+                    try { return JSON.stringify(body.error).slice(0, 400); } catch { /* fall through */ }
+                }
+                return `HTTP ${res.status}`;
+            };
+            throw new Error(`Datacollect ${path} 실패: ${stage}${extractErr()}`);
        }
        return body as T;
    } catch (e: any) {
        if (e?.name === 'AbortError') {
-            throw new Error(`Datacollect ${path} 시간 초과 (${timeoutMs}ms). Bridge가 떠 있는지 확인하세요 (${base}).`);
+            // 외부 signal 로 인한 abort 인지 timeout 인지 구분해서 안내.
+            if (opts.signal?.aborted) {
+                throw new Error(`Datacollect ${path} 취소됨 (사용자 abort).`);
+            }
+            throw new Error(`Datacollect ${path} 시간 초과 (${timeoutMs}ms). Bridge가 응답하지 않습니다 (${base}).`);
        }
        // ECONNREFUSED 등 connect 실패는 친절히 안내.
        const msg = String(e?.message || e);
@@ -82,5 +124,6 @@ export async function bridgeFetch<T = any>(
        throw e;
    } finally {
        clearTimeout(timer);
+        if (heartbeatInterval) clearInterval(heartbeatInterval);
    }
 }
@@ -80,8 +80,22 @@ export async function handleSlashCommand(
        }
        return true;
    } catch (e: any) {
-        logInfo(`[SLASH] handleSlashCommand error head=${head}: ${e?.message || String(e)}`);
-        chunk(view, `\n\n> ❌ **에러**: ${e?.message || String(e)}\n`);
+        const errMsg = e?.message || String(e);
+        logInfo(`[SLASH] handleSlashCommand error head=${head}: ${errMsg}`);
+        chunk(view, `\n\n> ❌ **에러**: ${errMsg}\n`);
+        // 자주 발생하는 환경 의존성 에러는 사용자가 즉시 해결할 수 있게 명령 가이드 자동 첨부.
+        const remedy = _bridgeErrorRemedy(errMsg);
+        if (remedy) chunk(view, remedy);
+        // Python 패키지 미설치 패턴이면 한 클릭 설치 notification 도 같이 띄움.
+        // 채팅 텍스트만 보면 사용자가 명령 팔레트로 가기 귀찮으니까 actionable 버튼 제공.
+        const pkgMatch = errMsg.match(/필수 패키지가 없습니다?[:\s]+([\w\-,\s.]+)/i)
+            || errMsg.match(/missing (?:python )?packages?[:\s]+([\w\-,\s.]+)/i);
+        if (pkgMatch) {
+            try {
+                const { offerInstallNotification } = await import('../setup/datacollectSetup');
+                void offerInstallNotification(pkgMatch[1].trim());
+            } catch { /* setup 모듈 로드 실패해도 텍스트 가이드는 이미 보냈으니 무시 */ }
+        }
        return true;
    } finally {
        // input 잠금 해제 — slashRouter 진입했으면 어떤 경로든 반드시 통과.
@@ -107,45 +121,91 @@ async function runResearch(topic: string, view: Webview | undefined): Promise<bo
    chunk(view, `- notebookId: \`${start.notebookId}\`\n- taskId: \`${start.taskId}\`\n\n⏳ 상태 polling (5초 간격, 최대 10분)…\n`);

    // Deep research는 보통 1~5분. 5초 polling, 최대 120회(10분).
+    //
+    // hang 방어 3겹:
+    //   (1) Bridge status 가 5회 연속 실패하면 polling 포기 — bridge 가 죽은 거.
+    //   (2) heartbeat — 30초마다 진행 상태가 안 바뀌면 "⏳" 한 줄 흘려 사용자가
+    //       "멈춰 있나?" 느끼지 않게.
+    //   (3) status 비교는 트림 + 소문자 — bridge 가 "Completed " 식으로 흘려도 잡힘.
    const deadline = Date.now() + 10 * 60_000;
+    const HEARTBEAT_MS = 30_000;
+    const MAX_CONSECUTIVE_FAILS = 5;
+    const COMPLETED_SET = new Set(['completed', 'done', 'success', 'finished']);
+    const FAILED_SET = new Set(['failed', 'error', 'cancelled', 'canceled', 'aborted']);
+
    let lastStatus = '';
+    let lastChangeAt = Date.now();
+    let consecutiveFails = 0;
+    let pollCount = 0;
+    let researchOk = false;
    while (Date.now() < deadline) {
        await new Promise(r => setTimeout(r, 5_000));
+        pollCount++;
        // status 한 번 호출이 30s를 넘는 사례(stale MCP 자식)가 보고돼 60s로 완화.
-        const st = await bridgeFetch<{ success: boolean; result: any }>(
-            `/api/research/status?notebookId=${encodeURIComponent(start.notebookId)}&taskId=${encodeURIComponent(start.taskId)}`,
-            { method: 'GET' },
-            { timeoutMs: 60_000 },
-        );
-        const status = String(st.result?.status || st.result || '').toLowerCase();
+        let st: { success: boolean; result: any } | undefined;
+        try {
+            st = await bridgeFetch<{ success: boolean; result: any }>(
+                `/api/research/status?notebookId=${encodeURIComponent(start.notebookId)}&taskId=${encodeURIComponent(start.taskId)}`,
+                { method: 'GET' },
+                { timeoutMs: 60_000 },
+            );
+            consecutiveFails = 0;
+        } catch (e: any) {
+            consecutiveFails++;
+            if (consecutiveFails >= MAX_CONSECUTIVE_FAILS) {
+                chunk(view, `\n❌ Status polling 연속 실패 ${consecutiveFails}회 — bridge 가 응답하지 않습니다. 중단합니다.\n(원인: ${e?.message || String(e)})\n`);
+                return true;
+            }
+            chunk(view, `\n  · status 호출 실패 ${consecutiveFails}/${MAX_CONSECUTIVE_FAILS} (${e?.message || 'unknown'})\n`);
+            continue;
+        }
+        const status = String(st.result?.status || st.result || '').trim().toLowerCase();
        if (status && status !== lastStatus) {
            chunk(view, `  · ${status}\n`);
            lastStatus = status;
+            lastChangeAt = Date.now();
+        } else if (Date.now() - lastChangeAt > HEARTBEAT_MS) {
+            // 30초간 status 변화 없음 — 사용자에게 살아있다는 신호.
+            chunk(view, `  · ⏳ 대기 중 (${Math.round((Date.now() - lastChangeAt) / 1000)}s, 폴링 ${pollCount}회)\n`);
+            lastChangeAt = Date.now();
        }
-        if (status === 'completed' || status === 'done' || status === 'success' || status === 'finished') break;
-        if (status === 'failed' || status === 'error') {
+        if (COMPLETED_SET.has(status)) { researchOk = true; break; }
+        if (FAILED_SET.has(status)) {
            chunk(view, `\n❌ Research 실패: ${JSON.stringify(st.result).slice(0, 400)}\n`);
            return true;
        }
    }

+    if (!researchOk) {
+        chunk(view, `\n❌ 10분 polling 후에도 완료 신호가 오지 않았습니다 (마지막 status: \`${lastStatus || '(없음)'}\`). 중단합니다.\n`);
+        return true;
+    }
+
    chunk(view, `\n📥 import…\n`);
    // import는 deep research 결과를 노트북 소스로 옮기는 단계. 큰 리포트는 2~5분
    // 걸리는 경우가 흔해 120s에서 TRANSIENT_TIMEOUT으로 떨어지는 사례 보고됨. 300s로 늘림.
+    // heartbeat — 30초마다 진행 표시 흘려 사용자가 "멈췄나?" 의심하지 않게.
    await bridgeFetch('/api/research/import', {
        method: 'POST',
        body: JSON.stringify({ notebookId: start.notebookId, taskId: start.taskId }),
-    }, { timeoutMs: 300_000 });
+    }, {
+        timeoutMs: 300_000,
+        onHeartbeat: (elapsedMs) => chunk(view, `  · import 진행 중 (${Math.round(elapsedMs / 1000)}s)\n`),
+    });

    chunk(view, `🧪 synthesize…\n\n`);
    // synthesize는 LLM이 노트북 전체를 합성 — 큰 노트북은 5~10분. 600s로 cap.
+    // heartbeat 필수: LLM 단일 호출이 수 분 걸리므로 hang 의심 방지.
    const synth = await bridgeFetch<{ success: boolean; markdown?: string; result?: string }>(
        '/api/research/synthesize',
        {
            method: 'POST',
            body: JSON.stringify({ notebookId: start.notebookId, topic, rootTopic: topic, includeKnowledgeConnections: true }),
        },
-        { timeoutMs: 600_000 },
+        {
+            timeoutMs: 600_000,
+            onHeartbeat: (elapsedMs) => chunk(view, `  · synthesize LLM 작업 중 (${Math.round(elapsedMs / 1000)}s)\n`),
+        },
    );
    const md = synth.markdown || synth.result || '(빈 응답)';
    chunk(view, `---\n\n${md}\n`);
@@ -694,6 +754,112 @@ function bucketSegments(segments: any[] | undefined, bucketSec = 30): { time: st
        }));
 }

+/**
+ * Analysis mode of the Astra `/youtube` command, taken from the user's
+ * `mode:info|benchmark|both` input.
+ */
+type YoutubeAnalysisMode = 'info' | 'benchmark' | 'both';
+
+/**
+ * Builds the LLM prompt for the information-extraction (`info`) mode, which
+ * deals with the *content and knowledge* of the video itself.
+ *
+ * Intent: build4LensPrompt has a benchmarking tone ("how would we reproduce
+ * this video"), which is of little value for informational videos such as
+ * tutorials, lectures, news, interviews and reviews. This function goes the
+ * opposite way: it asks the model to extract what the video *said* as facts,
+ * claims and evidence, organised into a knowledge card the user can use for
+ * decisions, study or citation without re-watching the video.
+ *
+ * Output rules are kept consistent with build4LensPrompt (Markdown, Korean,
+ * quote only what is in the subtitles).
+ *
+ * NOTE(review): the prompt asks for mm:ss timestamps on every citation, but
+ * the transcript passed in is the segment texts joined into one string with
+ * no per-segment times; only `chapters` from the metadata carry timing.
+ * Confirm whether timestamps should be added to the transcript.
+ *
+ * @param video Extracted video record; `metadata`, `segments`, `video_id` and
+ *   `title` are read, and missing `metadata` / `segments` fall back to empty.
+ * @param userContent Free-form user context; appended as its own block only
+ *   when it is non-empty after trimming.
+ * @returns The complete prompt text.
+ */
+function buildInfoExtractionPrompt(video: any, userContent: string): string {
+    const meta = video.metadata || {};
+    const segments = video.segments || [];
+
+    // Transcript body: info mode needs to show the *whole* text for accurate fact
+    // extraction, but it is trimmed when too long to respect the LLM context limit.
+    // 12000 characters is roughly a light 60-minute lecture (author's estimate).
+    const fullText = segments.map((s: any) => String(s.text || '').trim()).join(' ').replace(/\s+/g, ' ');
+    const trimmed = fullText.length > 12000 ? fullText.slice(0, 12000) + ' …[자막 일부 잘림]' : fullText;
+
+    // Reduced metadata object that is embedded in the prompt as JSON.
+    const slim = {
+        url: meta.webpage_url || `https://www.youtube.com/watch?v=${video.video_id}`,
+        title: meta.title || video.title,
+        channel: meta.channel,
+        durationSec: meta.duration,
+        durationHms: meta.duration_string,
+        uploadDate: meta.upload_date,
+        viewCount: meta.view_count,
+        likeCount: meta.like_count,
+        tags: (meta.tags || []).slice(0, 8),
+        categories: meta.categories,
+        chapters: meta.chapters,
+        descriptionPreview: (meta.description || '').slice(0, 600),
+    };
+
+    // Analysis date as YYYY-MM-DD, taken from the UTC ISO timestamp.
+    const today = new Date().toISOString().slice(0, 10);
+    const userBlock = userContent.trim()
+        ? `\n\n[사용자 컨텍스트 — 사용자가 어떤 관점에서 이 영상을 활용하려는지]\n${userContent.trim()}`
+        : '';
+
+    return `당신은 영상 콘텐츠를 *지식 카드*로 변환하는 정보 큐레이터입니다. 사장님이
+이 영상을 다시 보지 않고도 핵심 정보를 그대로 활용할 수 있도록, 영상이 *말한 것*
+(주장·사실·근거·결론)을 구조화해서 정리하세요.
+
+[분석 원칙]
+1. 영상 본문(자막)에 *명시된 것* 만 인용. 추측·일반론·외부 지식 보강 금지.
+2. 자막에 없는 사실은 "본문에 명시되지 않음" 이라고 표시. 채워 넣지 말 것.
+3. 정보의 신뢰도 단계 표기: \`[근거 명시]\` (구체 출처·수치·인용)·\`[화자 주장]\`
+   (출처 없는 단정)·\`[가정]\` (조건부 표현). 모든 핵심 주장에 라벨링.
+4. 타임스탬프는 mm:ss 형식으로 인용 직후 괄호에. 예: "…라고 말한다 (12:34)".
+5. 한국어 마크다운. 표·불릿 자유롭게.
+
+[영상 메타데이터]
+\`\`\`json
+${JSON.stringify(slim, null, 2)}
+\`\`\`
+
+[자막 본문]
+${trimmed}${userBlock}
+
+[필수 출력 형식 — 정확히 이 구조. 아래 6개 섹션 외 추가 금지]
+
+# ${slim.title || video.title} — 정보 추출 카드
+
+> **영상 URL**: ${slim.url} · **분석 일자**: ${today} · **길이**: ${slim.durationHms || (slim.durationSec ? formatHms(slim.durationSec) : '?')} · **채널**: ${slim.channel || '?'}
+
+## 🎯 한 줄 요약 (TL;DR)
+(영상의 핵심 메시지 한 문장. "무엇이 누구에게 왜 중요한가" 를 압축. 제목 그대로 베끼지 말고 본문 기준으로 다시 쓸 것)
+
+## 📌 핵심 주장 3~5개
+영상이 제시한 *주요 결론·주장* 만. 각 항목 한 줄 + 신뢰도 라벨 + 본문 인용 (mm:ss).
+- **[근거 명시]** "주장 한 줄" — 본문 인용 (mm:ss)
+- **[화자 주장]** "주장 한 줄" — 본문 인용 (mm:ss)
+- …
+
+## 📊 사실·데이터·인용
+영상에 등장한 *구체적 수치·날짜·출처·고유명사·전문 용어 정의*. 가공 없이 그대로.
+표로 정리:
+
+| 항목 | 값 / 정의 | 출처 (영상 내) | 타임스탬프 |
+| --- | --- | --- | --- |
+| … | … | 화자/자료 화면/외부 출처 | mm:ss |
+
+데이터가 없는 영상이면 "본문에 명시된 구체 수치·출처 없음" 한 줄.
+
+## 🧭 구조 요약 (Sectioned Summary)
+영상을 chapters (있으면) 또는 30초 버킷으로 구간 나눠 각 구간의 *내용 요약*. 1~2문장씩.
+- **[00:00–02:30]** 도입부에서 다룬 내용 한 문장 요약
+- **[02:30–05:00]** 본론 첫 부분…
+- …
+
+## ❓ 더 파고들 질문 (Open Questions)
+영상이 답하지 않았거나 추가 검증 필요한 사항 2~4개. 사장님이 다음 자료를 찾을 때
+바로 검색어로 쓸 수 있게 구체적으로.
+- "본문에서 X 가 Y 라고 했지만 Z 데이터 출처는 명시 안 됨 — 원 데이터 찾아볼 것"
+- …
+
+## 🔗 인용용 한 줄 카드 (Citation Snippets)
+영상의 *결정적 발언* 을 그대로 따옴표로 보존. 사장님이 글·발표·메모에 인용할 때 복붙용.
+3~5개. 길이는 한 문장.
+- "직접 인용 한 문장" — ${slim.title || video.title}, ${slim.channel || '?'} (mm:ss)
+- …`;
+}
+
 /**
 * extract된 영상 → 유튜브 4-렌즈(훅/구조/제작/CTR) 분석 LLM 프롬프트.
 * Datacollect 웹앱(YoutubePanel)의 build4LensPrompt를 그대로 이식.
@@ -825,26 +991,188 @@ chapters가 있으면 그것을, 없으면 timelinePreview로 구간을 추정.
 > ⚠️ 본 분석은 스크립트의 언어·구조 패턴 학습용입니다. 대사·자료는 직접 창작/라이선스 확보.`;
 }

+/**
+ * Heuristic check for whether `url` points at a YouTube channel or playlist
+ * rather than a single video. The caller uses the answer only to pick the
+ * default video limit (1 for a single video, 10 for a channel), since yt-dlp
+ * accepts a channel URL directly and enumerates its videos.
+ *
+ * @param url Raw URL text as typed by the user.
+ * @returns `true` when the URL matches a channel/playlist pattern.
+ */
+function _looksLikeYoutubeChannelUrl(url: string): boolean {
+    // Channel roots (`channel/`, `@handle`, `c/`, `user/`) and playlist URLs.
+    const channelOrPlaylist = /youtube\.com\/(channel\/|@|c\/|user\/|playlist\?list=|playlist\/)/i;
+    if (channelOrPlaylist.test(url)) {
+        return true;
+    }
+    // `<name>/videos`, `<name>/shorts` or `<name>/streams` tab given explicitly.
+    const explicitTab = /youtube\.com\/[^/?#]+\/(videos|shorts|streams)\b/i;
+    return explicitTab.test(url);
+}
+
+/**
+ * Normalises a channel URL into a form that yt-dlp enumerates as a *video
+ * list*.
+ *
+ * Intent: passing a channel "root" such as `https://www.youtube.com/@handle`
+ * straight to yt-dlp was observed to return the channel ID (`UC...`) as if it
+ * were a video entry (the Deno-AI channel case). Naming the `/videos` tab
+ * explicitly makes enumeration work.
+ *
+ * Rules:
+ *   - Only channel roots (`@handle`, `channel/UC..`, `c/name`, `user/name`,
+ *     with or without a trailing slash) get `/videos` appended; any query
+ *     string stays after the new path.
+ *   - Everything else is returned untouched: single videos (`watch?v=`,
+ *     `youtu.be/<id>`, `/shorts/<id>`), URLs that already name a tab or a
+ *     playlist, non-YouTube hosts and unparsable input.
+ *   - Scheme-less input (`youtube.com/@handle`) is parsed as `https://…`, so
+ *     it is normalised the same way as the full form.
+ *
+ * The root test is anchored on the whole path, so a channel whose name merely
+ * starts with a tab word (e.g. `/c/shorts-daily`) is still treated as a root.
+ *
+ * @param url Raw URL text.
+ * @returns The normalised URL, or `url` unchanged when no rule applies.
+ */
+function _normalizeYoutubeUrl(url: string): string {
+    let u: URL;
+    try {
+        // `new URL()` rejects input without a scheme; the sibling channel
+        // detector accepts it, so parse it as https instead of giving up.
+        const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(url);
+        u = new URL(hasScheme ? url : `https://${url}`);
+    } catch {
+        return url; // leave unparsable input alone
+    }
+    if (!/youtube\.com$|youtube\.com\.|youtu\.be$/i.test(u.hostname)) return url;
+    if (u.hostname.includes('youtu.be')) return url; // youtu.be/<id> is a video short link
+    // Channel root only: exactly `/@x`, `/channel/x`, `/c/x` or `/user/x`.
+    // Paths with a tab, a video or a playlist never match this anchored test.
+    if (/^\/(@[^/]+|channel\/[^/]+|c\/[^/]+|user\/[^/]+)\/?$/i.test(u.pathname)) {
+        u.pathname = u.pathname.replace(/\/?$/, '/videos');
+        return u.toString();
+    }
+    return url;
+}
+
+/**
+ * Pattern-matches the environment-dependency errors the Datacollect bridge
+ * commonly reports (missing Python packages, Python itself missing, bridge not
+ * reachable) and returns guidance text that includes the fixing command.
+ * The slash router's catch block appends it after the generic error message
+ * as a safety net.
+ *
+ * The package list is sanitised before it is put into the suggested command:
+ * the capture pattern also admits whitespace and dots, so without this a
+ * trailing period or a following sentence would end up in `pip install …`.
+ *
+ * @param rawMsg Error message text; null/undefined is treated as empty.
+ * @returns Markdown guidance, or an empty string when no pattern matches.
+ */
+function _bridgeErrorRemedy(rawMsg: string): string {
+    const msg = String(rawMsg || '');
+    // Pattern 1: missing Python packages (reported explicitly by the bridge).
+    const pkgMatch = msg.match(/필수 패키지가 없습니다?[:\s]+([\w\-,\s.]+)/i)
+        || msg.match(/missing (?:python )?packages?[:\s]+([\w\-,\s.]+)/i);
+    if (pkgMatch) {
+        // Stop at a sentence-ending period; a dot inside a name
+        // (`zope.interface`) is kept because it is not followed by whitespace.
+        const listPart = (pkgMatch[1] ?? '').split(/\.(?=\s|$)/)[0] ?? '';
+        const names = listPart
+            .split(/[,\s]+/)
+            .map((s) => s.replace(/^[.\-]+|[.\-]+$/g, '')) // bullet dashes, stray dots
+            .filter((s) => /^[A-Za-z0-9][A-Za-z0-9._-]*$/.test(s));
+        const pkgs = [...new Set(names)].join(' ');
+        // With no usable package name the command would be a bare
+        // `pip install`, so fall through to the other patterns instead.
+        if (pkgs) {
+            return `\n\n💡 **해결**: Datacollect bridge 가 도는 환경에서 아래 명령으로 누락된 Python 패키지를 설치하세요.\n\n`
+                + '```bash\n'
+                + `# macOS (homebrew Python — PEP 668 보호 우회):\n`
+                + `python3 -m pip install --user --break-system-packages ${pkgs}\n\n`
+                + `# 또는 가상환경(venv) 사용 시 그 venv 활성화 후:\n`
+                + `pip install ${pkgs}\n`
+                + '```\n\n'
+                + `설치 후 **bridge 재시작은 보통 불필요** — bridge 는 Python 을 child process 로 spawn 하므로 다음 호출이 바로 새 패키지를 인식합니다. 그래도 안 되면 \`npm run bridge\` 재시작.\n`;
+        }
+    }
+    // Pattern 2: Python itself is missing or not on PATH.
+    if (/Python 3이 설치돼 있지 않거나 PATH/i.test(msg) || /command not found.*python/i.test(msg)) {
+        return `\n\n💡 **해결**: Python 3 이 설치돼 있어야 합니다. https://www.python.org 에서 설치 후 터미널에서 \`python3 --version\` 으로 확인하세요. 이미 설치돼 있으면 PATH 설정 확인 필요.`;
+    }
+    // Pattern 3: could not connect to the bridge at all.
+    if (/ECONNREFUSED|fetch failed/i.test(msg) || /연결할 수 없습니다/i.test(msg)) {
+        return `\n\n💡 **해결**: Datacollect bridge 가 떠 있지 않습니다. \`Datacollector_MAC\` 프로젝트에서 \`npm run bridge\` 실행 후 다시 시도하세요.`;
+    }
+    return '';
+}
+
+/**
+ * Cap on how many videos a single channel/playlist run may process, so the
+ * user does not regret launching an oversized batch. Chosen with the typical
+ * 30–120 s of LLM analysis per video in mind.
+ */
+const YOUTUBE_BATCH_MAX = 50;
+
 async function runYoutube(arg: string, view: Webview | undefined): Promise<boolean> {
-    // URL 토큰만 추출, 나머지는 보조 컨텍스트(우리 채널/콘텐츠 설명).
+    // 토큰 파싱 — URL 뒤로는 두 가지 형태의 키워드 + 자유 컨텍스트 텍스트.
+    //
+    //   n:<숫자>                       → 채널일 때 가져올 영상 개수
+    //   mode:<info|benchmark|both>     → 분석 모드 (key:value 형)
+    //   info / benchmark / both        → 같은 모드의 bare keyword 형 (더 짧고 직관적)
+    //
+    // bare keyword 가 작동하는 이유: `info`/`benchmark`/`both` 는 영어 단어이고
+    // 한국어 사용자가 컨텍스트로 쓸 가능성이 매우 낮아 충돌 위험 적음. 사용자가
+    // 진짜 이 단어들을 컨텍스트로 넣고 싶으면 `mode:` 접두사를 빼지 말고 명시
+    // (이 경우 일반 단어도 컨텍스트로 같이 넣을 수 있음).
+    //
+    // 위 패턴 중 하나도 매칭 안 되는 토큰은 모두 사용자 컨텍스트로 join.
+    const BARE_MODE_KEYWORDS = new Set(['info', 'benchmark', 'both']);
    const tokens = arg.trim().split(/\s+/).filter(Boolean);
    const url = tokens[0] || '';
-    const userContent = tokens.slice(1).join(' ');
+    let limitOverride: number | null = null;
+    let mode: YoutubeAnalysisMode = 'both';
+    const contextTokens: string[] = [];
+    for (const tok of tokens.slice(1)) {
+        const nMatch = tok.match(/^n[:=](\d+)$/i);
+        if (nMatch) {
+            const n = parseInt(nMatch[1], 10);
+            if (Number.isFinite(n) && n > 0) {
+                limitOverride = Math.min(YOUTUBE_BATCH_MAX, n);
+            }
+            continue;
+        }
+        const modeMatch = tok.match(/^mode[:=](info|benchmark|both)$/i);
+        if (modeMatch) {
+            mode = modeMatch[1].toLowerCase() as YoutubeAnalysisMode;
+            continue;
+        }
+        // Bare keyword 형 — `info` / `benchmark` / `both` 자체를 토큰으로.
+        const lower = tok.toLowerCase();
+        if (BARE_MODE_KEYWORDS.has(lower)) {
+            mode = lower as YoutubeAnalysisMode;
+            continue;
+        }
+        contextTokens.push(tok);
+    }
+    const userContent = contextTokens.join(' ');
+
    if (!url) {
-        chunk(view, `사용법: \`/youtube <url> [우리 채널/콘텐츠 설명]\`\n예: \`/youtube https://youtu.be/xxxx\`\n`);
+        chunk(view, [
+            `사용법:\n`,
+            `- 단일 영상: \`/youtube <영상URL> [info|benchmark|both] [컨텍스트]\`\n`,
+            `- 채널/플레이리스트: \`/youtube <채널URL> [n:30] [info|benchmark|both] [컨텍스트]\`\n`,
+            `\n**분석 모드** (생략 시 \`both\`):\n`,
+            `- \`info\` — 영상의 *내용*을 지식 카드로 추출 (튜토리얼·강의·뉴스·인터뷰)\n`,
+            `- \`benchmark\` — 대본 역기획서 4-렌즈 분석 (콘텐츠 제작 벤치마크용)\n`,
+            `- \`both\` — 둘 다 생성 (영상당 LLM 호출 2회)\n`,
+            `\n예시:\n`,
+            `- \`/youtube https://youtu.be/abc info\`\n`,
+            `- \`/youtube https://youtube.com/@somechannel n:20 info AI 학습 자료\`\n`,
+            `\n💡 \`mode:info\` / \`mode=info\` 같은 명시형도 그대로 동작 (백워드 호환).\n`,
+        ].join(''));
        return true;
    }

-    chunk(view, `🎬 **YouTube 추출**: ${url}\n(자막 + 메타데이터)\n\n⏳ Python 추출기 기동 · 자막/메타 추출 중…`);
+    // 채널 URL 감지 → 기본 10개. 단일 영상은 1개. 사용자가 `n:N` 으로 명시했으면 그 값.
+    const isChannel = _looksLikeYoutubeChannelUrl(url);
+    const limit = limitOverride ?? (isChannel ? 10 : 1);
+
+    // yt-dlp 가 영상 목록을 enumerate 할 수 있도록 채널 루트 URL 에 `/videos` 탭을
+    // 자동 append. 그렇지 않으면 채널 ID(UC...)가 영상 ID 로 잘못 들어가는 사고.
+    const normalizedUrl = isChannel ? _normalizeYoutubeUrl(url) : url;
+    if (normalizedUrl !== url) {
+        chunk(view, `🔧 채널 URL 정규화: \`${url}\` → \`${normalizedUrl}\` (yt-dlp 영상 enumeration 을 위한 \`/videos\` 탭 명시)\n\n`);
+    }
+
+    const modeLabel = mode === 'info' ? '📋 정보 추출 (지식 카드)'
+        : mode === 'benchmark' ? '🎬 벤치마킹 (4-렌즈 역기획서)'
+        : '📋 정보 추출 + 🎬 벤치마킹 (둘 다)';
+    if (isChannel) {
+        const callsPerVideo = mode === 'both' ? 2 : 1;
+        chunk(view, `📺 **채널/플레이리스트 감지** → 최신 ${limit}개 영상을 1개씩 순차 분석·wiki화 합니다.\n` +
+            `분석 모드: **${modeLabel}** (영상당 LLM ${callsPerVideo}회 호출)\n` +
+            `각 영상은 자막추출 → LLM 분석 → wiki 저장 순으로 처리되며, 영상당 보통 30~${120 * callsPerVideo}초.\n` +
+            `중간에 멈추려면 Astra 사이드바의 ⏹ Stop 을 누르세요.\n\n`);
+    } else {
+        chunk(view, `📊 **분석 모드**: ${modeLabel}\n\n`);
+    }
+
+    chunk(view, `🎬 **YouTube 추출**: ${normalizedUrl}\n(자막 + 메타데이터${limit > 1 ? `, ${limit}개 영상` : ''})\n\n⏳ Python 추출기 기동 · 자막/메타 추출 중…`);
    // 1) extract — Bridge는 `source` 필드를 기대한다(`url`이 아님).
    const t0 = Date.now();
    const heartbeat = setInterval(() => {
        chunk(view, ` ·${Math.round((Date.now() - t0) / 1000)}s`);
    }, 4000);
+    // 채널은 영상 수에 비례해 yt-dlp 시간이 늘어남 — limit 비례 timeout 으로 완화.
+    const extractTimeoutMs = Math.max(5 * 60_000, limit * 60_000);
    const data = await bridgeFetch<{ success: boolean; videos?: any[]; totalVideos?: number }>(
        '/api/youtube/extract',
-        { method: 'POST', body: JSON.stringify({ source: url, withMetadata: true, limit: 5 }) },
-        { timeoutMs: 5 * 60_000 },
+        { method: 'POST', body: JSON.stringify({ source: normalizedUrl, withMetadata: true, limit }) },
+        {
+            timeoutMs: extractTimeoutMs,
+            onHeartbeat: limit > 1
+                ? (elapsedMs) => chunk(view, `\n  · 추출 진행 중 (${Math.round(elapsedMs / 1000)}s, ${limit}개 영상)\n`)
+                : undefined,
+        },
    ).finally(() => clearInterval(heartbeat));

    const okVideos = (data.videos || []).filter((v: any) => v?.status === 'ok');
@@ -856,39 +1184,86 @@ async function runYoutube(arg: string, view: Webview | undefined): Promise<boole

    const cfg = vscode.workspace.getConfiguration('g1nation');
    const model = (cfg.get<string>('defaultModel', '') || 'gemma4:e2b').trim();
-    const ytSystem = '당신은 유튜브 콘텐츠 시니어 PD입니다. 데이터에 근거한 제작 가이드만 제공하세요.';
+    // 시스템 프롬프트는 모드별로 분리 — info 는 *큐레이터* 톤, benchmark 는 *PD* 톤.
+    // 작은 모델일수록 system prompt 의 역할 정의가 출력 품질을 크게 좌우.
+    const sysInfo = '당신은 영상 콘텐츠를 지식 카드로 변환하는 정보 큐레이터입니다. 자막에 명시된 사실만 인용하세요.';
+    const sysBench = '당신은 유튜브 콘텐츠 시니어 PD입니다. 데이터에 근거한 제작 가이드만 제공하세요.';

-    // 2) 영상마다 LLM 4-렌즈 분석 (보통 1건; 채널/플레이리스트면 순차).
-    for (const video of okVideos) {
+    // 각 영상의 분석을 mode 에 따라 1회 또는 2회 LLM 호출.
+    // 결과는 (라벨, 보고서 본문) 의 배열로 모아 chat 출력 + wiki 저장에 같은 데이터 사용.
+    type Section = { label: string; body: string };
+    async function runOneAnalysis(video: any, prompt: string, system: string, sectionLabel: string, progressTag: string): Promise<Section | null> {
+        chunk(view, `🧪 **${sectionLabel}**${progressTag} (모델 \`${model}\`)…`);
+        try {
+            const t = Date.now();
+            const body = await callLmSynthesis(prompt, system);
+            if (!body) throw new Error('LLM 응답이 비어 있습니다.');
+            chunk(view, ` ✓ (${Math.round((Date.now() - t) / 1000)}s)\n\n`);
+            chunk(view, body + '\n\n');
+            return { label: sectionLabel, body };
+        } catch (e: any) {
+            chunk(view, `\n\n⚠️ ${sectionLabel} 실패${progressTag}: ${e?.message || String(e)}\n`);
+            return null;
+        }
+    }
+
+    // 2) 영상마다 LLM 분석 → wiki 저장. **queue 처럼 1개씩 순차** —
+    //    채널 N개면 i/N 진행 표시. 하나가 실패해도 다음으로 계속 (continue 로
+    //    skip), 다 끝나면 마지막에 통계 요약을 한 줄로 흘림.
+    const total = okVideos.length;
+    let analyzedOk = 0;
+    let analyzedFail = 0;
+    let savedOk = 0;
+    let savedFail = 0;
+    const batchT0 = Date.now();
+    for (let i = 0; i < okVideos.length; i++) {
+        const video = okVideos[i];
        const vTitle = video?.metadata?.title || video?.title || video?.video_id || '(제목 없음)';
+        const progressTag = total > 1 ? ` [${i + 1}/${total}]` : '';
+
+        if (total > 1) {
+            chunk(view, `\n━━━ **${progressTag.trim()} ${vTitle}** ━━━\n\n`);
+        }

        // 보고서 앞에 영상 전체 스크립트를 먼저 출력 — 분석과 원문 대본을 함께 보도록.
        const script = fullScriptFromSegments(video?.segments);
        chunk(view, `## 📜 전체 스크립트 (Full Script)\n\n${script}\n\n---\n\n`);

-        chunk(view, `🧪 **LLM 4-렌즈 분석**: ${vTitle} (모델 \`${model}\`)\n모델·하드웨어에 따라 수 분 걸릴 수 있습니다…`);
-        let report: string;
-        try {
-            const partT0 = Date.now();
-            report = await callLmSynthesis(build4LensPrompt(video, userContent), ytSystem);
-            if (!report) throw new Error('LLM 응답이 비어 있습니다.');
-            chunk(view, ` ✓ (${Math.round((Date.now() - partT0) / 1000)}s)\n\n`);
-        } catch (e: any) {
-            chunk(view, `\n\n⚠️ LLM 분석 실패: ${e?.message || String(e)}\n(LM 서버가 떠 있는지, \`g1nation.ollamaUrl\` / \`defaultModel\` 설정을 확인하세요.)\n\n`);
+        // mode 분기 — info / benchmark / both 에 맞게 0~2회 LLM 호출.
+        const sections: Section[] = [];
+        if (mode === 'info' || mode === 'both') {
+            const sec = await runOneAnalysis(video, buildInfoExtractionPrompt(video, userContent), sysInfo, '📋 정보 추출 (지식 카드)', progressTag);
+            if (sec) sections.push(sec);
+        }
+        if (mode === 'benchmark' || mode === 'both') {
+            const sec = await runOneAnalysis(video, build4LensPrompt(video, userContent), sysBench, '🎬 벤치마킹 (4-렌즈 역기획서)', progressTag);
+            if (sec) sections.push(sec);
+        }
+
+        if (sections.length === 0) {
+            analyzedFail++;
+            chunk(view, `(LM 서버가 떠 있는지, \`g1nation.ollamaUrl\` / \`defaultModel\` 설정을 확인하세요.)\n\n`);
            continue;
        }
-        chunk(view, report + '\n\n');
+        analyzedOk++;

        // 3) save — benchmark와 동일하게 /api/wiki/save (datacollectSavePath > WIKI_RAW_PATH).
+        // wiki 본문은 위에서 LLM 호출한 sections 를 그대로 한 파일에 이어붙여 보관.
        try {
            const today = new Date().toISOString().slice(0, 10);
            const videoUrl = video?.metadata?.webpage_url || `https://www.youtube.com/watch?v=${video?.video_id}`;
-            const title = `유튜브분석 ${vTitle} ${today}`;
+            // mode 별로 파일명 접미사 — 같은 영상의 info / benchmark / both 가 한 폴더에서 구분되도록.
+            const modeSuffix = mode === 'info' ? ' (정보)'
+                : mode === 'benchmark' ? ' (벤치마크)'
+                : '';
+            const title = `유튜브분석 ${vTitle}${modeSuffix} ${today}`;
+            const sectionDivider = sections.length > 1 ? `\n\n---\n\n` : '';
            const fileMarkdown = [
                `# ${title}`,
                ``,
                `- **영상 URL**: ${videoUrl}`,
                `- **분석 시각**: ${new Date().toISOString()}`,
+                `- **분석 모드**: ${mode}`,
                `- **생성**: Astra /youtube · Datacollect youtube insight`,
                ``,
                `## 📜 전체 스크립트 (Full Script)`,
@@ -897,7 +1272,7 @@ async function runYoutube(arg: string, view: Webview | undefined): Promise<boole
                ``,
                `---`,
                ``,
-                report,
+                sections.map((s) => s.body).join(sectionDivider),
                ``,
            ].join('\n');
            const savePath = (cfg.get<string>('datacollectSavePath', '') || '').trim();
@@ -908,11 +1283,23 @@ async function runYoutube(arg: string, view: Webview | undefined): Promise<boole
                { method: 'POST', body: JSON.stringify(body) },
                { timeoutMs: 30_000 },
            );
-            chunk(view, `💾 **결과물 저장 완료**: \`${saved?.path || '(경로 미확인)'}\`\n\n`);
+            savedOk++;
+            chunk(view, `💾 **결과물 저장 완료**${progressTag}: \`${saved?.path || '(경로 미확인)'}\`\n\n`);
        } catch (e: any) {
-            chunk(view, `⚠️ 결과물 저장 실패: ${e?.message || String(e)}\n\n`);
+            savedFail++;
+            chunk(view, `⚠️ 결과물 저장 실패${progressTag}: ${e?.message || String(e)}\n\n`);
        }
    }
+
+    // 배치 처리(=채널/플레이리스트) 끝나면 통계 한 줄로 마무리. 단일 영상은 위에서 이미 끝.
+    if (total > 1) {
+        const batchSec = Math.round((Date.now() - batchT0) / 1000);
+        chunk(view, `\n━━━━━━━━━━━━━━━━━━━━\n`
+            + `🏁 **배치 완료** (총 ${batchSec}s · ${total}개 영상)\n`
+            + `- 분석: ✅ ${analyzedOk} / ❌ ${analyzedFail}\n`
+            + `- 저장: 💾 ${savedOk} / ⚠️ ${savedFail}\n`);
+    }
+
    return true;
 }

@@ -0,0 +1,267 @@
+/**
+ * Datacollect 의존성(Python 패키지) 자동 설치/검증 모듈.
+ *
+ * 의도: Astra extension 만 깔고 끝나면 `/youtube`, `/research` 같은 datacollect
+ * 슬래시 명령은 bridge 의 Python 의존성 (`yt-dlp`, `youtube-transcript-api`) 이
+ * 없어서 실패한다. 사용자가 그걸 매번 수동으로 깔아야 하는 friction 을 없애려고
+ * VS Code 명령 + notification 으로 "한 클릭 설치" 경로 제공.
+ *
+ * 범위:
+ *   - 우리가 자동으로 깔 수 있는 것 → Python 패키지만. (bridge 자체는 별도
+ *     Datacollector_MAC 프로젝트라 사용자가 그쪽에서 `npm install + npm run bridge`
+ *     해야 함 — 우리는 안내만)
+ *   - 시스템 Python 자체 설치는 OS 정책상 자동화 위험 → python.org 링크 안내만.
+ */
+import * as vscode from 'vscode';
+import { spawn } from 'child_process';
+import { logInfo, logError } from '../../utils';
+
+/**
+ * Python packages (pip distribution names) that the Datacollect slash commands depend on.
+ * `probePythonEnv` checks each entry for importability and reports the ones that fail.
+ */
+export const REQUIRED_PY_PACKAGES = ['yt-dlp', 'youtube-transcript-api'] as const;
+
+/** Result of probing the local Python environment; produced by `probePythonEnv`. */
+export interface PythonProbe {
+    /** Detected Python 3 executable name (`python3` / `python` / `py`), or null when none was found. */
+    pythonCmd: string | null;
+    /** Trimmed output of `<pythonCmd> --version`, or null when no interpreter was found. */
+    version: string | null;
+    /** Required packages whose import succeeded under `pythonCmd`. */
+    installedPackages: Set<string>;
+    /** Required packages whose import failed; holds every required package when no interpreter was found. */
+    missingPackages: string[];
+}
+
+/**
+ * Diagnoses the local Python setup in one pass: locates a Python 3 interpreter and checks
+ * whether each required package can be imported with it.
+ *
+ * Problems are reported through the returned object instead of by throwing, so callers can
+ * branch their UI on `pythonCmd === null` or on a non-empty `missingPackages`.
+ *
+ * @returns The interpreter command and version (both null when no Python 3 was found) plus
+ *   the installed / missing split of `REQUIRED_PY_PACKAGES`.
+ */
+export async function probePythonEnv(): Promise<PythonProbe> {
+    // On Windows `python` / `py` usually come first; elsewhere `python3` is the safer bet.
+    const interpreterNames = process.platform === 'win32'
+        ? ['python', 'py', 'python3']
+        : ['python3', 'python'];
+
+    let pythonCmd: string | null = null;
+    let version: string | null = null;
+    for (const name of interpreterNames) {
+        const { exitCode, stdout, stderr } = await _capture(name, ['--version'], 5_000);
+        const reportsPython3 = /Python\s+3\./i.test(stdout + stderr);
+        if (exitCode !== 0 || !reportsPython3) continue;
+        pythonCmd = name;
+        version = (stdout || stderr).trim();
+        break;
+    }
+
+    if (pythonCmd === null) {
+        // No usable interpreter: every required package counts as missing.
+        return {
+            pythonCmd: null,
+            version: null,
+            installedPackages: new Set<string>(),
+            missingPackages: [...REQUIRED_PY_PACKAGES],
+        };
+    }
+
+    // Distribution name and import name can differ (`yt-dlp` → `yt_dlp`); known cases are
+    // listed explicitly, anything else falls back to swapping dashes for underscores.
+    const moduleNameByPackage: Record<string, string> = {
+        'yt-dlp': 'yt_dlp',
+        'youtube-transcript-api': 'youtube_transcript_api',
+    };
+
+    const installedPackages = new Set<string>();
+    const missingPackages: string[] = [];
+    for (const pkg of REQUIRED_PY_PACKAGES) {
+        const moduleName = moduleNameByPackage[pkg] ?? pkg.replace(/-/g, '_');
+        const { exitCode } = await _capture(pythonCmd, ['-c', `import ${moduleName}`], 8_000);
+        if (exitCode === 0) {
+            installedPackages.add(pkg);
+            continue;
+        }
+        missingPackages.push(pkg);
+    }
+
+    return { pythonCmd, version, installedPackages, missingPackages };
+}
+
+/**
+ * Installs the given Python packages with `<pythonCmd> -m pip install`, streaming pip's
+ * output into the supplied OutputChannel.
+ *
+ * Strategy:
+ *   - Inside a virtual environment: one plain install into that environment. pip refuses
+ *     `--user` there because the user site-packages directory is not visible, so the
+ *     `--user` based attempts below could never succeed.
+ *   - Otherwise: `--user` first; if that fails, retry with
+ *     `--user --break-system-packages` for PEP 668 ("externally managed") interpreters such
+ *     as Homebrew or Debian Python. The retry is triggered by any failure of the first
+ *     attempt; PEP 668 is assumed, not positively detected.
+ *
+ * @param pythonCmd Interpreter executable to run pip with.
+ * @param packages pip distribution names to install; an empty list is a no-op success.
+ * @param output Channel that receives progress lines and raw pip output.
+ * @returns true when nothing had to be installed or an install attempt exited with code 0,
+ *   false when every attempt failed.
+ */
+export async function installMissingPackages(
+    pythonCmd: string,
+    packages: string[],
+    output: vscode.OutputChannel,
+): Promise<boolean> {
+    if (packages.length === 0) return true;
+    output.appendLine(`\n[Astra Setup] pip install 시작: ${packages.join(', ')}`);
+    output.appendLine(`[Astra Setup] Python: ${pythonCmd}`);
+
+    const installTimeoutMs = 5 * 60_000;
+    const pipInstall = (flags: string[]): Promise<boolean> =>
+        _streamCommand(pythonCmd, ['-m', 'pip', 'install', ...flags, ...packages], output, installTimeoutMs);
+
+    // Virtual environment check: `sys.prefix` differs from `sys.base_prefix` in a venv, and
+    // legacy virtualenv sets `sys.real_prefix`. If the probe itself fails we assume "no venv"
+    // and fall through to the `--user` path.
+    const venvProbe = await _capture(
+        pythonCmd,
+        ['-c', "import sys; print(int(hasattr(sys, 'real_prefix') or sys.prefix != getattr(sys, 'base_prefix', sys.prefix)))"],
+        8_000,
+    );
+    const inVirtualEnv = venvProbe.exitCode === 0 && venvProbe.stdout.trim() === '1';
+    if (inVirtualEnv) {
+        output.appendLine(`[Astra Setup] 가상환경(venv) 감지 → --user 없이 현재 환경에 설치합니다.`);
+        if (await pipInstall([])) {
+            output.appendLine(`[Astra Setup] ✅ 설치 성공 (venv)`);
+            return true;
+        }
+        output.appendLine(`[Astra Setup] ❌ 가상환경 설치 실패. 수동 설치가 필요할 수 있습니다.`);
+        return false;
+    }
+
+    // First attempt: a standard user-level install, sufficient on ordinary setups.
+    if (await pipInstall(['--user'])) {
+        output.appendLine(`[Astra Setup] ✅ 설치 성공 (--user)`);
+        return true;
+    }
+
+    // Fallback for PEP 668 protected interpreters. Combined with `--user` the packages are
+    // meant to land in the user site rather than the system site-packages.
+    output.appendLine(`[Astra Setup] ⚠️ 1차 실패. PEP 668 환경으로 추정 → --break-system-packages 폴백 시도.`);
+    if (await pipInstall(['--user', '--break-system-packages'])) {
+        output.appendLine(`[Astra Setup] ✅ 설치 성공 (--user --break-system-packages)`);
+        return true;
+    }
+
+    output.appendLine(`[Astra Setup] ❌ 두 차례 시도 모두 실패. 수동 설치가 필요할 수 있습니다.`);
+    return false;
+}
+
+/**
+ * Shared "Astra Setup" output channel. Created on first use and reused afterwards so that
+ * repeated runs of the setup command do not register a new channel each time.
+ */
+let _setupOutputChannel: vscode.OutputChannel | undefined;
+
+/**
+ * Body of the `Astra: Setup Datacollect Dependencies` command.
+ *
+ * Flow: probe the Python environment → stop with guidance when no Python 3 is found →
+ * stop when nothing is missing → pip-install the missing packages → probe again to confirm
+ * the packages are really importable. Progress goes to the "Astra Setup" output channel and
+ * the outcome is announced with a VS Code notification.
+ *
+ * @returns Resolves once the setup flow has finished; notification button handling runs
+ *   in the background and is not awaited.
+ */
+export async function runDatacollectSetup(): Promise<void> {
+    if (!_setupOutputChannel) {
+        _setupOutputChannel = vscode.window.createOutputChannel('Astra Setup');
+    }
+    const output = _setupOutputChannel;
+    output.show(true);
+    output.appendLine('🔧 Astra Datacollect 의존성 점검 시작...');
+    output.appendLine('');
+
+    const probe = await probePythonEnv();
+    if (!probe.pythonCmd) {
+        output.appendLine('❌ Python 3 을 찾지 못했습니다.');
+        output.appendLine('   - macOS: brew install python3  또는  https://www.python.org 에서 설치');
+        output.appendLine('   - Windows: https://www.python.org 에서 설치 (Add Python to PATH 체크)');
+        output.appendLine('   - Linux: 패키지 매니저로 설치 (apt install python3 / yum install python3 …)');
+        // Fire-and-forget: the command should not stay pending until the toast is dismissed.
+        void vscode.window.showErrorMessage(
+            'Astra Setup: Python 3 이 PATH 에 없습니다. python.org 에서 설치 후 다시 시도하세요.',
+            'python.org 열기',
+        ).then((pick) => {
+            if (pick === 'python.org 열기') {
+                void vscode.env.openExternal(vscode.Uri.parse('https://www.python.org/downloads/'));
+            }
+        });
+        return;
+    }
+    output.appendLine(`✅ Python 감지: ${probe.pythonCmd} (${probe.version})`);
+    output.appendLine(`   설치된 패키지: ${Array.from(probe.installedPackages).join(', ') || '(없음)'}`);
+    output.appendLine(`   누락된 패키지: ${probe.missingPackages.join(', ') || '(없음)'}`);
+    output.appendLine('');
+
+    if (probe.missingPackages.length === 0) {
+        output.appendLine('🎉 필수 패키지가 모두 설치돼 있습니다. 아무 작업도 필요 없습니다.');
+        void vscode.window.showInformationMessage('Astra Setup: 모든 Python 의존성이 이미 설치돼 있습니다.');
+        return;
+    }
+
+    const ok = await installMissingPackages(probe.pythonCmd, probe.missingPackages, output);
+    if (!ok) {
+        void vscode.window.showErrorMessage(
+            'Astra Setup: pip install 실패. Output 채널의 로그를 확인하세요.',
+            'Output 열기',
+        ).then((pick) => {
+            if (pick === 'Output 열기') output.show(true);
+        });
+        return;
+    }
+
+    // pip reported success; probe again to confirm the packages are actually importable.
+    const after = await probePythonEnv();
+    if (after.missingPackages.length === 0) {
+        output.appendLine('\n✅ 설치 후 import 검증 통과. Datacollect 슬래시 명령을 바로 쓸 수 있습니다.');
+        void vscode.window.showInformationMessage(
+            `Astra Setup 완료: ${probe.missingPackages.join(', ')} 설치됨. /youtube /research 등 다시 시도해 보세요.`,
+        );
+        return;
+    }
+
+    // Point the manual check at a package that is still missing instead of always `yt_dlp`.
+    // For both required packages the import name is the distribution name with dashes
+    // replaced by underscores.
+    const moduleHint = (after.missingPackages[0] ?? 'yt-dlp').replace(/-/g, '_');
+    output.appendLine(`\n⚠️ pip 은 성공으로 끝났지만 import 검증에서 여전히 ${after.missingPackages.join(', ')} 가 안 보입니다.`);
+    output.appendLine('   Python 인터프리터가 여러 개 있거나 venv 가 활성화돼 있을 수 있어요. 터미널에서 직접 확인해 보세요:');
+    output.appendLine(`     ${probe.pythonCmd} -c "import ${moduleHint}; print(${moduleHint}.__file__)"`);
+    void vscode.window.showWarningMessage('Astra Setup: 설치는 완료됐지만 import 검증 실패. Output 채널 확인.');
+}
+
+/**
+ * Shows a non-modal warning offering to install the missing Python dependencies, and runs
+ * the `g1nation.setupDatacollect` command when the user picks "Install Now".
+ *
+ * The function keeps no state of its own, so each call shows the prompt again regardless
+ * of how an earlier prompt was answered.
+ * NOTE(review): intended to be called from the slash router's error handling when a
+ * "missing package" error is detected — confirm against the caller.
+ *
+ * @param missingHint Short text naming what is missing; embedded in the message as-is.
+ */
+export async function offerInstallNotification(missingHint: string): Promise<void> {
+    const installLabel = 'Install Now';
+    const laterLabel = '나중에';
+    const message = `Astra: Datacollect 의 Python 의존성이 누락돼 있습니다 (${missingHint}). 지금 자동 설치할까요?`;
+
+    const choice = await vscode.window.showWarningMessage(message, { modal: false }, installLabel, laterLabel);
+    if (choice !== installLabel) return;
+
+    await vscode.commands.executeCommand('g1nation.setupDatacollect');
+}
+
+// ─── Internal helpers ───────────────────────────────────────────────────────
+
+/**
+ * Runs a command without a shell and collects everything it writes to stdout / stderr.
+ * The returned promise always resolves; failures are encoded in `exitCode`:
+ *   - the process's own exit code on normal termination
+ *   - `-1` timed out (the process is sent SIGKILL)
+ *   - `-2` the child emitted `error` (e.g. executable not found)
+ *   - `-3` the process closed without an exit code
+ *   - `-4` `spawn` or the listener setup threw synchronously
+ */
+function _capture(cmd: string, args: string[], timeoutMs: number): Promise<{ exitCode: number; stdout: string; stderr: string }> {
+    return new Promise((resolve) => {
+        let out = '';
+        let err = '';
+        let settled = false;
+
+        // First caller wins; later events (e.g. `close` after a timeout) are ignored.
+        const settle = (exitCode: number): void => {
+            if (!settled) {
+                settled = true;
+                resolve({ exitCode, stdout: out, stderr: err });
+            }
+        };
+
+        try {
+            const child = spawn(cmd, args, { shell: false, windowsHide: true });
+
+            const killTimer = setTimeout(() => {
+                try {
+                    child.kill('SIGKILL');
+                } catch {
+                    /* noop */
+                }
+                settle(-1);
+            }, timeoutMs);
+            const disarmAndSettle = (exitCode: number): void => {
+                clearTimeout(killTimer);
+                settle(exitCode);
+            };
+
+            child.stdout?.on('data', (data) => {
+                out = out + data.toString();
+            });
+            child.stderr?.on('data', (data) => {
+                err = err + data.toString();
+            });
+            child.on('error', () => disarmAndSettle(-2));
+            child.on('close', (code) => disarmAndSettle(code ?? -3));
+        } catch {
+            settle(-4);
+        }
+    });
+}
+
+/**
+ * Runs a command without a shell and streams its stdout / stderr into the given
+ * OutputChannel as the data arrives. The command line is echoed first.
+ *
+ * @param cmd Executable to run.
+ * @param args Arguments passed to the executable as-is (no shell interpretation).
+ * @param output Channel that receives the echoed command line, process output and status lines.
+ * @param timeoutMs Maximum run time; on expiry the process is sent SIGKILL.
+ * @returns Resolves to true only when the process exits with code 0; false on a non-zero
+ *   exit, a timeout, or a spawn failure. The promise never rejects.
+ */
+function _streamCommand(cmd: string, args: string[], output: vscode.OutputChannel, timeoutMs: number): Promise<boolean> {
+    return new Promise((resolve) => {
+        output.appendLine(`$ ${cmd} ${args.join(' ')}`);
+        let done = false;
+        // First caller wins; later events (e.g. `close` after a timeout) are ignored.
+        const finish = (ok: boolean) => {
+            if (done) return;
+            done = true;
+            resolve(ok);
+        };
+        try {
+            const proc = spawn(cmd, args, { shell: false, windowsHide: true });
+            const timer = setTimeout(() => {
+                output.appendLine(`[Astra Setup] ⏱️ ${timeoutMs / 1000}s 초과. 프로세스를 종료합니다.`);
+                try { proc.kill('SIGKILL'); } catch { /* noop */ }
+                finish(false);
+            }, timeoutMs);
+            // Decode at the stream level so a multi-byte UTF-8 character that straddles two
+            // chunks is not turned into replacement characters in the output channel.
+            proc.stdout?.setEncoding('utf8');
+            proc.stderr?.setEncoding('utf8');
+            proc.stdout?.on('data', (b) => output.append(b.toString()));
+            proc.stderr?.on('data', (b) => output.append(b.toString()));
+            proc.on('error', (e) => {
+                clearTimeout(timer);
+                logError('[datacollectSetup] spawn error', e);
+                output.appendLine(`[Astra Setup] spawn 오류: ${e.message}`);
+                finish(false);
+            });
+            proc.on('close', (code) => {
+                clearTimeout(timer);
+                logInfo(`[datacollectSetup] ${cmd} exited with code=${code}`);
+                finish(code === 0);
+            });
+        } catch (e: any) {
+            output.appendLine(`[Astra Setup] 실행 실패: ${e?.message ?? String(e)}`);
+            finish(false);
+        }
+    });
+}