// Phase 0-b: headless verification of the indexing pipeline.
// Confirms that the sql.js schema + folder walk + sampled hash + upsert + count actually work.
//   node scripts/verify-index.mjs [folder]
import initSqlJs from 'sql.js'
import { readdir, stat, open, mkdtemp, writeFile, rm } from 'node:fs/promises'
import { readFileSync } from 'node:fs'
import { join, extname, dirname } from 'node:path'
import { tmpdir } from 'node:os'
import { createHash } from 'node:crypto'
import { fileURLToPath } from 'node:url'

const __dirname = dirname(fileURLToPath(import.meta.url))

// Lower-cased extensions (with leading dot) that the walker yields.
const IMG = new Set(['.jpg', '.jpeg', '.png', '.webp', '.mp4', '.mov'])

// Upper bound on the number of leading file bytes mixed into a content hash.
const HASH_SAMPLE_BYTES = 512 * 1024

// Single upsert statement shared by every indexing pass, so a re-run exercises
// exactly the same SQL as the first run.
const UPSERT_SQL = `INSERT INTO asset (contentHash,path,ext,sizeBytes,mtime,indexedAt)
  VALUES (?,?,?,?,?,?)
  ON CONFLICT(contentHash) DO UPDATE SET path=excluded.path, mtime=excluded.mtime`

/**
 * Recursively yields the joined path of every regular file under `root`
 * whose extension (compared case-insensitively) is in IMG.
 *
 * @param {string} root - Directory to start from.
 * @yields {string} Path of a matching file.
 */
async function* walk(root) {
  for (const entry of await readdir(root, { withFileTypes: true })) {
    const full = join(root, entry.name)
    if (entry.isDirectory()) {
      yield* walk(full)
    } else if (entry.isFile() && IMG.has(extname(entry.name).toLowerCase())) {
      yield full
    }
  }
}

/**
 * Computes a sampled SHA-1 for a file: the decimal file size followed by up to
 * the first HASH_SAMPLE_BYTES bytes of its content.
 *
 * @param {string} path - File to hash.
 * @returns {Promise<string>} Hex digest.
 */
async function contentHash(path) {
  const s = await stat(path)
  const h = createHash('sha1')
  h.update(String(s.size))
  const len = Math.min(HASH_SAMPLE_BYTES, s.size)
  if (len > 0) {
    const fh = await open(path, 'r')
    try {
      const buf = Buffer.alloc(len)
      let filled = 0
      // A single read() may return fewer bytes than requested; keep reading
      // until the sample is full or the file ends.
      while (filled < len) {
        const { bytesRead } = await fh.read(buf, filled, len - filled, filled)
        if (bytesRead === 0) break // file is shorter than stat() reported
        filled += bytesRead
      }
      // Hash only the bytes actually read, never the zero padding.
      h.update(buf.subarray(0, filled))
    } finally {
      await fh.close()
    }
  }
  return h.digest('hex')
}

/**
 * Walks `folder` and upserts one `asset` row per matching file.
 *
 * @param {object} db - sql.js Database with the `asset` table already created.
 * @param {string} folder - Root folder to index.
 * @returns {Promise<number>} Number of files processed (not the number of rows).
 */
async function indexFolder(db, folder) {
  let processed = 0
  for await (const file of walk(folder)) {
    const s = await stat(file)
    const hash = await contentHash(file)
    db.run(UPSERT_SQL, [
      hash,
      file,
      extname(file).toLowerCase(),
      s.size,
      Math.floor(s.mtimeMs),
      Date.now(),
    ])
    processed++
  }
  return processed
}

/**
 * Returns the current number of rows in the `asset` table.
 *
 * @param {object} db - sql.js Database.
 * @returns {number} Row count.
 */
function countAssets(db) {
  return db.exec('SELECT COUNT(*) FROM asset')[0].values[0][0]
}

/**
 * Runs the verification: index the folder, report the row count, index it a
 * second time, and check that the row count did not change.
 * Sets a non-zero exit code when the check fails.
 */
async function main() {
  let folder = process.argv[2]
  let temp = null
  let db = null
  try {
    // If no folder argument is given, create a temp folder with two dummy images.
    if (!folder) {
      temp = await mkdtemp(join(tmpdir(), 'photoai-idx-'))
      await writeFile(join(temp, 'a.jpg'), Buffer.from('dummy-image-a'))
      await writeFile(join(temp, 'b.png'), Buffer.from('dummy-image-b-different'))
      folder = temp
      console.log('테스트 폴더 생성:', folder)
    }

    const wasm = readFileSync(
      join(__dirname, '..', 'node_modules', 'sql.js', 'dist', 'sql-wasm.wasm')
    )
    // Copy into a fresh Uint8Array so the ArrayBuffer handed over holds exactly the wasm bytes.
    const SQL = await initSqlJs({ wasmBinary: new Uint8Array(wasm).buffer })
    db = new SQL.Database()
    db.run(`CREATE TABLE asset (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      contentHash TEXT UNIQUE NOT NULL,
      path TEXT,
      ext TEXT,
      sizeBytes INTEGER,
      mtime INTEGER,
      indexedAt INTEGER);
    CREATE INDEX idx_path ON asset(path);`)

    const indexed = await indexFolder(db, folder)
    const n = countAssets(db)
    const sample = db.exec('SELECT contentHash, path FROM asset LIMIT 2')
    console.log(`색인 처리: ${indexed}건, DB asset 수: ${n}`)
    if (sample[0]) {
      for (const row of sample[0].values) {
        console.log(' row:', row[0].slice(0, 12), '…', row[1])
      }
    }

    // Re-run the same indexing pass to confirm the upsert does not add duplicates.
    await indexFolder(db, folder)
    const n2 = countAssets(db)
    console.log(`재실행 후 asset 수(중복 없어야 함): ${n2}`)
    if (n === n2) {
      console.log('PASS: upsert 중복 없음')
    } else {
      console.log('FAIL: 중복 발생')
      // Make the failure visible to callers (CI, shell scripts), not just in the log.
      process.exitCode = 1
    }
  } finally {
    // Release the database and the temp folder even when a step above throws.
    if (db) db.close()
    if (temp) await rm(temp, { recursive: true, force: true })
  }
}

main().catch((e) => {
  console.error('오류:', e)
  process.exit(1)
})