Files
photoai/scripts/verify-index.mjs
T
koriweb 72c41ae834 Add NextGen library: index DB, thumbnails, AI culling, and CLIP search
Builds the "indexed library" foundation and first intelligent features on
top of the organizer (sql.js index, non-destructive in-place indexing).

Phase 0 — Library index:
- sql.js (WASM SQLite) index DB, chosen over native better-sqlite3 (which
  fails to build on Node 24 / Python 3.12); contentHash-keyed assets,
  resumable indexing (skip by path+mtime), batch persistence
- Library folders (in place, non-destructive) + background indexer w/ progress
- Thumbnails generated in the AI worker (canvas->webp), cached in userData;
  served via photoai-media://thumb by hash; thumbnail grid w/ pagination

Phase 1 — AI quality assessment & culling:
- Focus (Laplacian variance), exposure (histogram), eyes-open (face-api EAR)
  computed in one analyze pass alongside the thumbnail
- Culling filters (candidate/rejected) + quality badges
- Adjustable thresholds (live SQL re-classification from stored raw scores,
  no re-analysis) + manual star rating (0-5) and color labels (usermeta)

Phase 2 — CLIP natural-language / similarity search:
- @huggingface/transformers (WASM/WebGPU, no native build)
- CLIP image/text embeddings (lazy-loaded); Korean queries auto-translated
  via opus-mt-ko-en into the English CLIP
- Embeddings stored as SQLite BLOBs; "build search index" batch w/ progress;
  brute-force cosine search; new Search tab
- Note: models download from HF Hub on first use; fully-offline ORT-wasm
  packaging and KO search-accuracy tuning are follow-ups

Tabs added (Organize / Library / Search). Typecheck, tests (12), and build are
all green; boot smoke test verified across phases.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-01 17:32:51 +09:00

101 lines
3.7 KiB
JavaScript

// Headless verification of the Phase 0-b indexing pipeline.
// Checks that the sql.js schema, folder walk, sampled hash, upsert, and count actually work.
// Usage: node scripts/verify-index.mjs [folder]
import initSqlJs from 'sql.js'
import { readdir, stat, open, mkdtemp, writeFile, rm } from 'node:fs/promises'
import { readFileSync } from 'node:fs'
import { join, extname, dirname } from 'node:path'
import { tmpdir } from 'node:os'
import { createHash } from 'node:crypto'
import { fileURLToPath } from 'node:url'
// ES modules have no built-in __dirname; derive it from this module's URL.
const scriptFile = fileURLToPath(import.meta.url);
const __dirname = dirname(scriptFile);

// Lower-case file extensions that the folder walk treats as indexable media.
const IMG = new Set([
  '.jpg',
  '.jpeg',
  '.png',
  '.webp',
  '.mp4',
  '.mov',
]);
/**
 * Recursively yields the path of every media file found under `root`.
 *
 * Directory entries are visited in the order `readdir` returns them, and a
 * sub-directory is fully traversed before the next sibling entry is looked at.
 * Only entries reported as regular files whose lower-cased extension is in
 * `IMG` are yielded; entries that are neither a directory nor a regular file
 * are skipped.
 *
 * @param {string} root - Directory to scan.
 * @yields {string} `root` joined with the relative location of a matching file.
 */
async function* walk(root) {
  const entries = await readdir(root, { withFileTypes: true });
  for (const entry of entries) {
    const entryPath = join(root, entry.name);
    if (entry.isDirectory()) {
      yield* walk(entryPath);
      continue;
    }
    if (!entry.isFile()) continue;
    const extension = extname(entry.name).toLowerCase();
    if (IMG.has(extension)) yield entryPath;
  }
}
/**
 * Computes a sampled SHA-1 fingerprint of a file.
 *
 * The digest covers the file size (as a decimal string) followed by a buffer
 * holding the first `min(512 KiB, size)` bytes of the file. A zero-length file
 * is fingerprinted from its size alone and is never opened.
 *
 * NOTE(review): the byte count returned by `read` is not checked; the whole
 * zero-initialised buffer is hashed regardless of how many bytes were filled.
 *
 * @param {string} path - File to fingerprint.
 * @returns {Promise<string>} Hex-encoded SHA-1 digest.
 */
async function contentHash(path) {
  const SAMPLE_LIMIT = 512 * 1024;

  const { size } = await stat(path);
  const sha1 = createHash('sha1');
  sha1.update(String(size));

  const sampleLength = Math.min(SAMPLE_LIMIT, size);
  if (sampleLength <= 0) {
    return sha1.digest('hex');
  }

  const handle = await open(path, 'r');
  try {
    const sample = Buffer.alloc(sampleLength);
    // Read from file offset 0 into the start of the buffer.
    await handle.read(sample, 0, sampleLength, 0);
    sha1.update(sample);
  } finally {
    await handle.close();
  }
  return sha1.digest('hex');
}
/**
 * Headless check of the indexing pipeline against an in-memory sql.js database.
 *
 * Steps: take the folder from `process.argv[2]` (or create a temporary folder
 * holding two dummy files), create the `asset` table, index the folder, print
 * the row count and up to two sample rows, index the same folder a second
 * time, and report PASS when the row count did not change.
 *
 * The database and the temporary folder are released in `finally`, so they are
 * cleaned up even when a step throws. A FAIL result sets `process.exitCode`
 * to 1 so that callers (shell scripts, CI) can detect it.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let folder = process.argv[2];
  let temp = null;
  let db = null;
  try {
    // No folder argument: create a temporary folder with two dummy images.
    if (!folder) {
      temp = await mkdtemp(join(tmpdir(), 'photoai-idx-'));
      await writeFile(join(temp, 'a.jpg'), Buffer.from('dummy-image-a'));
      await writeFile(join(temp, 'b.png'), Buffer.from('dummy-image-b-different'));
      folder = temp;
      console.log('테스트 폴더 생성:', folder);
    }

    const wasm = readFileSync(join(__dirname, '..', 'node_modules', 'sql.js', 'dist', 'sql-wasm.wasm'));
    // Copy into a fresh Uint8Array so `.buffer` holds exactly the wasm bytes.
    const SQL = await initSqlJs({ wasmBinary: new Uint8Array(wasm).buffer });
    db = new SQL.Database();
    db.run(`CREATE TABLE asset (
id INTEGER PRIMARY KEY AUTOINCREMENT, contentHash TEXT UNIQUE NOT NULL,
path TEXT, ext TEXT, sizeBytes INTEGER, mtime INTEGER, indexedAt INTEGER);
CREATE INDEX idx_path ON asset(path);`);

    const indexed = await indexFolder(db, folder);
    const n = countAssets(db);
    const sample = db.exec('SELECT contentHash, path FROM asset LIMIT 2');
    console.log(`색인 처리: ${indexed}건, DB asset 수: ${n}`);
    if (sample[0]) {
      for (const row of sample[0].values) console.log(' row:', row[0].slice(0, 12), '…', row[1]);
    }

    // Re-run the identical upsert pass; the row count must not grow.
    await indexFolder(db, folder);
    const n2 = countAssets(db);
    console.log(`재실행 후 asset 수(중복 없어야 함): ${n2}`);
    if (n === n2) {
      console.log('PASS: upsert 중복 없음');
    } else {
      console.log('FAIL: 중복 발생');
      // Signal the failure through the exit status while still letting the
      // cleanup in `finally` run.
      process.exitCode = 1;
    }
  } finally {
    try {
      if (db) db.close();
    } finally {
      if (temp) await rm(temp, { recursive: true, force: true });
    }
  }
}

/**
 * Upserts one `asset` row per media file found under `folder`, keyed by
 * content hash.
 *
 * @param {object} db - Open sql.js database that already has the `asset` table.
 * @param {string} folder - Root folder to walk.
 * @returns {Promise<number>} Number of files processed (not the number of rows).
 */
async function indexFolder(db, folder) {
  let processed = 0;
  for await (const file of walk(folder)) {
    const s = await stat(file);
    const hash = await contentHash(file);
    db.run(
      `INSERT INTO asset (contentHash,path,ext,sizeBytes,mtime,indexedAt)
VALUES (?,?,?,?,?,?)
ON CONFLICT(contentHash) DO UPDATE SET path=excluded.path, mtime=excluded.mtime`,
      [hash, file, extname(file).toLowerCase(), s.size, Math.floor(s.mtimeMs), Date.now()]
    );
    processed++;
  }
  return processed;
}

/**
 * Returns the current number of rows in the `asset` table.
 *
 * @param {object} db - Open sql.js database.
 * @returns {number} Row count.
 */
function countAssets(db) {
  return db.exec('SELECT COUNT(*) FROM asset')[0].values[0][0];
}
// Script entry point: run the check and, if it rejects, log the error and
// terminate the process with exit status 1.
function reportFatal(err) {
  console.error('오류:', err);
  process.exit(1);
}

main().catch(reportFatal);