node_modules/cacache/lib/verify.js | 257 lines (generated, vendored, new file)
@@ -0,0 +1,257 @@
'use strict'

const util = require('util')

const pMap = require('p-map')
const contentPath = require('./content/path')
const fixOwner = require('./util/fix-owner')
const fs = require('@npmcli/fs')
const fsm = require('fs-minipass')
const glob = util.promisify(require('glob'))
const index = require('./entry-index')
const path = require('path')
const rimraf = util.promisify(require('rimraf'))
const ssri = require('ssri')

const globify = pattern => pattern.split('\\').join('/')

const hasOwnProperty = (obj, key) =>
  Object.prototype.hasOwnProperty.call(obj, key)

const verifyOpts = (opts) => ({
  concurrency: 20,
  log: { silly () {} },
  ...opts,
})

module.exports = verify
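
// A minimal usage sketch (hypothetical caller, not part of this module):
// `verify(cache, opts)` resolves to the merged stats object assembled below.
//
//   const verify = require('./verify')
//   verify('/path/to/cache', { concurrency: 10 }).then((stats) => {
//     console.log(`verified ${stats.verifiedContent} in ${stats.runTime.total}ms`)
//   })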

async function verify (cache, opts) {
  opts = verifyOpts(opts)
  opts.log.silly('verify', 'verifying cache at', cache)

  const steps = [
    markStartTime,
    fixPerms,
    garbageCollect,
    rebuildIndex,
    cleanTmp,
    writeVerifile,
    markEndTime,
  ]
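
  // Each step may return a partial stats object; the loop below merges those
  // results into a single object and records each step's wall-clock duration
  // under stats.runTime, keyed by the step function's name.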
  const stats = {}
  for (const step of steps) {
    const label = step.name
    const start = new Date()
    const s = await step(cache, opts)
    if (s) {
      Object.keys(s).forEach((k) => {
        stats[k] = s[k]
      })
    }
    const end = new Date()
    if (!stats.runTime) {
      stats.runTime = {}
    }
    stats.runTime[label] = end - start
  }
  stats.runTime.total = stats.endTime - stats.startTime
  opts.log.silly(
    'verify',
    'verification finished for',
    cache,
    'in',
    `${stats.runTime.total}ms`
  )
  return stats
}

async function markStartTime (cache, opts) {
  return { startTime: new Date() }
}

async function markEndTime (cache, opts) {
  return { endTime: new Date() }
}

async function fixPerms (cache, opts) {
  opts.log.silly('verify', 'fixing cache permissions')
  await fixOwner.mkdirfix(cache, cache)
  // TODO - fix file permissions too
  await fixOwner.chownr(cache, cache)
  return null
}

// Implements a naive mark-and-sweep tracing garbage collector.
//
// The algorithm is basically as follows:
// 1. Read (and filter) all index entries ("pointers")
// 2. Mark each integrity value as "live"
// 3. Read entire filesystem tree in `content-vX/` dir
// 4. If content is live, verify its checksum and delete it if it fails
// 5. If content is not marked as live, rimraf it.
//
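// For example (layout assumed from ./content/path and ./entry-index; version
// suffixes may differ between cacache releases):
//
//   <cache>/index-v5/aa/bb/<bucket>         index entries ("pointers")
//   <cache>/content-v2/sha512/aa/bb/<hex>   content, addressed by digest
//
// Content whose integrity value appears in some index entry is "live";
// everything else under the content directory gets swept.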
async function garbageCollect (cache, opts) {
  opts.log.silly('verify', 'garbage collecting content')
  const indexStream = index.lsStream(cache)
  const liveContent = new Set()
  indexStream.on('data', (entry) => {
    if (opts.filter && !opts.filter(entry)) {
      return
    }

    liveContent.add(entry.integrity.toString())
  })
  await new Promise((resolve, reject) => {
    indexStream.on('end', resolve).on('error', reject)
  })
  const contentDir = contentPath.contentDir(cache)
  const files = await glob(globify(path.join(contentDir, '**')), {
    follow: false,
    nodir: true,
    nosort: true,
  })
  const stats = {
    verifiedContent: 0,
    reclaimedCount: 0,
    reclaimedSize: 0,
    badContentCount: 0,
    keptSize: 0,
  }
  await pMap(
    files,
    async (f) => {
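      // Each content file path ends in <algo>/<aa>/<bb>/<rest-of-hex>, so the
      // last three segments joined re-form the hex digest, and the fourth
      // segment from the end names the hash algorithm.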
      const split = f.split(/[/\\]/)
      const digest = split.slice(split.length - 3).join('')
      const algo = split[split.length - 4]
      const integrity = ssri.fromHex(digest, algo)
      if (liveContent.has(integrity.toString())) {
        const info = await verifyContent(f, integrity)
        if (!info.valid) {
          stats.reclaimedCount++
          stats.badContentCount++
          stats.reclaimedSize += info.size
        } else {
          stats.verifiedContent++
          stats.keptSize += info.size
        }
      } else {
        // No entries refer to this content. We can delete.
        stats.reclaimedCount++
        const s = await fs.stat(f)
        await rimraf(f)
        stats.reclaimedSize += s.size
      }
      return stats
    },
    { concurrency: opts.concurrency }
  )
  return stats
}

async function verifyContent (filepath, sri) {
  const contentInfo = {}
  try {
    const { size } = await fs.stat(filepath)
    contentInfo.size = size
    contentInfo.valid = true
    await ssri.checkStream(new fsm.ReadStream(filepath), sri)
  } catch (err) {
    // The file vanished out from under us: report it as invalid with zero size.
    if (err.code === 'ENOENT') {
      return { size: 0, valid: false }
    }
    if (err.code !== 'EINTEGRITY') {
      throw err
    }

    // Checksum mismatch: remove the corrupt content so it can be re-fetched.
    await rimraf(filepath)
    contentInfo.valid = false
  }
  return contentInfo
}

async function rebuildIndex (cache, opts) {
  opts.log.silly('verify', 'rebuilding index')
  const entries = await index.ls(cache)
  const stats = {
    missingContent: 0,
    rejectedEntries: 0,
    totalEntries: 0,
  }
  const buckets = {}
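  // Group surviving entries by hashed bucket path. A bucket whose entries are
  // all filtered out still gets an empty record here, so rebuildBucket will
  // truncate its file on disk.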
  for (const k in entries) {
    /* istanbul ignore else */
    if (hasOwnProperty(entries, k)) {
      const hashed = index.hashKey(k)
      const entry = entries[k]
      const excluded = opts.filter && !opts.filter(entry)
      excluded && stats.rejectedEntries++
      if (buckets[hashed] && !excluded) {
        buckets[hashed].push(entry)
      } else if (buckets[hashed] && excluded) {
        // skip
      } else if (excluded) {
        buckets[hashed] = []
        buckets[hashed]._path = index.bucketPath(cache, k)
      } else {
        buckets[hashed] = [entry]
        buckets[hashed]._path = index.bucketPath(cache, k)
      }
    }
  }
  await pMap(
    Object.keys(buckets),
    (key) => {
      return rebuildBucket(cache, buckets[key], stats, opts)
    },
    { concurrency: opts.concurrency }
  )
  return stats
}

async function rebuildBucket (cache, bucket, stats, opts) {
  await fs.truncate(bucket._path)
  // This needs to be serialized because cacache explicitly
  // lets very racy bucket conflicts clobber each other.
  for (const entry of bucket) {
    const content = contentPath(cache, entry.integrity)
    try {
      await fs.stat(content)
      await index.insert(cache, entry.key, entry.integrity, {
        metadata: entry.metadata,
        size: entry.size,
      })
      stats.totalEntries++
    } catch (err) {
      if (err.code === 'ENOENT') {
        stats.rejectedEntries++
        stats.missingContent++
      } else {
        throw err
      }
    }
  }
}

function cleanTmp (cache, opts) {
  opts.log.silly('verify', 'cleaning tmp directory')
  return rimraf(path.join(cache, 'tmp'))
}

function writeVerifile (cache, opts) {
  const verifile = path.join(cache, '_lastverified')
  opts.log.silly('verify', 'writing verifile to ' + verifile)
  try {
    return fs.writeFile(verifile, `${Date.now()}`)
  } finally {
    fixOwner.chownr.sync(cache, verifile)
  }
}

module.exports.lastRun = lastRun
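
// _lastverified (written by writeVerifile above) stores Date.now() as decimal
// text; lastRun parses it back into a Date for callers.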
async function lastRun (cache) {
  const data = await fs.readFile(path.join(cache, '_lastverified'), { encoding: 'utf8' })
  return new Date(+data)
}