'use strict' // the PEND/UNPEND stuff tracks whether we're ready to emit end/close yet. // but the path reservations are required to avoid race conditions where // parallelized unpack ops may mess with one another, due to dependencies // (like a Link depending on its target) or destructive operations (like // clobbering an fs object to create one of a different type.) const assert = require('assert') const Parser = require('./parse.js') const fs = require('fs') const fsm = require('fs-minipass') const path = require('path') const mkdir = require('./mkdir.js') const wc = require('./winchars.js') const pathReservations = require('./path-reservations.js') const stripAbsolutePath = require('./strip-absolute-path.js') const normPath = require('./normalize-windows-path.js') const stripSlash = require('./strip-trailing-slashes.js') const normalize = require('./normalize-unicode.js') const ONENTRY = Symbol('onEntry') const CHECKFS = Symbol('checkFs') const CHECKFS2 = Symbol('checkFs2') const PRUNECACHE = Symbol('pruneCache') const ISREUSABLE = Symbol('isReusable') const MAKEFS = Symbol('makeFs') const FILE = Symbol('file') const DIRECTORY = Symbol('directory') const LINK = Symbol('link') const SYMLINK = Symbol('symlink') const HARDLINK = Symbol('hardlink') const UNSUPPORTED = Symbol('unsupported') const CHECKPATH = Symbol('checkPath') const MKDIR = Symbol('mkdir') const ONERROR = Symbol('onError') const PENDING = Symbol('pending') const PEND = Symbol('pend') const UNPEND = Symbol('unpend') const ENDED = Symbol('ended') const MAYBECLOSE = Symbol('maybeClose') const SKIP = Symbol('skip') const DOCHOWN = Symbol('doChown') const UID = Symbol('uid') const GID = Symbol('gid') const CHECKED_CWD = Symbol('checkedCwd') const crypto = require('crypto') const getFlag = require('./get-write-flag.js') const platform = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform const isWindows = platform === 'win32' // Unlinks on Windows are not atomic. // // This means that if you have a file entry, followed by another // file entry with an identical name, and you cannot re-use the file // (because it's a hardlink, or because unlink:true is set, or it's // Windows, which does not have useful nlink values), then the unlink // will be committed to the disk AFTER the new file has been written // over the old one, deleting the new file. // // To work around this, on Windows systems, we rename the file and then // delete the renamed file. It's a sloppy kludge, but frankly, I do not // know of a better way to do this, given windows' non-atomic unlink // semantics. // // See: https://github.com/npm/node-tar/issues/183 /* istanbul ignore next */ const unlinkFile = (path, cb) => { if (!isWindows) return fs.unlink(path, cb) const name = path + '.DELETE.' + crypto.randomBytes(16).toString('hex') fs.rename(path, name, er => { if (er) return cb(er) fs.unlink(name, cb) }) } /* istanbul ignore next */ const unlinkFileSync = path => { if (!isWindows) return fs.unlinkSync(path) const name = path + '.DELETE.' + crypto.randomBytes(16).toString('hex') fs.renameSync(path, name) fs.unlinkSync(name) } // this.gid, entry.gid, this.processUid const uint32 = (a, b, c) => a === a >>> 0 ? a : b === b >>> 0 ? b : c // clear the cache if it's a case-insensitive unicode-squashing match. // we can't know if the current file system is case-sensitive or supports // unicode fully, so we check for similarity on the maximally compatible // representation. Err on the side of pruning, since all it's doing is // preventing lstats, and it's not the end of the world if we get a false // positive. // Note that on windows, we always drop the entire cache whenever a // symbolic link is encountered, because 8.3 filenames are impossible // to reason about, and collisions are hazards rather than just failures. const cacheKeyNormalize = path => normalize(stripSlash(normPath(path))) .toLowerCase() const pruneCache = (cache, abs) => { abs = cacheKeyNormalize(abs) for (const path of cache.keys()) { const pnorm = cacheKeyNormalize(path) if (pnorm === abs || pnorm.indexOf(abs + '/') === 0) cache.delete(path) } } const dropCache = cache => { for (const key of cache.keys()) cache.delete(key) } class Unpack extends Parser { constructor (opt) { if (!opt) opt = {} opt.ondone = _ => { this[ENDED] = true this[MAYBECLOSE]() } super(opt) this[CHECKED_CWD] = false this.reservations = pathReservations() this.transform = typeof opt.transform === 'function' ? opt.transform : null this.writable = true this.readable = false this[PENDING] = 0 this[ENDED] = false this.dirCache = opt.dirCache || new Map() if (typeof opt.uid === 'number' || typeof opt.gid === 'number') { // need both or neither if (typeof opt.uid !== 'number' || typeof opt.gid !== 'number') throw new TypeError('cannot set owner without number uid and gid') if (opt.preserveOwner) { throw new TypeError( 'cannot preserve owner in archive and also set owner explicitly') } this.uid = opt.uid this.gid = opt.gid this.setOwner = true } else { this.uid = null this.gid = null this.setOwner = false } // default true for root if (opt.preserveOwner === undefined && typeof opt.uid !== 'number') this.preserveOwner = process.getuid && process.getuid() === 0 else this.preserveOwner = !!opt.preserveOwner this.processUid = (this.preserveOwner || this.setOwner) && process.getuid ? process.getuid() : null this.processGid = (this.preserveOwner || this.setOwner) && process.getgid ? process.getgid() : null // mostly just for testing, but useful in some cases. // Forcibly trigger a chown on every entry, no matter what this.forceChown = opt.forceChown === true // turn > this[ONENTRY](entry)) } // a bad or damaged archive is a warning for Parser, but an error // when extracting. Mark those errors as unrecoverable, because // the Unpack contract cannot be met. warn (code, msg, data = {}) { if (code === 'TAR_BAD_ARCHIVE' || code === 'TAR_ABORT') data.recoverable = false return super.warn(code, msg, data) } [MAYBECLOSE] () { if (this[ENDED] && this[PENDING] === 0) { this.emit('prefinish') this.emit('finish') this.emit('end') this.emit('close') } } [CHECKPATH] (entry) { if (this.strip) { const parts = normPath(entry.path).split('/') if (parts.length < this.strip) return false entry.path = parts.slice(this.strip).join('/') if (entry.type === 'Link') { const linkparts = normPath(entry.linkpath).split('/') if (linkparts.length >= this.strip) entry.linkpath = linkparts.slice(this.strip).join('/') else return false } } if (!this.preservePaths) { const p = normPath(entry.path) const parts = p.split('/') if (parts.includes('..') || isWindows && /^[a-z]:\.\.$/i.test(parts[0])) { this.warn('TAR_ENTRY_ERROR', `path contains '..'`, { entry, path: p, }) return false } // strip off the root const [root, stripped] = stripAbsolutePath(p) if (root) { entry.path = stripped this.warn('TAR_ENTRY_INFO', `stripping ${root} from absolute path`, { entry, path: p, }) } } if (path.isAbsolute(entry.path)) entry.absolute = normPath(path.resolve(entry.path)) else entry.absolute = normPath(path.resolve(this.cwd, entry.path)) // if we somehow ended up with a path that escapes the cwd, and we are // not in preservePaths mode, then something is fishy! This should have // been prevented above, so ignore this for coverage. /* istanbul ignore if - defense in depth */ if (!this.preservePaths && entry.absolute.indexOf(this.cwd + '/') !== 0 && entry.absolute !== this.cwd) { this.warn('TAR_ENTRY_ERROR', 'path escaped extraction target', { entry, path: normPath(entry.path), resolvedPath: entry.absolute, cwd: this.cwd, }) return false } // an archive can set properties on the extraction directory, but it // may not replace the cwd with a different kind of thing entirely. if (entry.absolute === this.cwd && entry.type !== 'Directory' && entry.type !== 'GNUDumpDir') return false // only encode : chars that aren't drive letter indicators if (this.win32) { const { root: aRoot } = path.win32.parse(entry.absolute) entry.absolute = aRoot + wc.encode(entry.absolute.substr(aRoot.length)) const { root: pRoot } = path.win32.parse(entry.path) entry.path = pRoot + wc.encode(entry.path.substr(pRoot.length)) } return true } [ONENTRY] (entry) { if (!this[CHECKPATH](entry)) return entry.resume() assert.equal(typeof entry.absolute, 'string') switch (entry.type) { case 'Directory': case 'GNUDumpDir': if (entry.mode) entry.mode = entry.mode | 0o700 case 'File': case 'OldFile': case 'ContiguousFile': case 'Link': case 'SymbolicLink': return this[CHECKFS](entry) case 'CharacterDevice': case 'BlockDevice': case 'FIFO': default: return this[UNSUPPORTED](entry) } } [ONERROR] (er, entry) { // Cwd has to exist, or else nothing works. That's serious. // Other errors are warnings, which raise the error in strict // mode, but otherwise continue on. if (er.name === 'CwdError') this.emit('error', er) else { this.warn('TAR_ENTRY_ERROR', er, {entry}) this[UNPEND]() entry.resume() } } [MKDIR] (dir, mode, cb) { mkdir(normPath(dir), { uid: this.uid, gid: this.gid, processUid: this.processUid, processGid: this.processGid, umask: this.processUmask, preserve: this.preservePaths, unlink: this.unlink, cache: this.dirCache, cwd: this.cwd, mode: mode, noChmod: this.noChmod, }, cb) } [DOCHOWN] (entry) { // in preserve owner mode, chown if the entry doesn't match process // in set owner mode, chown if setting doesn't match process return this.forceChown || this.preserveOwner && (typeof entry.uid === 'number' && entry.uid !== this.processUid || typeof entry.gid === 'number' && entry.gid !== this.processGid) || (typeof this.uid === 'number' && this.uid !== this.processUid || typeof this.gid === 'number' && this.gid !== this.processGid) } [UID] (entry) { return uint32(this.uid, entry.uid, this.processUid) } [GID] (entry) { return uint32(this.gid, entry.gid, this.processGid) } [FILE] (entry, fullyDone) { const mode = entry.mode & 0o7777 || this.fmode const stream = new fsm.WriteStream(entry.absolute, { flags: getFlag(entry.size), mode: mode, autoClose: false, }) stream.on('error', er => { if (stream.fd) fs.close(stream.fd, () => {}) // flush all the data out so that we aren't left hanging // if the error wasn't actually fatal. otherwise the parse // is blocked, and we never proceed. stream.write = () => true this[ONERROR](er, entry) fullyDone() }) let actions = 1 const done = er => { if (er) { /* istanbul ignore else - we should always have a fd by now */ if (stream.fd) fs.close(stream.fd, () => {}) this[ONERROR](er, entry) fullyDone() return } if (--actions === 0) { fs.close(stream.fd, er => { if (er) this[ONERROR](er, entry) else this[UNPEND]() fullyDone() }) } } stream.on('finish', _ => { // if futimes fails, try utimes // if utimes fails, fail with the original error // same for fchown/chown const abs = entry.absolute const fd = stream.fd if (entry.mtime && !this.noMtime) { actions++ const atime = entry.atime || new Date() const mtime = entry.mtime fs.futimes(fd, atime, mtime, er => er ? fs.utimes(abs, atime, mtime, er2 => done(er2 && er)) : done()) } if (this[DOCHOWN](entry)) { actions++ const uid = this[UID](entry) const gid = this[GID](entry) fs.fchown(fd, uid, gid, er => er ? fs.chown(abs, uid, gid, er2 => done(er2 && er)) : done()) } done() }) const tx = this.transform ? this.transform(entry) || entry : entry if (tx !== entry) { tx.on('error', er => { this[ONERROR](er, entry) fullyDone() }) entry.pipe(tx) } tx.pipe(stream) } [DIRECTORY] (entry, fullyDone) { const mode = entry.mode & 0o7777 || this.dmode this[MKDIR](entry.absolute, mode, er => { if (er) { this[ONERROR](er, entry) fullyDone() return } let actions = 1 const done = _ => { if (--actions === 0) { fullyDone() this[UNPEND]() entry.resume() } } if (entry.mtime && !this.noMtime) { actions++ fs.utimes(entry.absolute, entry.atime || new Date(), entry.mtime, done) } if (this[DOCHOWN](entry)) { actions++ fs.chown(entry.absolute, this[UID](entry), this[GID](entry), done) } done() }) } [UNSUPPORTED] (entry) { entry.unsupported = true this.warn('TAR_ENTRY_UNSUPPORTED', `unsupported entry type: ${entry.type}`, {entry}) entry.resume() } [SYMLINK] (entry, done) { this[LINK](entry, entry.linkpath, 'symlink', done) } [HARDLINK] (entry, done) { const linkpath = normPath(path.resolve(this.cwd, entry.linkpath)) this[LINK](entry, linkpath, 'link', done) } [PEND] () { this[PENDING]++ } [UNPEND] () { this[PENDING]-- this[MAYBECLOSE]() } [SKIP] (entry) { this[UNPEND]() entry.resume() } // Check if we can reuse an existing filesystem entry safely and // overwrite it, rather than unlinking and recreating // Windows doesn't report a useful nlink, so we just never reuse entries [ISREUSABLE] (entry, st) { return entry.type === 'File' && !this.unlink && st.isFile() && st.nlink <= 1 && !isWindows } // check if a thing is there, and if so, try to clobber it [CHECKFS] (entry) { this[PEND]() const paths = [entry.path] if (entry.linkpath) paths.push(entry.linkpath) this.reservations.reserve(paths, done => this[CHECKFS2](entry, done)) } [PRUNECACHE] (entry) { // if we are not creating a directory, and the path is in the dirCache, // then that means we are about to delete the directory we created // previously, and it is no longer going to be a directory, and neither // is any of its children. // If a symbolic link is encountered, all bets are off. There is no // reasonable way to sanitize the cache in such a way we will be able to // avoid having filesystem collisions. If this happens with a non-symlink // entry, it'll just fail to unpack, but a symlink to a directory, using an // 8.3 shortname or certain unicode attacks, can evade detection and lead // to arbitrary writes to anywhere on the system. if (entry.type === 'SymbolicLink') dropCache(this.dirCache) else if (entry.type !== 'Directory') pruneCache(this.dirCache, entry.absolute) } [CHECKFS2] (entry, fullyDone) { this[PRUNECACHE](entry) const done = er => { this[PRUNECACHE](entry) fullyDone(er) } const checkCwd = () => { this[MKDIR](this.cwd, this.dmode, er => { if (er) { this[ONERROR](er, entry) done() return } this[CHECKED_CWD] = true start() }) } const start = () => { if (entry.absolute !== this.cwd) { const parent = normPath(path.dirname(entry.absolute)) if (parent !== this.cwd) { return this[MKDIR](parent, this.dmode, er => { if (er) { this[ONERROR](er, entry) done() return } afterMakeParent() }) } } afterMakeParent() } const afterMakeParent = () => { fs.lstat(entry.absolute, (lstatEr, st) => { if (st && (this.keep || this.newer && st.mtime > entry.mtime)) { this[SKIP](entry) done() return } if (lstatEr || this[ISREUSABLE](entry, st)) return this[MAKEFS](null, entry, done) if (st.isDirectory()) { if (entry.type === 'Directory') { const needChmod = !this.noChmod && entry.mode && (st.mode & 0o7777) !== entry.mode const afterChmod = er => this[MAKEFS](er, entry, done) if (!needChmod) return afterChmod() return fs.chmod(entry.absolute, entry.mode, afterChmod) } // Not a dir entry, have to remove it. // NB: the only way to end up with an entry that is the cwd // itself, in such a way that == does not detect, is a // tricky windows absolute path with UNC or 8.3 parts (and // preservePaths:true, or else it will have been stripped). // In that case, the user has opted out of path protections // explicitly, so if they blow away the cwd, c'est la vie. if (entry.absolute !== this.cwd) { return fs.rmdir(entry.absolute, er => this[MAKEFS](er, entry, done)) } } // not a dir, and not reusable // don't remove if the cwd, we want that error if (entry.absolute === this.cwd) return this[MAKEFS](null, entry, done) unlinkFile(entry.absolute, er => this[MAKEFS](er, entry, done)) }) } if (this[CHECKED_CWD]) start() else checkCwd() } [MAKEFS] (er, entry, done) { if (er) { this[ONERROR](er, entry) done() return } switch (entry.type) { case 'File': case 'OldFile': case 'ContiguousFile': return this[FILE](entry, done) case 'Link': return this[HARDLINK](entry, done) case 'SymbolicLink': return this[SYMLINK](entry, done) case 'Directory': case 'GNUDumpDir': return this[DIRECTORY](entry, done) } } [LINK] (entry, linkpath, link, done) { // XXX: get the type ('symlink' or 'junction') for windows fs[link](linkpath, entry.absolute, er => { if (er) this[ONERROR](er, entry) else { this[UNPEND]() entry.resume() } done() }) } } const callSync = fn => { try { return [null, fn()] } catch (er) { return [er, null] } } class UnpackSync extends Unpack { [MAKEFS] (er, entry) { return super[MAKEFS](er, entry, () => {}) } [CHECKFS] (entry) { this[PRUNECACHE](entry) if (!this[CHECKED_CWD]) { const er = this[MKDIR](this.cwd, this.dmode) if (er) return this[ONERROR](er, entry) this[CHECKED_CWD] = true } // don't bother to make the parent if the current entry is the cwd, // we've already checked it. if (entry.absolute !== this.cwd) { const parent = normPath(path.dirname(entry.absolute)) if (parent !== this.cwd) { const mkParent = this[MKDIR](parent, this.dmode) if (mkParent) return this[ONERROR](mkParent, entry) } } const [lstatEr, st] = callSync(() => fs.lstatSync(entry.absolute)) if (st && (this.keep || this.newer && st.mtime > entry.mtime)) return this[SKIP](entry) if (lstatEr || this[ISREUSABLE](entry, st)) return this[MAKEFS](null, entry) if (st.isDirectory()) { if (entry.type === 'Directory') { const needChmod = !this.noChmod && entry.mode && (st.mode & 0o7777) !== entry.mode const [er] = needChmod ? callSync(() => { fs.chmodSync(entry.absolute, entry.mode) }) : [] return this[MAKEFS](er, entry) } // not a dir entry, have to remove it const [er] = callSync(() => fs.rmdirSync(entry.absolute)) this[MAKEFS](er, entry) } // not a dir, and not reusable. // don't remove if it's the cwd, since we want that error. const [er] = entry.absolute === this.cwd ? [] : callSync(() => unlinkFileSync(entry.absolute)) this[MAKEFS](er, entry) } [FILE] (entry, done) { const mode = entry.mode & 0o7777 || this.fmode const oner = er => { let closeError try { fs.closeSync(fd) } catch (e) { closeError = e } if (er || closeError) this[ONERROR](er || closeError, entry) done() } let fd try { fd = fs.openSync(entry.absolute, getFlag(entry.size), mode) } catch (er) { return oner(er) } const tx = this.transform ? this.transform(entry) || entry : entry if (tx !== entry) { tx.on('error', er => this[ONERROR](er, entry)) entry.pipe(tx) } tx.on('data', chunk => { try { fs.writeSync(fd, chunk, 0, chunk.length) } catch (er) { oner(er) } }) tx.on('end', _ => { let er = null // try both, falling futimes back to utimes // if either fails, handle the first error if (entry.mtime && !this.noMtime) { const atime = entry.atime || new Date() const mtime = entry.mtime try { fs.futimesSync(fd, atime, mtime) } catch (futimeser) { try { fs.utimesSync(entry.absolute, atime, mtime) } catch (utimeser) { er = futimeser } } } if (this[DOCHOWN](entry)) { const uid = this[UID](entry) const gid = this[GID](entry) try { fs.fchownSync(fd, uid, gid) } catch (fchowner) { try { fs.chownSync(entry.absolute, uid, gid) } catch (chowner) { er = er || fchowner } } } oner(er) }) } [DIRECTORY] (entry, done) { const mode = entry.mode & 0o7777 || this.dmode const er = this[MKDIR](entry.absolute, mode) if (er) { this[ONERROR](er, entry) done() return } if (entry.mtime && !this.noMtime) { try { fs.utimesSync(entry.absolute, entry.atime || new Date(), entry.mtime) } catch (er) {} } if (this[DOCHOWN](entry)) { try { fs.chownSync(entry.absolute, this[UID](entry), this[GID](entry)) } catch (er) {} } done() entry.resume() } [MKDIR] (dir, mode) { try { return mkdir.sync(normPath(dir), { uid: this.uid, gid: this.gid, processUid: this.processUid, processGid: this.processGid, umask: this.processUmask, preserve: this.preservePaths, unlink: this.unlink, cache: this.dirCache, cwd: this.cwd, mode: mode, }) } catch (er) { return er } } [LINK] (entry, linkpath, link, done) { try { fs[link + 'Sync'](linkpath, entry.absolute) done() entry.resume() } catch (er) { return this[ONERROR](er, entry) } } } Unpack.Sync = UnpackSync module.exports = Unpack