Merge pull request #5 from eliot-akira/stream-files-by-line

Stream database file line by line to avoid string length limit in Node.js
Authored by tex0l 3 years ago, committed by GitHub
commit 1d8c88842b
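For context on the motivation above: V8 caps the length of a single JavaScript string (exposed in Node.js as `buffer.constants.MAX_STRING_LENGTH`, roughly 2^29 characters on 64-bit builds, version-dependent), so concatenating every serialized document into one string can throw before anything is written. A hypothetical illustration, not part of this diff:

const { constants } = require('buffer')
// Largest string V8 will allocate; building one giant string for a big
// datafile can exceed it and throw "RangeError: Invalid string length".
console.log(constants.MAX_STRING_LENGTH)
// Reading and writing the datafile line by line, as this PR does, never
// materializes the whole file as a single string, so the limit no longer applies.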
Changed files (16; number of lines changed per file):

  1. CHANGELOG.md (15)
  2. browser-version/lib/byline.js (1)
  3. browser-version/lib/storage.js (7)
  4. lib/byline.js (153)
  5. lib/persistence.js (90)
  6. lib/storage.js (36)
  7. package-lock.json (46)
  8. package.json (10)
  9. test/byline.test.js (207)
  10. test/byline/CRLF.txt (2)
  11. test/byline/empty.txt (6)
  12. test/byline/rfc.txt (9859)
  13. test/byline/rfc_huge.txt (19718)
  14. test/persistence.test.js (154)
  15. test_lac/loadAndCrash.test.js (12)
  16. webpack.config.js (1)

@@ -6,6 +6,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres
to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [2.1.0-2] - 2021-10-14
### Changed
- properly stream the writing of the database file
## [2.1.0-1] - 2021-10-07
### Changed
- fixed package.json browser field for byline.js
- last minute improvements on [PR](https://github.com/seald/nedb/pull/5)
## [2.1.0-0] - 2021-10-05
Thanks to [@eliot-akira](https://github.com/eliot-akira) for the amazing work on this.
### Changed
- [implement file streaming of the database](https://github.com/seald/nedb/pull/5) like [a PR on the original repo](https://github.com/louischatriot/nedb/pull/463) did;
- internalize [`byline`](https://github.com/jahewson/node-byline) package because it is unmaintained.
## [2.0.4] - 2021-07-12
### Fixed
- switch back to an AVLTree instead of a BinarySearchTree like the original nedb to fix [#1](https://github.com/seald/nedb/issues/1).

@@ -0,0 +1 @@
module.exports = {}
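// Empty stub: the browser build never streams files, so the real byline module
// is swapped for this no-op via the package.json "browser" field and the
// webpack NormalModuleReplacementPlugin configured below.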

@@ -68,11 +68,16 @@ const mkdir = (dir, options, callback) => callback()
// Nothing to do, no data corruption possible in the browser
const ensureDatafileIntegrity = (filename, callback) => callback(null)
const crashSafeWriteFileLines = (filename, lines, callback) => {
lines.push('') // Add final new line
writeFile(filename, lines.join('\n'), callback)
}
// Interface
module.exports.exists = exists
module.exports.rename = rename
module.exports.writeFile = writeFile
module.exports.crashSafeWriteFile = writeFile // No need for a crash safe function in the browser
module.exports.crashSafeWriteFileLines = crashSafeWriteFileLines
module.exports.appendFile = appendFile
module.exports.readFile = readFile
module.exports.unlink = unlink

@@ -0,0 +1,153 @@
// Forked from https://github.com/jahewson/node-byline
// Copyright (C) 2011-2015 John Hewson
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
const stream = require('stream')
const util = require('util')
const timers = require('timers')
// convenience API
module.exports = function (readStream, options) {
return module.exports.createStream(readStream, options)
}
// basic API
module.exports.createStream = function (readStream, options) {
if (readStream) {
return createLineStream(readStream, options)
} else {
return new LineStream(options)
}
}
// deprecated API
module.exports.createLineStream = function (readStream) {
console.log('WARNING: byline#createLineStream is deprecated and will be removed soon')
return createLineStream(readStream)
}
function createLineStream (readStream, options) {
if (!readStream) {
throw new Error('expected readStream')
}
if (!readStream.readable) {
throw new Error('readStream must be readable')
}
const ls = new LineStream(options)
readStream.pipe(ls)
return ls
}
//
// using the new node v0.10 "streams2" API
//
module.exports.LineStream = LineStream
function LineStream (options) {
stream.Transform.call(this, options)
options = options || {}
// use objectMode to stop the output from being buffered
// which re-concatenates the lines, just without newlines.
this._readableState.objectMode = true
this._lineBuffer = []
this._keepEmptyLines = options.keepEmptyLines || false
this._lastChunkEndedWithCR = false
// take the source's encoding if we don't have one
const self = this
this.on('pipe', function (src) {
if (!self.encoding) {
// but we can't do this for old-style streams
if (src instanceof stream.Readable) {
self.encoding = src._readableState.encoding
}
}
})
}
util.inherits(LineStream, stream.Transform)
LineStream.prototype._transform = function (chunk, encoding, done) {
// decode binary chunks as UTF-8
encoding = encoding || 'utf8'
if (Buffer.isBuffer(chunk)) {
if (encoding === 'buffer') {
chunk = chunk.toString() // utf8
encoding = 'utf8'
} else {
chunk = chunk.toString(encoding)
}
}
this._chunkEncoding = encoding
// see: http://www.unicode.org/reports/tr18/#Line_Boundaries
const lines = chunk.split(/\r\n|[\n\v\f\r\x85\u2028\u2029]/g)
// don't split CRLF which spans chunks
if (this._lastChunkEndedWithCR && chunk[0] === '\n') {
lines.shift()
}
if (this._lineBuffer.length > 0) {
this._lineBuffer[this._lineBuffer.length - 1] += lines[0]
lines.shift()
}
this._lastChunkEndedWithCR = chunk[chunk.length - 1] === '\r'
this._lineBuffer = this._lineBuffer.concat(lines)
this._pushBuffer(encoding, 1, done)
}
LineStream.prototype._pushBuffer = function (encoding, keep, done) {
// always buffer the last (possibly partial) line
while (this._lineBuffer.length > keep) {
const line = this._lineBuffer.shift()
// skip empty lines
if (this._keepEmptyLines || line.length > 0) {
if (!this.push(this._reencode(line, encoding))) {
// when the high-water mark is reached, defer pushes until the next tick
timers.setImmediate(() => {
this._pushBuffer(encoding, keep, done)
})
return
}
}
}
done()
}
LineStream.prototype._flush = function (done) {
this._pushBuffer(this._chunkEncoding, 0, done)
}
// see Readable::push
LineStream.prototype._reencode = function (line, chunkEncoding) {
if (this.encoding && this.encoding !== chunkEncoding) {
return Buffer.from(line, chunkEncoding).toString(this.encoding)
} else if (this.encoding) {
// this should be the most common case, i.e. we're using an encoded source stream
return line
} else {
return Buffer.from(line, chunkEncoding)
}
}
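A minimal usage sketch of the module above (hypothetical datafile path; `keepEmptyLines: true` is the option persistence.js passes below):

const fs = require('fs')
const byline = require('./byline')

// Wrap any readable stream; the Transform re-emits its input one line at a
// time in object mode, so lines are never re-concatenated into one string.
const lineStream = byline(fs.createReadStream('datafile.db', { encoding: 'utf8' }), { keepEmptyLines: true })
lineStream.on('data', line => { /* one line per event */ })
lineStream.on('end', () => { /* whole file consumed */ })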

@@ -6,6 +6,7 @@
*/
const path = require('path')
const async = require('async')
const byline = require('./byline')
const customUtils = require('./customUtils.js')
const Index = require('./indexes.js')
const model = require('./model.js')
@@ -72,26 +73,26 @@ class Persistence {
* @param {Function} callback Optional callback, signature: err
*/
persistCachedDatabase (callback = () => {}) {
let toPersist = ''
const lines = []
if (this.inMemoryOnly) return callback(null)
this.db.getAllData().forEach(doc => {
toPersist += this.afterSerialization(model.serialize(doc)) + '\n'
lines.push(this.afterSerialization(model.serialize(doc)))
})
Object.keys(this.db.indexes).forEach(fieldName => {
if (fieldName !== '_id') { // The special _id index is managed by datastore.js, the others need to be persisted
toPersist += this.afterSerialization(model.serialize({
lines.push(this.afterSerialization(model.serialize({
$$indexCreated: {
fieldName: fieldName,
unique: this.db.indexes[fieldName].unique,
sparse: this.db.indexes[fieldName].sparse
}
})) + '\n'
})))
}
})
storage.crashSafeWriteFile(this.filename, toPersist, err => {
storage.crashSafeWriteFileLines(this.filename, lines, err => {
if (err) return callback(err)
this.db.emit('compaction.done')
return callback(null)
@@ -155,8 +156,9 @@ class Persistence {
treatRawData (rawData) {
const data = rawData.split('\n')
const dataById = {}
const tdata = []
const indexes = {}
// Last line of every data file is usually blank so not really corrupt
let corruptItems = -1
for (const datum of data) {
@@ -178,11 +180,58 @@
corruptItems / data.length > this.corruptAlertThreshold
) throw new Error(`More than ${Math.floor(100 * this.corruptAlertThreshold)}% of the data file is corrupt, the wrong beforeDeserialization hook may be used. Cautiously refusing to start NeDB to prevent dataloss`)
tdata.push(...Object.values(dataById))
const tdata = Object.values(dataById)
return { data: tdata, indexes: indexes }
}
/**
* From a database's raw stream, return the corresponding
* machine-understandable collection
*/
treatRawStream (rawStream, cb) {
const dataById = {}
const indexes = {}
// Last line of every data file is usually blank so not really corrupt
let corruptItems = -1
const lineStream = byline(rawStream, { keepEmptyLines: true })
let length = 0
lineStream.on('data', (line) => {
try {
const doc = model.deserialize(this.beforeDeserialization(line))
if (doc._id) {
if (doc.$$deleted === true) delete dataById[doc._id]
else dataById[doc._id] = doc
} else if (doc.$$indexCreated && doc.$$indexCreated.fieldName != null) indexes[doc.$$indexCreated.fieldName] = doc.$$indexCreated
else if (typeof doc.$$indexRemoved === 'string') delete indexes[doc.$$indexRemoved]
} catch (e) {
corruptItems += 1
}
length++
})
lineStream.on('end', () => {
// A bit lenient on corruption
if (length > 0 && corruptItems / length > this.corruptAlertThreshold) {
const err = new Error(`More than ${Math.floor(100 * this.corruptAlertThreshold)}% of the data file is corrupt, the wrong beforeDeserialization hook may be used. Cautiously refusing to start NeDB to prevent dataloss`)
cb(err, null)
return
}
const data = Object.values(dataById)
cb(null, { data, indexes: indexes })
})
lineStream.on('error', function (err) {
cb(err)
})
}
/**
* Load the database
* 1) Create all indexes
@@ -207,14 +256,8 @@
// eslint-disable-next-line node/handle-callback-err
storage.ensureDatafileIntegrity(this.filename, err => {
// TODO: handle error
storage.readFile(this.filename, 'utf8', (err, rawData) => {
const treatedDataCallback = (err, treatedData) => {
if (err) return cb(err)
let treatedData
try {
treatedData = this.treatRawData(rawData)
} catch (e) {
return cb(e)
}
// Recreate all indexes in the datafile
Object.keys(treatedData.indexes).forEach(key => {
@@ -230,6 +273,25 @@
}
this.db.persistence.persistCachedDatabase(cb)
}
if (storage.readFileStream) {
// Server side
const fileStream = storage.readFileStream(this.filename, { encoding: 'utf8' })
this.treatRawStream(fileStream, treatedDataCallback)
return
}
// Browser
storage.readFile(this.filename, 'utf8', (err, rawData) => {
if (err) return cb(err)
try {
const treatedData = this.treatRawData(rawData)
treatedDataCallback(null, treatedData)
} catch (e) {
return cb(e)
}
})
})
})
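As the new tests below demonstrate, `treatRawStream` accepts any Readable, not just a file stream. A minimal sketch, assuming a `db` Datastore instance and the default (identity) serialization hooks:

const { Readable } = require('stream')
const stream = new Readable()
stream.push('{"_id":"1","hello":"world"}\n')
stream.push('{"_id":"1","hello":"earth"}\n') // same _id: the last version wins
stream.push(null) // signal end of input
db.persistence.treatRawStream(stream, (err, result) => {
  // result.data -> [{ _id: '1', hello: 'earth' }], result.indexes -> {}
})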

@@ -10,6 +10,7 @@ const fs = require('fs')
const path = require('path')
const async = require('async')
const storage = {}
const { Readable } = require('stream')
// eslint-disable-next-line node/no-callback-literal
storage.exists = (path, cb) => fs.access(path, fs.constants.F_OK, (err) => { cb(!err) })
@@ -18,6 +19,7 @@ storage.writeFile = fs.writeFile
storage.unlink = fs.unlink
storage.appendFile = fs.appendFile
storage.readFile = fs.readFile
storage.readFileStream = fs.createReadStream
storage.mkdir = fs.mkdir
/**
@@ -69,13 +71,41 @@ storage.flushToStorage = (options, callback) => {
})
}
/**
* Fully write or rewrite the datafile
* @param {String} filename
* @param {String[]} lines
* @param {Function} callback
*/
storage.writeFileLines = (filename, lines, callback = () => {}) => {
try {
const stream = fs.createWriteStream(filename)
const readable = Readable.from(lines)
readable.on('data', (line) => {
try {
stream.write(line)
stream.write('\n')
} catch (err) {
callback(err)
}
})
readable.on('end', () => {
stream.close(callback)
})
readable.on('error', callback)
stream.on('error', callback)
} catch (err) {
callback(err)
}
}
/**
* Fully write or rewrite the datafile, immune to crashes during the write operation (data will not be lost)
* @param {String} filename
* @param {String} data
* @param {String[]} lines
* @param {Function} callback Optional callback, signature: err
*/
storage.crashSafeWriteFile = (filename, data, callback = () => {}) => {
storage.crashSafeWriteFileLines = (filename, lines, callback = () => {}) => {
const tempFilename = filename + '~'
async.waterfall([
@@ -87,7 +117,7 @@ storage.crashSafeWriteFile = (filename, data, callback = () => {}) => {
})
},
cb => {
storage.writeFile(tempFilename, data, err => cb(err))
storage.writeFileLines(tempFilename, lines, cb)
},
async.apply(storage.flushToStorage, tempFilename),
cb => {
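A sketch of the new write path (hypothetical filename): `writeFileLines` streams each line plus a trailing '\n' into a write stream, and `crashSafeWriteFileLines` wraps it in the same temporary-file-and-flush scheme that `crashSafeWriteFile` used, so a crash mid-write cannot corrupt the existing datafile:

const storage = require('./storage')
const lines = ['{"_id":"1"}', '{"_id":"2"}']
// The lines are streamed to 'datafile.db~' first and flushed to disk before
// the temporary file replaces 'datafile.db'.
storage.crashSafeWriteFileLines('datafile.db', lines, err => {
  if (err) throw err
})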

package-lock.json (generated; 46 lines changed)

@@ -1,12 +1,12 @@
{
"name": "@seald-io/nedb",
"version": "2.0.4",
"version": "2.1.0-3",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "@seald-io/nedb",
"version": "2.0.4",
"version": "2.1.0-3",
"license": "MIT",
"dependencies": {
"@seald-io/binary-search-tree": "^1.0.2",
@@ -1371,9 +1371,9 @@
}
},
"node_modules/es-abstract": {
"version": "1.18.0",
"resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.18.0.tgz",
"integrity": "sha512-LJzK7MrQa8TS0ja2w3YNLzUgJCGPdPOV1yVvezjNnS89D+VR08+Szt2mz3YB2Dck/+w5tfIq/RoUAFqJJGM2yw==",
"version": "1.18.5",
"resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.18.5.tgz",
"integrity": "sha512-DDggyJLoS91CkJjgauM5c0yZMjiD1uK3KcaCeAmffGwZ+ODWzOkPN4QwRbsK5DOFf06fywmyLci3ZD8jLGhVYA==",
"dev": true,
"dependencies": {
"call-bind": "^1.0.2",
@@ -1382,16 +1382,17 @@
"get-intrinsic": "^1.1.1",
"has": "^1.0.3",
"has-symbols": "^1.0.2",
"internal-slot": "^1.0.3",
"is-callable": "^1.2.3",
"is-negative-zero": "^2.0.1",
"is-regex": "^1.1.2",
"is-string": "^1.0.5",
"object-inspect": "^1.9.0",
"is-regex": "^1.1.3",
"is-string": "^1.0.6",
"object-inspect": "^1.11.0",
"object-keys": "^1.1.1",
"object.assign": "^4.1.2",
"string.prototype.trimend": "^1.0.4",
"string.prototype.trimstart": "^1.0.4",
"unbox-primitive": "^1.0.0"
"unbox-primitive": "^1.0.1"
},
"engines": {
"node": ">= 0.4"
@@ -3453,9 +3454,9 @@
}
},
"node_modules/object-inspect": {
"version": "1.10.3",
"resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.10.3.tgz",
"integrity": "sha512-e5mCJlSH7poANfC8z8S9s9S2IN5/4Zb3aZ33f5s8YqoazCFzNLloLU8r5VCG+G7WoqLvAAZoVMcy3tp/3X0Plw==",
"version": "1.11.0",
"resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.11.0.tgz",
"integrity": "sha512-jp7ikS6Sd3GxQfZJPyH3cjcbJF6GZPClgdV+EFygjFLQ5FmW/dRUnTd9PQ9k0JhoNDabWFbpF1yCdSWCC6gexg==",
"dev": true,
"funding": {
"url": "https://github.com/sponsors/ljharb"
@@ -6702,9 +6703,9 @@
}
},
"es-abstract": {
"version": "1.18.0",
"resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.18.0.tgz",
"integrity": "sha512-LJzK7MrQa8TS0ja2w3YNLzUgJCGPdPOV1yVvezjNnS89D+VR08+Szt2mz3YB2Dck/+w5tfIq/RoUAFqJJGM2yw==",
"version": "1.18.5",
"resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.18.5.tgz",
"integrity": "sha512-DDggyJLoS91CkJjgauM5c0yZMjiD1uK3KcaCeAmffGwZ+ODWzOkPN4QwRbsK5DOFf06fywmyLci3ZD8jLGhVYA==",
"dev": true,
"requires": {
"call-bind": "^1.0.2",
@@ -6713,16 +6714,17 @@
"get-intrinsic": "^1.1.1",
"has": "^1.0.3",
"has-symbols": "^1.0.2",
"internal-slot": "^1.0.3",
"is-callable": "^1.2.3",
"is-negative-zero": "^2.0.1",
"is-regex": "^1.1.2",
"is-string": "^1.0.5",
"object-inspect": "^1.9.0",
"is-regex": "^1.1.3",
"is-string": "^1.0.6",
"object-inspect": "^1.11.0",
"object-keys": "^1.1.1",
"object.assign": "^4.1.2",
"string.prototype.trimend": "^1.0.4",
"string.prototype.trimstart": "^1.0.4",
"unbox-primitive": "^1.0.0"
"unbox-primitive": "^1.0.1"
}
},
"es-module-lexer": {
@@ -8274,9 +8276,9 @@
"dev": true
},
"object-inspect": {
"version": "1.10.3",
"resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.10.3.tgz",
"integrity": "sha512-e5mCJlSH7poANfC8z8S9s9S2IN5/4Zb3aZ33f5s8YqoazCFzNLloLU8r5VCG+G7WoqLvAAZoVMcy3tp/3X0Plw==",
"version": "1.11.0",
"resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.11.0.tgz",
"integrity": "sha512-jp7ikS6Sd3GxQfZJPyH3cjcbJF6GZPClgdV+EFygjFLQ5FmW/dRUnTd9PQ9k0JhoNDabWFbpF1yCdSWCC6gexg==",
"dev": true
},
"object-keys": {

@@ -1,6 +1,6 @@
{
"name": "@seald-io/nedb",
"version": "2.0.4",
"version": "2.1.0-3",
"files": [
"lib/**/*.js",
"browser-version/**/*.js",
@@ -20,6 +20,11 @@
"name": "Timothée Rebours",
"email": "tim@seald.io",
"url": "https://www.seald.io/"
},
{
"name": "Eliot Akira",
"email": "me@eliotakira.com",
"url": "https://eliotakira.com/"
}
],
"description": "File-based embedded data store for node.js",
@@ -73,7 +78,8 @@
"main": "index.js",
"browser": {
"./lib/customUtils.js": "./browser-version/lib/customUtils.js",
"./lib/storage.js": "./browser-version/lib/storage.js"
"./lib/storage.js": "./browser-version/lib/storage.js",
"./lib/byline.js": "./browser-version/lib/byline.js"
},
"license": "MIT",
"publishConfig": {

@@ -0,0 +1,207 @@
/* eslint-env mocha */
// Copyright (C) 2013-2015 John Hewson
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
const chai = require('chai')
const fs = require('fs')
const path = require('path')
const byline = require('../lib/byline')
const { assert } = chai
const regEx = /\r\n|[\n\v\f\r\x85\u2028\u2029]/g
const localPath = file => path.join(__dirname, 'byline', file)
describe('byline', function () {
it('should pipe a small file', function (done) {
const input = fs.createReadStream(localPath('empty.txt'))
const lineStream = byline(input) // convenience API
const output = fs.createWriteStream(localPath('test.txt'))
lineStream.pipe(output)
output.on('close', function () {
const out = fs.readFileSync(localPath('test.txt'), 'utf8')
const in_ = fs.readFileSync(localPath('empty.txt'), 'utf8').replace(/\n/g, '')
assert.equal(in_, out)
fs.unlinkSync(localPath('test.txt'))
done()
})
})
it('should work with streams2 API', function (done) {
let stream = fs.createReadStream(localPath('empty.txt'))
stream = byline.createStream(stream)
stream.on('readable', function () {
while (stream.read() !== null) {
// eslint-disable-line no-empty
}
})
stream.on('end', function () {
done()
})
})
it('should ignore empty lines by default', function (done) {
const input = fs.createReadStream(localPath('empty.txt'))
const lineStream = byline(input)
lineStream.setEncoding('utf8')
const lines1 = []
lineStream.on('data', function (line) {
lines1.push(line)
})
lineStream.on('end', function () {
let lines2 = fs.readFileSync(localPath('empty.txt'), 'utf8').split(regEx)
lines2 = lines2.filter(function (line) {
return line.length > 0
})
assert.deepEqual(lines2, lines1)
done()
})
})
it('should keep empty lines when keepEmptyLines is true', function (done) {
const input = fs.createReadStream(localPath('empty.txt'))
const lineStream = byline(input, { keepEmptyLines: true })
lineStream.setEncoding('utf8')
const lines = []
lineStream.on('data', function (line) {
lines.push(line)
})
lineStream.on('end', function () {
assert.deepEqual(['', '', '', '', '', 'Line 6'], lines)
done()
})
})
it('should not split a CRLF which spans two chunks', function (done) {
const input = fs.createReadStream(localPath('CRLF.txt'))
const lineStream = byline(input, { keepEmptyLines: true })
lineStream.setEncoding('utf8')
const lines = []
lineStream.on('data', function (line) {
lines.push(line)
})
lineStream.on('end', function () {
assert.equal(2, lines.length)
done()
})
})
it('should read a large file', function (done) {
readFile(localPath('rfc.txt'), done)
})
it('should read a huge file', function (done) {
// Readable highWaterMark is 16384, so we test a file with more lines than this
readFile(localPath('rfc_huge.txt'), done)
})
function readFile (filename, done) {
const input = fs.createReadStream(filename)
const lineStream = byline(input)
lineStream.setEncoding('utf8')
let lines2 = fs.readFileSync(filename, 'utf8').split(regEx)
lines2 = lines2.filter(function (line) {
return line.length > 0
})
const lines1 = []
let i = 0
lineStream.on('data', function (line) {
lines1.push(line)
if (line !== lines2[i]) {
console.log('EXPECTED:', lines2[i])
console.log(' GOT:', line)
assert.fail(null, null, 'difference at line ' + (i + 1))
}
i++
})
lineStream.on('end', function () {
assert.equal(lines2.length, lines1.length)
assert.deepEqual(lines2, lines1)
done()
})
}
it('should handle encodings like fs', function (done) {
areStreamsEqualTypes(undefined, function () {
areStreamsEqualTypes({ encoding: 'utf8' }, function () {
done()
})
})
})
it('should pause() and resume() with a huge file', function (done) {
const input = fs.createReadStream(localPath('rfc_huge.txt'))
const lineStream = byline(input)
lineStream.setEncoding('utf8')
let lines2 = fs.readFileSync(localPath('rfc_huge.txt'), 'utf8').split(regEx)
lines2 = lines2.filter(function (line) {
return line.length > 0
})
const lines1 = []
let i = 0
lineStream.on('data', function (line) {
lines1.push(line)
if (line !== lines2[i]) {
console.log('EXPECTED:', lines2[i])
console.log(' GOT:', line)
assert.fail(null, null, 'difference at line ' + (i + 1))
}
i++
// pause/resume
lineStream.pause()
setImmediate(function () {
lineStream.resume()
})
})
lineStream.on('end', function () {
assert.equal(lines2.length, lines1.length)
assert.deepEqual(lines2, lines1)
done()
})
})
function areStreamsEqualTypes (options, callback) {
const fsStream = fs.createReadStream(localPath('empty.txt'), options)
const lineStream = byline(fs.createReadStream(localPath('empty.txt'), options))
fsStream.on('data', function (data1) {
lineStream.on('data', function (data2) {
assert.equal(Buffer.isBuffer(data1), Buffer.isBuffer(data2))
})
lineStream.on('end', function () {
callback()
})
})
}
})

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -9,6 +9,7 @@ const Datastore = require('../lib/datastore')
const Persistence = require('../lib/persistence')
const storage = require('../lib/storage')
const { execFile, fork } = require('child_process')
const Readable = require('stream').Readable
const { assert } = chai
chai.should()
@@ -55,6 +56,28 @@ describe('Persistence', function () {
assert.deepStrictEqual(treatedData[2], { _id: '3', nested: { today: now } })
})
it('Every line represents a document (with stream)', function (done) {
const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
model.serialize({ _id: '2', hello: 'world' }) + '\n' +
model.serialize({ _id: '3', nested: { today: now } })
const stream = new Readable()
stream.push(rawData)
stream.push(null)
d.persistence.treatRawStream(stream, function (err, result) {
assert.isNull(err)
const treatedData = result.data
treatedData.sort(function (a, b) { return a._id - b._id })
treatedData.length.should.equal(3)
assert.deepStrictEqual(treatedData[0], { _id: '1', a: 2, ages: [1, 5, 12] })
assert.deepStrictEqual(treatedData[1], { _id: '2', hello: 'world' })
assert.deepStrictEqual(treatedData[2], { _id: '3', nested: { today: now } })
done()
})
})
it('Badly formatted lines have no impact on the treated data', function () {
const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
@@ -68,6 +91,27 @@ describe('Persistence', function () {
assert.deepStrictEqual(treatedData[1], { _id: '3', nested: { today: now } })
})
it('Badly formatted lines have no impact on the treated data (with stream)', function (done) {
const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
'garbage\n' +
model.serialize({ _id: '3', nested: { today: now } })
const stream = new Readable()
stream.push(rawData)
stream.push(null)
d.persistence.treatRawStream(stream, function (err, result) {
assert.isNull(err)
const treatedData = result.data
treatedData.sort(function (a, b) { return a._id - b._id })
treatedData.length.should.equal(2)
assert.deepStrictEqual(treatedData[0], { _id: '1', a: 2, ages: [1, 5, 12] })
assert.deepStrictEqual(treatedData[1], { _id: '3', nested: { today: now } })
done()
})
})
it('Well formatted lines that have no _id are not included in the data', function () {
const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
@@ -81,6 +125,27 @@ describe('Persistence', function () {
assert.deepStrictEqual(treatedData[1], { _id: '2', hello: 'world' })
})
it('Well formatted lines that have no _id are not included in the data (with stream)', function (done) {
const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
model.serialize({ _id: '2', hello: 'world' }) + '\n' +
model.serialize({ nested: { today: now } })
const stream = new Readable()
stream.push(rawData)
stream.push(null)
d.persistence.treatRawStream(stream, function (err, result) {
assert.isNull(err)
const treatedData = result.data
treatedData.sort(function (a, b) { return a._id - b._id })
treatedData.length.should.equal(2)
assert.deepStrictEqual(treatedData[0], { _id: '1', a: 2, ages: [1, 5, 12] })
assert.deepStrictEqual(treatedData[1], { _id: '2', hello: 'world' })
done()
})
})
it('If two lines concern the same doc (= same _id), the last one is the good version', function () {
const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
@@ -94,6 +159,27 @@ describe('Persistence', function () {
assert.deepStrictEqual(treatedData[1], { _id: '2', hello: 'world' })
})
it('If two lines concern the same doc (= same _id), the last one is the good version (with stream)', function (done) {
const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
model.serialize({ _id: '2', hello: 'world' }) + '\n' +
model.serialize({ _id: '1', nested: { today: now } })
const stream = new Readable()
stream.push(rawData)
stream.push(null)
d.persistence.treatRawStream(stream, function (err, result) {
assert.isNull(err)
const treatedData = result.data
treatedData.sort(function (a, b) { return a._id - b._id })
treatedData.length.should.equal(2)
assert.deepStrictEqual(treatedData[0], { _id: '1', nested: { today: now } })
assert.deepStrictEqual(treatedData[1], { _id: '2', hello: 'world' })
done()
})
})
it('If a doc contains $$deleted: true, that means we need to remove it from the data', function () {
const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
@@ -108,6 +194,28 @@ describe('Persistence', function () {
assert.deepStrictEqual(treatedData[1], { _id: '3', today: now })
})
it('If a doc contains $$deleted: true, that means we need to remove it from the data (with stream)', function (done) {
const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
model.serialize({ _id: '2', hello: 'world' }) + '\n' +
model.serialize({ _id: '1', $$deleted: true }) + '\n' +
model.serialize({ _id: '3', today: now })
const stream = new Readable()
stream.push(rawData)
stream.push(null)
d.persistence.treatRawStream(stream, function (err, result) {
assert.isNull(err)
const treatedData = result.data
treatedData.sort(function (a, b) { return a._id - b._id })
treatedData.length.should.equal(2)
assert.deepStrictEqual(treatedData[0], { _id: '2', hello: 'world' })
assert.deepStrictEqual(treatedData[1], { _id: '3', today: now })
done()
})
})
it('If a doc contains $$deleted: true, no error is thrown if the doc wasnt in the list before', function () {
const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
@@ -121,6 +229,27 @@ describe('Persistence', function () {
assert.deepStrictEqual(treatedData[1], { _id: '3', today: now })
})
it('If a doc contains $$deleted: true, no error is thrown if the doc wasnt in the list before (with stream)', function (done) {
const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
model.serialize({ _id: '2', $$deleted: true }) + '\n' +
model.serialize({ _id: '3', today: now })
const stream = new Readable()
stream.push(rawData)
stream.push(null)
d.persistence.treatRawStream(stream, function (err, result) {
assert.isNull(err)
const treatedData = result.data
treatedData.sort(function (a, b) { return a._id - b._id })
treatedData.length.should.equal(2)
assert.deepStrictEqual(treatedData[0], { _id: '1', a: 2, ages: [1, 5, 12] })
assert.deepStrictEqual(treatedData[1], { _id: '3', today: now })
done()
})
})
it('If a doc contains $$indexCreated, no error is thrown during treatRawData and we can get the index options', function () {
const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
@@ -138,6 +267,31 @@ describe('Persistence', function () {
assert.deepStrictEqual(treatedData[1], { _id: '3', today: now })
})
it('If a doc contains $$indexCreated, no error is thrown during treatRawData and we can get the index options (with stream)', function (done) {
const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
model.serialize({ $$indexCreated: { fieldName: 'test', unique: true } }) + '\n' +
model.serialize({ _id: '3', today: now })
const stream = new Readable()
stream.push(rawData)
stream.push(null)
d.persistence.treatRawStream(stream, function (err, result) {
assert.isNull(err)
const treatedData = result.data
const indexes = result.indexes
Object.keys(indexes).length.should.equal(1)
assert.deepStrictEqual(indexes.test, { fieldName: 'test', unique: true })
treatedData.sort(function (a, b) { return a._id - b._id })
treatedData.length.should.equal(2)
assert.deepStrictEqual(treatedData[0], { _id: '1', a: 2, ages: [1, 5, 12] })
assert.deepStrictEqual(treatedData[1], { _id: '3', today: now })
done()
})
})
it('Compact database on load', function (done) {
d.insert({ a: 2 }, function () {
d.insert({ a: 4 }, function () {

@@ -114,6 +114,18 @@ fs.writeFile = function (path, data, options, callback_) {
}
}
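// Minimal in-memory stand-in for fs.createWriteStream, sufficient for this
// crash test: it buffers every write() and only touches the (patched)
// fs.writeFile when close() is called.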
fs.createWriteStream = function (path) {
let content = ''
return {
write (data) {
content += data
},
close (callback) {
fs.writeFile(path, content, callback)
}
}
}
// End of fs modification
const Nedb = require('../lib/datastore.js')
const db = new Nedb({ filename: 'workspace/lac.db' })

@@ -27,6 +27,7 @@ module.exports = (env, argv) => {
plugins: [
new webpack.NormalModuleReplacementPlugin(new RegExp(path.resolve(__dirname, 'lib/storage.js')), path.resolve(__dirname, 'browser-version/lib/storage.js')),
new webpack.NormalModuleReplacementPlugin(new RegExp(path.resolve(__dirname, 'lib/customUtils.js')), path.resolve(__dirname, 'browser-version/lib/customUtils.js')),
new webpack.NormalModuleReplacementPlugin(/byline/, path.resolve(__dirname, 'browser-version/lib/byline.js')),
new webpack.ProvidePlugin({
process: 'process/browser',
Buffer: ['buffer', 'Buffer'],
