diff --git a/CHANGELOG.md b/CHANGELOG.md index 9908e36..4a6fc68 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,8 +12,10 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - The JSDoc is now much more exhaustive. - An auto-generated JSDoc file is generated: [API.md](./API.md). - Added `Datastore#dropDatabaseAsync` and its callback equivalent. +- The Error given when the `Datastore#corruptAlertThreshold` is reached now has three properties: `dataLength` which is the amount of lines in the database file (excluding empty lines), `corruptItems` which is the amount of corrupted lines, `corruptionRate` which is the rate of corruption between 0 and 1. ### Changed +- The `corruptAlertThreshold` now doesn't take into account empty lines. - The `Datastore#update`'s callback has its signature slightly changed. The `upsert` flag is always defined either at `true` or `false` but not `null` nor `undefined`, and `affectedDocuments` is `null` when none is given rather than diff --git a/lib/persistence.js b/lib/persistence.js index 561b136..f6fd43b 100755 --- a/lib/persistence.js +++ b/lib/persistence.js @@ -177,11 +177,13 @@ class Persistence { const data = rawData.split('\n') const dataById = {} const indexes = {} + let dataLength = data.length // Last line of every data file is usually blank so not really corrupt - let corruptItems = -1 + let corruptItems = 0 for (const datum of data) { + if (datum === '') { dataLength--; continue } try { const doc = model.deserialize(this.beforeDeserialization(datum)) if (doc._id) { @@ -195,10 +197,16 @@ class Persistence { } // A bit lenient on corruption - if ( - data.length > 0 && - corruptItems / data.length > this.corruptAlertThreshold - ) throw new Error(`More than ${Math.floor(100 * this.corruptAlertThreshold)}% of the data file is corrupt, the wrong beforeDeserialization hook may be used. 
Cautiously refusing to start NeDB to prevent dataloss`) + if (dataLength > 0) { + const corruptionRate = corruptItems / dataLength + if (corruptionRate > this.corruptAlertThreshold) { + const error = new Error(`${Math.floor(100 * corruptionRate)}% of the data file is corrupt, more than given corruptAlertThreshold (${Math.floor(100 * this.corruptAlertThreshold)}%). Cautiously refusing to start NeDB to prevent dataloss.`) + error.corruptionRate = corruptionRate + error.corruptItems = corruptItems + error.dataLength = dataLength + throw error + } + } const tdata = Object.values(dataById) @@ -225,13 +233,13 @@ class Persistence { const indexes = {} - // Last line of every data file is usually blank so not really corrupt - let corruptItems = -1 + let corruptItems = 0 - const lineStream = byline(rawStream, { keepEmptyLines: true }) - let length = 0 + const lineStream = byline(rawStream) + let dataLength = 0 lineStream.on('data', (line) => { + if (line === '') return try { const doc = model.deserialize(this.beforeDeserialization(line)) if (doc._id) { @@ -243,17 +251,22 @@ class Persistence { corruptItems += 1 } - length++ + dataLength++ }) lineStream.on('end', () => { // A bit lenient on corruption - if (length > 0 && corruptItems / length > this.corruptAlertThreshold) { - const err = new Error(`More than ${Math.floor(100 * this.corruptAlertThreshold)}% of the data file is corrupt, the wrong beforeDeserialization hook may be used. Cautiously refusing to start NeDB to prevent dataloss`) - reject(err, null) - return + if (dataLength > 0) { + const corruptionRate = corruptItems / dataLength + if (corruptionRate > this.corruptAlertThreshold) { + const error = new Error(`${Math.floor(100 * corruptionRate)}% of the data file is corrupt, more than given corruptAlertThreshold (${Math.floor(100 * this.corruptAlertThreshold)}%). 
Cautiously refusing to start NeDB to prevent dataloss.`) + error.corruptionRate = corruptionRate + error.corruptItems = corruptItems + error.dataLength = dataLength + reject(error, null) + return + } } - const data = Object.values(dataById) resolve({ data, indexes: indexes }) diff --git a/test/persistence.async.test.js b/test/persistence.async.test.js index 1623cd3..10ca915 100755 --- a/test/persistence.async.test.js +++ b/test/persistence.async.test.js @@ -60,6 +60,7 @@ describe('Persistence async', function () { }) it('Badly formatted lines have no impact on the treated data', function () { + d.persistence.corruptAlertThreshold = 1 // to prevent a corruption alert const now = new Date() const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' + 'garbage\n' + @@ -73,6 +74,7 @@ describe('Persistence async', function () { }) it('Badly formatted lines have no impact on the treated data (with stream)', async () => { + d.persistence.corruptAlertThreshold = 1 // to prevent a corruption alert const now = new Date() const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' + 'garbage\n' + @@ -366,14 +368,30 @@ describe('Persistence async', function () { // Default corruptAlertThreshold d = new Datastore({ filename: corruptTestFilename }) - await assert.rejects(() => d.loadDatabaseAsync()) + await assert.rejects(() => d.loadDatabaseAsync(), err => { + assert.ok(Object.prototype.hasOwnProperty.call(err, 'corruptionRate')) + assert.ok(Object.prototype.hasOwnProperty.call(err, 'corruptItems')) + assert.ok(Object.prototype.hasOwnProperty.call(err, 'dataLength')) + assert.equal(err.corruptionRate, 0.25) + assert.equal(err.corruptItems, 1) + assert.equal(err.dataLength, 4) + return true + }) await fs.writeFile(corruptTestFilename, fakeData, 'utf8') d = new Datastore({ filename: corruptTestFilename, corruptAlertThreshold: 1 }) await d.loadDatabaseAsync() await fs.writeFile(corruptTestFilename, fakeData, 'utf8') d = new Datastore({ filename: 
corruptTestFilename, corruptAlertThreshold: 0 }) - await assert.rejects(() => d.loadDatabaseAsync()) + await assert.rejects(() => d.loadDatabaseAsync(), err => { + assert.ok(Object.prototype.hasOwnProperty.call(err, 'corruptionRate')) + assert.ok(Object.prototype.hasOwnProperty.call(err, 'corruptItems')) + assert.ok(Object.prototype.hasOwnProperty.call(err, 'dataLength')) + assert.equal(err.corruptionRate, 0.25) + assert.equal(err.corruptItems, 1) + assert.equal(err.dataLength, 4) + return true + }) }) it('Can listen to compaction events', async () => { diff --git a/test/persistence.test.js b/test/persistence.test.js index 774af46..8703cfc 100755 --- a/test/persistence.test.js +++ b/test/persistence.test.js @@ -82,6 +82,7 @@ describe('Persistence', function () { }) it('Badly formatted lines have no impact on the treated data', function () { + d.persistence.corruptAlertThreshold = 1 // to prevent a corruption alert const now = new Date() const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' + 'garbage\n' + @@ -95,6 +96,7 @@ describe('Persistence', function () { }) it('Badly formatted lines have no impact on the treated data (with stream)', function (done) { + d.persistence.corruptAlertThreshold = 1 // to prevent a corruption alert const now = new Date() const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' + 'garbage\n' + @@ -424,6 +426,10 @@ describe('Persistence', function () { d.loadDatabase(function (err) { assert.isDefined(err) assert.isNotNull(err) + assert.hasAllKeys(err, ['corruptionRate', 'corruptItems', 'dataLength']) + assert.strictEqual(err.corruptionRate, 0.25) + assert.strictEqual(err.corruptItems, 1) + assert.strictEqual(err.dataLength, 4) fs.writeFileSync(corruptTestFilename, fakeData, 'utf8') d = new Datastore({ filename: corruptTestFilename, corruptAlertThreshold: 1 }) @@ -436,6 +442,11 @@ describe('Persistence', function () { assert.isDefined(err) assert.isNotNull(err) + assert.hasAllKeys(err, 
['corruptionRate', 'corruptItems', 'dataLength']) + assert.strictEqual(err.corruptionRate, 0.25) + assert.strictEqual(err.corruptItems, 1) + assert.strictEqual(err.dataLength, 4) + done() }) })