give the corruptionRate when there is an error

pull/11/head
Timothée Rebours 3 years ago
parent 67a0c885b5
commit ebda00b8d2
  1. 2
      CHANGELOG.md
  2. 43
      lib/persistence.js
  3. 22
      test/persistence.async.test.js
  4. 11
      test/persistence.test.js

@ -12,8 +12,10 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
- The JSDoc is now much more exhaustive. - The JSDoc is now much more exhaustive.
- An auto-generated JSDoc file is generated: [API.md](./API.md). - An auto-generated JSDoc file is generated: [API.md](./API.md).
- Added `Datastore#dropDatabaseAsync` and its callback equivalent. - Added `Datastore#dropDatabaseAsync` and its callback equivalent.
- The Error given when the `Datastore#corruptAlertThreshold` is reached now has three properties: `dataLength`, which is the number of lines in the database file (excluding empty lines); `corruptItems`, which is the number of corrupted lines; and `corruptionRate`, which is the rate of corruption between 0 and 1.
### Changed ### Changed
- The `corruptAlertThreshold` check no longer takes empty lines into account.
- The `Datastore#update`'s callback has its signature slightly changed. The - The `Datastore#update`'s callback has its signature slightly changed. The
`upsert` flag is always defined either at `true` or `false` but not `null` nor `upsert` flag is always defined either at `true` or `false` but not `null` nor
`undefined`, and `affectedDocuments` is `null` when none is given rather than `undefined`, and `affectedDocuments` is `null` when none is given rather than

@ -177,11 +177,13 @@ class Persistence {
const data = rawData.split('\n') const data = rawData.split('\n')
const dataById = {} const dataById = {}
const indexes = {} const indexes = {}
let dataLength = data.length
// Last line of every data file is usually blank so not really corrupt // Last line of every data file is usually blank so not really corrupt
let corruptItems = -1 let corruptItems = 0
for (const datum of data) { for (const datum of data) {
if (datum === '') { dataLength--; continue }
try { try {
const doc = model.deserialize(this.beforeDeserialization(datum)) const doc = model.deserialize(this.beforeDeserialization(datum))
if (doc._id) { if (doc._id) {
@ -195,10 +197,16 @@ class Persistence {
} }
// A bit lenient on corruption // A bit lenient on corruption
if ( if (dataLength > 0) {
data.length > 0 && const corruptionRate = corruptItems / dataLength
corruptItems / data.length > this.corruptAlertThreshold if (corruptionRate > this.corruptAlertThreshold) {
) throw new Error(`More than ${Math.floor(100 * this.corruptAlertThreshold)}% of the data file is corrupt, the wrong beforeDeserialization hook may be used. Cautiously refusing to start NeDB to prevent dataloss`) const error = new Error(`${Math.floor(100 * corruptionRate)}% of the data file is corrupt, more than given corruptAlertThreshold (${Math.floor(100 * this.corruptAlertThreshold)}%). Cautiously refusing to start NeDB to prevent dataloss.`)
error.corruptionRate = corruptionRate
error.corruptItems = corruptItems
error.dataLength = dataLength
throw error
}
}
const tdata = Object.values(dataById) const tdata = Object.values(dataById)
@ -225,13 +233,13 @@ class Persistence {
const indexes = {} const indexes = {}
// Last line of every data file is usually blank so not really corrupt let corruptItems = 0
let corruptItems = -1
const lineStream = byline(rawStream, { keepEmptyLines: true }) const lineStream = byline(rawStream)
let length = 0 let dataLength = 0
lineStream.on('data', (line) => { lineStream.on('data', (line) => {
if (line === '') return
try { try {
const doc = model.deserialize(this.beforeDeserialization(line)) const doc = model.deserialize(this.beforeDeserialization(line))
if (doc._id) { if (doc._id) {
@ -243,17 +251,22 @@ class Persistence {
corruptItems += 1 corruptItems += 1
} }
length++ dataLength++
}) })
lineStream.on('end', () => { lineStream.on('end', () => {
// A bit lenient on corruption // A bit lenient on corruption
if (length > 0 && corruptItems / length > this.corruptAlertThreshold) { if (dataLength > 0) {
const err = new Error(`More than ${Math.floor(100 * this.corruptAlertThreshold)}% of the data file is corrupt, the wrong beforeDeserialization hook may be used. Cautiously refusing to start NeDB to prevent dataloss`) const corruptionRate = corruptItems / dataLength
reject(err, null) if (corruptionRate > this.corruptAlertThreshold) {
return const error = new Error(`${Math.floor(100 * corruptionRate)}% of the data file is corrupt, more than given corruptAlertThreshold (${Math.floor(100 * this.corruptAlertThreshold)}%). Cautiously refusing to start NeDB to prevent dataloss.`)
error.corruptionRate = corruptionRate
error.corruptItems = corruptItems
error.dataLength = dataLength
reject(error, null)
return
}
} }
const data = Object.values(dataById) const data = Object.values(dataById)
resolve({ data, indexes: indexes }) resolve({ data, indexes: indexes })

@ -60,6 +60,7 @@ describe('Persistence async', function () {
}) })
it('Badly formatted lines have no impact on the treated data', function () { it('Badly formatted lines have no impact on the treated data', function () {
d.persistence.corruptAlertThreshold = 1 // to prevent a corruption alert
const now = new Date() const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' + const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
'garbage\n' + 'garbage\n' +
@ -73,6 +74,7 @@ describe('Persistence async', function () {
}) })
it('Badly formatted lines have no impact on the treated data (with stream)', async () => { it('Badly formatted lines have no impact on the treated data (with stream)', async () => {
d.persistence.corruptAlertThreshold = 1 // to prevent a corruption alert
const now = new Date() const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' + const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
'garbage\n' + 'garbage\n' +
@ -366,14 +368,30 @@ describe('Persistence async', function () {
// Default corruptAlertThreshold // Default corruptAlertThreshold
d = new Datastore({ filename: corruptTestFilename }) d = new Datastore({ filename: corruptTestFilename })
await assert.rejects(() => d.loadDatabaseAsync()) await assert.rejects(() => d.loadDatabaseAsync(), err => {
assert.ok(Object.prototype.hasOwnProperty.call(err, 'corruptionRate'))
assert.ok(Object.prototype.hasOwnProperty.call(err, 'corruptItems'))
assert.ok(Object.prototype.hasOwnProperty.call(err, 'dataLength'))
assert.equal(err.corruptionRate, 0.25)
assert.equal(err.corruptItems, 1)
assert.equal(err.dataLength, 4)
return true
})
await fs.writeFile(corruptTestFilename, fakeData, 'utf8') await fs.writeFile(corruptTestFilename, fakeData, 'utf8')
d = new Datastore({ filename: corruptTestFilename, corruptAlertThreshold: 1 }) d = new Datastore({ filename: corruptTestFilename, corruptAlertThreshold: 1 })
await d.loadDatabaseAsync() await d.loadDatabaseAsync()
await fs.writeFile(corruptTestFilename, fakeData, 'utf8') await fs.writeFile(corruptTestFilename, fakeData, 'utf8')
d = new Datastore({ filename: corruptTestFilename, corruptAlertThreshold: 0 }) d = new Datastore({ filename: corruptTestFilename, corruptAlertThreshold: 0 })
await assert.rejects(() => d.loadDatabaseAsync()) await assert.rejects(() => d.loadDatabaseAsync(), err => {
assert.ok(Object.prototype.hasOwnProperty.call(err, 'corruptionRate'))
assert.ok(Object.prototype.hasOwnProperty.call(err, 'corruptItems'))
assert.ok(Object.prototype.hasOwnProperty.call(err, 'dataLength'))
assert.equal(err.corruptionRate, 0.25)
assert.equal(err.corruptItems, 1)
assert.equal(err.dataLength, 4)
return true
})
}) })
it('Can listen to compaction events', async () => { it('Can listen to compaction events', async () => {

@ -82,6 +82,7 @@ describe('Persistence', function () {
}) })
it('Badly formatted lines have no impact on the treated data', function () { it('Badly formatted lines have no impact on the treated data', function () {
d.persistence.corruptAlertThreshold = 1 // to prevent a corruption alert
const now = new Date() const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' + const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
'garbage\n' + 'garbage\n' +
@ -95,6 +96,7 @@ describe('Persistence', function () {
}) })
it('Badly formatted lines have no impact on the treated data (with stream)', function (done) { it('Badly formatted lines have no impact on the treated data (with stream)', function (done) {
d.persistence.corruptAlertThreshold = 1 // to prevent a corruption alert
const now = new Date() const now = new Date()
const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' + const rawData = model.serialize({ _id: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
'garbage\n' + 'garbage\n' +
@ -424,6 +426,10 @@ describe('Persistence', function () {
d.loadDatabase(function (err) { d.loadDatabase(function (err) {
assert.isDefined(err) assert.isDefined(err)
assert.isNotNull(err) assert.isNotNull(err)
assert.hasAllKeys(err, ['corruptionRate', 'corruptItems', 'dataLength'])
assert.strictEqual(err.corruptionRate, 0.25)
assert.strictEqual(err.corruptItems, 1)
assert.strictEqual(err.dataLength, 4)
fs.writeFileSync(corruptTestFilename, fakeData, 'utf8') fs.writeFileSync(corruptTestFilename, fakeData, 'utf8')
d = new Datastore({ filename: corruptTestFilename, corruptAlertThreshold: 1 }) d = new Datastore({ filename: corruptTestFilename, corruptAlertThreshold: 1 })
@ -436,6 +442,11 @@ describe('Persistence', function () {
assert.isDefined(err) assert.isDefined(err)
assert.isNotNull(err) assert.isNotNull(err)
assert.hasAllKeys(err, ['corruptionRate', 'corruptItems', 'dataLength'])
assert.strictEqual(err.corruptionRate, 0.25)
assert.strictEqual(err.corruptItems, 1)
assert.strictEqual(err.dataLength, 4)
done() done()
}) })
}) })

Loading…
Cancel
Save