From 51ef65be179f60783f32f6f06bcd8d57429eea1b Mon Sep 17 00:00:00 2001 From: Louis Chatriot Date: Thu, 1 Jan 2015 13:00:38 +0100 Subject: [PATCH] Can now prevent loading of the database if too much data is corrupt --- lib/datastore.js | 2 ++ lib/persistence.js | 22 +++++++++++++++++----- test/persistence.test.js | 38 ++++++++++++++++++++++++++++++++++---- 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/lib/datastore.js b/lib/datastore.js index d4aaac0..9560f9d 100644 --- a/lib/datastore.js +++ b/lib/datastore.js @@ -19,6 +19,7 @@ var customUtils = require('./customUtils') * @param {Boolean} options.autoload Optional, defaults to false * @param {Function} options.onload Optional, if autoload is used this will be called after the load database with the error object as parameter. If you don't pass it the error will be thrown * @param {Function} options.afterSerialization and options.beforeDeserialization Optional, serialization hooks + * @param {Number} options.corruptAlertThreshold Optional, threshold after which an alert is thrown if too much data is corrupt */ function Datastore (options) { var filename; @@ -46,6 +47,7 @@ function Datastore (options) { this.persistence = new Persistence({ db: this, nodeWebkitAppName: options.nodeWebkitAppName , afterSerialization: options.afterSerialization , beforeDeserialization: options.beforeDeserialization + , corruptAlertThreshold: options.corruptAlertThreshold }); // This new executor is ready if we don't use persistence diff --git a/lib/persistence.js b/lib/persistence.js index 1886000..e0e0186 100644 --- a/lib/persistence.js +++ b/lib/persistence.js @@ -26,6 +26,7 @@ function Persistence (options) { this.db = options.db; this.inMemoryOnly = this.db.inMemoryOnly; this.filename = this.db.filename; + this.corruptAlertThreshold = options.corruptAlertThreshold !== undefined ? options.corruptAlertThreshold : 0.1; if (!this.inMemoryOnly && this.filename) { if (this.filename.charAt(this.filename.length - 1) === '~') { @@ -241,11 +242,12 @@ Persistence.prototype.treatRawData = function (rawData) { , tdata = [] , i , indexes = {} + , corruptItems = -1 // Last line of every data file is usually blank so not really corrupt ; - + for (i = 0; i < data.length; i += 1) { var doc; - + try { doc = model.deserialize(this.beforeDeserialization(data[i])); if (doc._id) { @@ -260,8 +262,14 @@ Persistence.prototype.treatRawData = function (rawData) { delete indexes[doc.$$indexRemoved]; } } catch (e) { + corruptItems += 1; } } + + // A bit lenient on corruption + if (data.length > 0 && corruptItems / data.length > this.corruptAlertThreshold) { + throw "More than 10% of the data file is corrupt, the wrong beforeDeserialization hook may be used. Cautiously refusing to start NeDB to prevent dataloss" + } Object.keys(dataById).forEach(function (k) { tdata.push(dataById[k]); @@ -320,10 +328,14 @@ Persistence.prototype.loadDatabase = function (cb) { Persistence.ensureDirectoryExists(path.dirname(self.filename), function (err) { self.ensureDatafileIntegrity(function (exists) { storage.readFile(self.filename, 'utf8', function (err, rawData) { - if (err) { return cb(err); } - var treatedData = self.treatRawData(rawData); - + + try { + var treatedData = self.treatRawData(rawData); + } catch (e) { + return cb(e); + } + // Recreate all indexes in the datafile Object.keys(treatedData.indexes).forEach(function (key) { self.db.indexes[key] = new Index(treatedData.indexes[key]); diff --git a/test/persistence.test.js b/test/persistence.test.js index 9e6e738..045e847 100644 --- a/test/persistence.test.js +++ b/test/persistence.test.js @@ -267,8 +267,39 @@ describe('Persistence', function () { }); }); }); + + it("When treating raw data, refuse to proceed if too much data is corrupt, to avoid data loss", function (done) { + var corruptTestFilename = 'workspace/corruptTest.db' + , fakeData = '{"_id":"one","hello":"world"}\n' + 'Some corrupt data\n' + '{"_id":"two","hello":"earth"}\n' + '{"_id":"three","hello":"you"}\n' + , d + ; + fs.writeFileSync(corruptTestFilename, fakeData, "utf8"); + + // Default corruptAlertThreshold + d = new Datastore({ filename: corruptTestFilename }); + d.loadDatabase(function (err) { + assert.isDefined(err); + assert.isNotNull(err); + + fs.writeFileSync(corruptTestFilename, fakeData, "utf8"); + d = new Datastore({ filename: corruptTestFilename, corruptAlertThreshold: 1 }); + d.loadDatabase(function (err) { + assert.isNull(err); + + fs.writeFileSync(corruptTestFilename, fakeData, "utf8"); + d = new Datastore({ filename: corruptTestFilename, corruptAlertThreshold: 0 }); + d.loadDatabase(function (err) { + assert.isDefined(err); + assert.isNotNull(err); + + done(); + }); + }); + }); + }); + - describe.only('Data can be persisted using serialization hooks', function () { + describe('Serialization hooks', function () { var as = function (s) { return "before_" + s + "_after"; } , bd = function (s) { return s.substring(7, s.length - 6); } @@ -316,9 +347,8 @@ describe('Persistence', function () { done(); }); - }); - + it("A serialization hook can be used to transform data before writing new state to disk", function (done) { var hookTestFilename = 'workspace/hookTest.db' Persistence.ensureFileDoesntExist(hookTestFilename, function () { @@ -507,7 +537,7 @@ describe('Persistence', function () { }); }); - }); + }); // ==== End of 'Serialization hooks' ==== // describe('Prevent dataloss when persisting data', function () {