From fce60d19faaf54c38262552817fbc31cbb273e10 Mon Sep 17 00:00:00 2001 From: Louis Chatriot Date: Mon, 10 Jun 2013 14:44:22 +0200 Subject: [PATCH] Able to use indexes for matches --- lib/datastore.js | 28 +++++++++++----- lib/executor.js | 3 -- lib/indexes.js | 24 +++++++++++--- package.json | 2 +- test/db.test.js | 78 ++++++++++++++++++++++++++++++++++++++++++++ test/indexes.test.js | 22 +++++++++++++ 6 files changed, 141 insertions(+), 16 deletions(-) diff --git a/lib/datastore.js b/lib/datastore.js index 2cf4f0e..bc9f3e7 100644 --- a/lib/datastore.js +++ b/lib/datastore.js @@ -178,9 +178,10 @@ Datastore.prototype.updateIndexes = function (oldDoc, newDoc) { /** * Return the list of candidates for a given query - * Very crude implementation for now, we return the candidates given by the first usable index if any - * Also indexes can only be used for direct matches (no $lt, $gt or array yet) - * This still gives a huge performance boost to finds (800x on a collection with 10k documents) + * Crude implementation for now, we return the candidates given by the first usable index if any + * We try the following query types, in this order: basic match, $in match, comparison match + * One way to make it better would be to enable the use of multiple indexes if the first usable index + * returns too much data. I may do it in the future. 
*/ Datastore.prototype.getCandidates = function (query) { var indexNames = Object.keys(this.indexes) @@ -188,21 +189,32 @@ Datastore.prototype.getCandidates = function (query) { if (indexNames.length <= 1) { return this.getAllData(); } // No index defined (except _id), no specific candidate - // Usable query keys are the ones corresponding to a basic query (no use of $operators or arrays) + // For a basic match usableQueryKeys = []; Object.keys(query).forEach(function (k) { if (typeof query[k] === 'string' || typeof query[k] === 'number' || typeof query[k] === 'boolean' || util.isDate(query[k]) || query[k] === null) { usableQueryKeys.push(k); } }); - usableQueryKeys = _.intersection(usableQueryKeys, indexNames); - if (usableQueryKeys.length > 0) { return this.indexes[usableQueryKeys[0]].getMatching(query[usableQueryKeys[0]]); - } else { - return this.getAllData(); } + + // For a $in match + usableQueryKeys = []; + Object.keys(query).forEach(function (k) { + if (query[k].hasOwnProperty('$in')) { + usableQueryKeys.push(k); + } + }); + usableQueryKeys = _.intersection(usableQueryKeys, indexNames); + if (usableQueryKeys.length > 0) { + return this.indexes[usableQueryKeys[0]].getMatching(query[usableQueryKeys[0]].$in); + } + + // By default, return all the DB data + return this.getAllData(); }; diff --git a/lib/executor.js b/lib/executor.js index e8fb94f..a8eace0 100644 --- a/lib/executor.js +++ b/lib/executor.js @@ -1,7 +1,5 @@ /** * Responsible for sequentially executing actions on the database - * async.queue is actually slowing down the code (10% hit on inserts, 3% on updates/removes) - * That's not critical for the intended uses but I may reimplement this in the future */ var async = require('async') @@ -46,6 +44,5 @@ Executor.prototype.push = function () { - // Interface module.exports = Executor; diff --git a/lib/indexes.js b/lib/indexes.js index 5dec521..b5bf4e5 100644 --- a/lib/indexes.js +++ b/lib/indexes.js @@ -182,15 +182,31 @@ Index.prototype.revertUpdate = 
function (oldDoc, newDoc) { }; +// Append all elements in toAppend to array +function append (array, toAppend) { + var i; + + for (i = 0; i < toAppend.length; i += 1) { + array.push(toAppend[i]); + } +} + + /** - * Get all documents in index that match the query on fieldName - * For now only works with field equality (i.e. can't use the index for $lt query for example) - * And doesn't return non indexed docs + * Get all documents in index whose key matches value (if it is a Thing) or one of the elements of value (if it is an array of Things) * @param {Thing} value Value to match the key against * @return {Array of documents} */ Index.prototype.getMatching = function (value) { - return this.tree.search(value); + var res, self = this; + + if (!util.isArray(value)) { + return this.tree.search(value); + } else { + res = []; + value.forEach(function (v) { append(res, self.getMatching(v)); }); + return res; + } }; diff --git a/package.json b/package.json index fbbdbaa..6e1e75c 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,7 @@ "dependencies": { "async": "~0.2.8", "underscore": "~1.4.4", - "binary-search-tree": "0.2.2" + "binary-search-tree": "0.2.3" }, "devDependencies": { "chai": "1.0.x", diff --git a/test/db.test.js b/test/db.test.js index d1f8d38..a67ccf2 100644 --- a/test/db.test.js +++ b/test/db.test.js @@ -449,6 +449,84 @@ describe('Database', function () { }); // ==== End of 'Insert' ==== // + describe('#getCandidates', function () { + + it('Can use an index to get docs with a basic match', function (done) { + d.ensureIndex({ fieldName: 'tf' }, function (err) { + d.insert({ tf: 4 }, function (err, _doc1) { + d.insert({ tf: 6 }, function () { + d.insert({ tf: 4, an: 'other' }, function (err, _doc2) { + d.insert({ tf: 9 }, function () { + var data = d.getCandidates({ r: 6, tf: 4 }) + , doc1 = _.find(data, function (d) { return d._id === _doc1._id; }) + , doc2 = _.find(data, function (d) { return d._id === _doc2._id; }) + ; + + data.length.should.equal(2); 
+ assert.deepEqual(doc1, { _id: doc1._id, tf: 4 }); + assert.deepEqual(doc2, { _id: doc2._id, tf: 4, an: 'other' }); + + done(); + }); + }); + }); + }); + }); + }); + + it('Can use an index to get docs with a $in match', function (done) { + d.ensureIndex({ fieldName: 'tf' }, function (err) { + d.insert({ tf: 4 }, function (err) { + d.insert({ tf: 6 }, function (err, _doc1) { + d.insert({ tf: 4, an: 'other' }, function (err) { + d.insert({ tf: 9 }, function (err, _doc2) { + var data = d.getCandidates({ r: 6, tf: { $in: [6, 9, 5] } }) + , doc1 = _.find(data, function (d) { return d._id === _doc1._id; }) + , doc2 = _.find(data, function (d) { return d._id === _doc2._id; }) + ; + + data.length.should.equal(2); + assert.deepEqual(doc1, { _id: doc1._id, tf: 6 }); + assert.deepEqual(doc2, { _id: doc2._id, tf: 9 }); + + done(); + }); + }); + }); + }); + }); + }); + + it('If no index can be used, return the whole database', function (done) { + d.ensureIndex({ fieldName: 'tf' }, function (err) { + d.insert({ tf: 4 }, function (err, _doc1) { + d.insert({ tf: 6 }, function (err, _doc2) { + d.insert({ tf: 4, an: 'other' }, function (err, _doc3) { + d.insert({ tf: 9 }, function (err, _doc4) { + var data = d.getCandidates({ r: 6, notf: { $in: [6, 9, 5] } }) + , doc1 = _.find(data, function (d) { return d._id === _doc1._id; }) + , doc2 = _.find(data, function (d) { return d._id === _doc2._id; }) + , doc3 = _.find(data, function (d) { return d._id === _doc3._id; }) + , doc4 = _.find(data, function (d) { return d._id === _doc4._id; }) + ; + + data.length.should.equal(4); + assert.deepEqual(doc1, { _id: doc1._id, tf: 4 }); + assert.deepEqual(doc2, { _id: doc2._id, tf: 6 }); + assert.deepEqual(doc3, { _id: doc3._id, tf: 4, an: 'other' }); + assert.deepEqual(doc4, { _id: doc4._id, tf: 9 }); + + done(); + }); + }); + }); + }); + }); + }); + + }); // ==== End of '#getCandidates' ==== // + + describe('Find', function () { it('Can find all documents if an empty query is used', function 
(done) { diff --git a/test/indexes.test.js b/test/indexes.test.js index 5ce89de..9ccd857 100644 --- a/test/indexes.test.js +++ b/test/indexes.test.js @@ -526,6 +526,28 @@ describe('Indexes', function () { assert.deepEqual(idx.getMatching(undefined), []); }); + it('Can get all documents whose key is in an array of keys', function () { + var idx = new Index({ fieldName: 'tf' }) + , doc1 = { a: 5, tf: 'hello' } + , doc2 = { a: 2, tf: 'bloup' } + , doc3 = { a: 8, tf: 'world' } + , doc4 = { a: 7, tf: 'yes' } + , doc5 = { a: 7, tf: 'yes' } + ; + + idx.insert(doc1); + idx.insert(doc2); + idx.insert(doc3); + idx.insert(doc4); + idx.insert(doc5); + + assert.deepEqual(idx.getMatching([]), []); + assert.deepEqual(idx.getMatching(['bloup']), [doc2]); + assert.deepEqual(idx.getMatching(['bloup', 'yes']), [doc2, doc4, doc5]); + assert.deepEqual(idx.getMatching(['hello', 'no']), [doc1]); + assert.deepEqual(idx.getMatching(['nope', 'no']), []); + }); + }); // ==== End of 'Get matching documents' ==== //