From fbe2c828e706e6c5cb7edbec8bc12d2fb7c22f9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hermann?= Date: Sun, 23 Oct 2022 19:25:15 -0400 Subject: [PATCH] implement compound indexes --- README.md | 16 ++++++- index.d.ts | 6 +-- lib/datastore.js | 29 +++++++++--- lib/indexes.js | 6 +-- lib/model.js | 18 +++++++ lib/persistence.js | 2 +- lib/utils.js | 21 +++++++++ test/db.async.test.js | 73 +++++++++++++++++++++++++++++ test/db.test.js | 106 +++++++++++++++++++++++++++++++++++++++++- test/indexes.test.js | 65 ++++++++++++++++++++++++++ 10 files changed, 326 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 7ca048c..2384d61 100755 --- a/README.md +++ b/README.md @@ -144,7 +144,7 @@ If the document does not contain an `_id` field, NeDB will automatically generate one for you (a 16-characters alphanumerical string). The `_id` of a document, once set, cannot be modified. -Field names cannot begin by '$' or contain a '.'. +Field names cannot start with '$' or contain the characters '.' and ','. ```javascript const doc = { @@ -698,6 +698,15 @@ fields in nested documents using the dot notation. For now, indexes are only used to speed up basic queries and queries using `$in`, `$lt`, `$lte`, `$gt` and `$gte`. The indexed values cannot be of type array of object. +**Breaking change**: [since v3.2.0](./CHANGELOG.md), comma can no longer be used in indexed field names. + +The following is illegal: +```javascript +db.ensureIndexAsync({ fieldName: 'some,field' }) +db.ensureIndexAsync({ fieldName: ['some,field', 'other,field'] }) +``` +This is a side effect of the compound index implementation. + To create an index, use [`datastore#ensureIndexAsync(options)`](./API.md#Datastore+ensureIndexAsync). It resolves when the index is persisted on disk (if the database is persistent) and may throw an Error (usually a unique constraint that was violated). It can @@ -705,7 +714,7 @@ be called when you want, even after some data was inserted, though it's best to call it at application startup. The options are: * **fieldName** (required): name of the field to index. Use the dot notation to - index a field in a nested document. + index a field in a nested document. For a compound index, use an array of field names. * **unique** (optional, defaults to `false`): enforce field uniqueness. * **sparse** (optional, defaults to `false`): don't index documents for which the field is not defined. @@ -735,6 +744,9 @@ await db.ensureIndexAsync({ sparse: true }) +// Using a compound index +await db.ensureIndexAsync({ fieldName: ["field1", "field2"] }); + try { // Format of the error message when the unique constraint is not met await db.insertAsync({ somefield: '@seald-io/nedb' }) diff --git a/index.d.ts b/index.d.ts index b3059ab..0be859e 100644 --- a/index.d.ts +++ b/index.d.ts @@ -42,9 +42,9 @@ declare class Nedb extends EventEmitter { ensureIndexAsync(options: Nedb.EnsureIndexOptions): Promise; - removeIndex(fieldName: string, callback?: (err: Error | null) => void): void; + removeIndex(fieldName: string | string[], callback?: (err: Error | null) => void): void; - removeIndexAsync(fieldName: string): Promise; + removeIndexAsync(fieldName: string | string[]): Promise; insert(newDoc: T, callback?: (err: Error | null, document: T) => void): void; insert(newDocs: T[], callback?: (err: Error | null, documents: T[]) => void): void; @@ -128,7 +128,7 @@ declare namespace Nedb { } interface EnsureIndexOptions { - fieldName: string; + fieldName: string | string[]; unique?: boolean; sparse?: boolean; expireAfterSeconds?: number; diff --git a/lib/datastore.js b/lib/datastore.js index 1eabc88..11050b9 100755 --- a/lib/datastore.js +++ b/lib/datastore.js @@ -6,7 +6,7 @@ const Executor = require('./executor.js') const Index = require('./indexes.js') const model = require('./model.js') const Persistence = require('./persistence.js') -const { isDate } = require('./utils.js') +const { isDate, pick, filterIndexNames } = require('./utils.js') /** * Callback with no parameter @@ -449,7 +449,7 @@ class Datastore extends EventEmitter { * executor. * @param {object} options * @param {string} options.fieldName Name of the field to index. Use the dot notation to index a field in a nested - * document. + * document. For a compound index, use an array of field names. Using a comma in a field name is not permitted. * @param {boolean} [options.unique = false] Enforce field uniqueness. Note that a unique index will raise an error * if you try to index two documents for which the field is not defined. * @param {boolean} [options.sparse = false] Don't index documents for which the field is not defined. Use this option @@ -465,6 +465,12 @@ class Datastore extends EventEmitter { err.missingFieldName = true throw err } + if (Array.isArray(options.fieldName)) { + options.fieldName.sort() + } + if ([].concat(options.fieldName).some(field => field.includes(','))) { + throw new Error('Cannot use comma in index fieldName') + } if (this.indexes[options.fieldName]) return this.indexes[options.fieldName] = new Index(options) @@ -598,16 +604,27 @@ class Datastore extends EventEmitter { */ _getRawCandidates (query) { const indexNames = Object.keys(this.indexes) + // STEP 1: get candidates list by checking indexes from most to least frequent usecase // For a basic match + let usableQuery usableQuery = Object.entries(query) - .filter(([k, v]) => - !!(typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean' || isDate(v) || v === null) && - indexNames.includes(k) - ) + .filter(filterIndexNames(indexNames)) .pop() if (usableQuery) return this.indexes[usableQuery[0]].getMatching(usableQuery[1]) + + // For a compound match + const compoundQueryKeys = indexNames + .filter(indexName => indexName.indexOf(',') !== -1) + .map(indexName => indexName.split(',')) + .filter(subIndexNames => + Object.entries(query) + .filter(filterIndexNames(subIndexNames)).length === subIndexNames.length + ) + + if (compoundQueryKeys.length > 0) return this.indexes[compoundQueryKeys[0]].getMatching(pick(query, compoundQueryKeys[0])) + // For a $in match usableQuery = Object.entries(query) .filter(([k, v]) => diff --git a/lib/indexes.js b/lib/indexes.js index fee3ccc..e1b5376 100755 --- a/lib/indexes.js +++ b/lib/indexes.js @@ -99,10 +99,10 @@ class Index { return } - const key = model.getDotValue(doc, this.fieldName) + const key = model.getDotValues(doc, this.fieldName) // We don't index documents that don't contain the field if the index is sparse - if (key === undefined && this.sparse) return + if ((key === undefined || (typeof key === 'object' && key !== null && Object.values(key).every(el => el === undefined))) && this.sparse) return if (!Array.isArray(key)) this.tree.insert(key, doc) else { @@ -171,7 +171,7 @@ class Index { return } - const key = model.getDotValue(doc, this.fieldName) + const key = model.getDotValues(doc, this.fieldName) if (key === undefined && this.sparse) return diff --git a/lib/model.js b/lib/model.js index b310f07..9ba9724 100755 --- a/lib/model.js +++ b/lib/model.js @@ -503,6 +503,23 @@ const getDotValue = (obj, field) => { } else return getDotValue(obj[fieldParts[0]], fieldParts.slice(1)) } +/** + * Get dot values for either a bunch of fields or just one. + */ +const getDotValues = (obj, fields) => { + if (Array.isArray(fields)) { + const key = {} + const len = fields.length + for (let i = 0; i < len; i++) { + const field = fields[i] + key[field] = getDotValue(obj, field) + } + return key + } else { + return getDotValue(obj, fields) + } +} + /** * Check whether 'things' are equal * Things are defined as any native types (string, number, boolean, null, date) and objects @@ -807,6 +824,7 @@ module.exports.checkObject = checkObject module.exports.isPrimitiveType = isPrimitiveType module.exports.modify = modify module.exports.getDotValue = getDotValue +module.exports.getDotValues = getDotValues module.exports.match = match module.exports.areThingsEqual = areThingsEqual module.exports.compareThings = compareThings diff --git a/lib/persistence.js b/lib/persistence.js index 5f96d0a..c704a7c 100755 --- a/lib/persistence.js +++ b/lib/persistence.js @@ -107,7 +107,7 @@ class Persistence { if (fieldName !== '_id') { // The special _id index is managed by datastore.js, the others need to be persisted lines.push(this.afterSerialization(model.serialize({ $$indexCreated: { - fieldName: fieldName, + fieldName: this.db.indexes[fieldName].fieldName, unique: this.db.indexes[fieldName].unique, sparse: this.db.indexes[fieldName].sparse } diff --git a/lib/utils.js b/lib/utils.js index 951df8f..9641972 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -58,6 +58,27 @@ const isDate = d => isObject(d) && Object.prototype.toString.call(d) === '[objec */ const isRegExp = re => isObject(re) && Object.prototype.toString.call(re) === '[object RegExp]' +/** + * return a copy of the object that filtered using the given keys + * + * @param {*} object + * @param {*} keys + * @returns + */ +const pick = (object, keys) => { + return keys.reduce((obj, key) => { + if (object && Object.prototype.hasOwnProperty.call(object, key)) { + obj[key] = object[key] + } + return obj + }, {}) +} + +const filterIndexNames = (indexNames) => ([k, v]) => !!(typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean' || isDate(v) || v === null) && +indexNames.includes(k) + module.exports.uniq = uniq module.exports.isDate = isDate module.exports.isRegExp = isRegExp +module.exports.pick = pick +module.exports.filterIndexNames = filterIndexNames diff --git a/test/db.async.test.js b/test/db.async.test.js index 3da4b26..a8ca172 100644 --- a/test/db.async.test.js +++ b/test/db.async.test.js @@ -309,6 +309,18 @@ describe('Database async', function () { assert.deepEqual(doc2, { _id: doc2._id, tf: 4, an: 'other' }) }) + it('Can use a compound index to get docs with a basic match', async () => { + await d.ensureIndexAsync({ fieldName: ['tf', 'tg'] }) + await d.insertAsync({ tf: 4, tg: 0, foo: 1 }) + await d.insertAsync({ tf: 6, tg: 0, foo: 2 }) + const _doc1 = await d.insertAsync({ tf: 4, tg: 1, foo: 3 }) + await d.insertAsync({ tf: 6, tg: 1, foo: 4 }) + const data = await d._getCandidatesAsync({ tf: 4, tg: 1 }) + const doc1 = data.find(d => d._id === _doc1._id) + assert.equal(data.length, 1) + assert.deepEqual(doc1, { _id: doc1._id, tf: 4, tg: 1, foo: 3 }) + }) + it('Can use an index to get docs with a $in match', async () => { await d.ensureIndexAsync({ fieldName: 'tf' }) await d.insertAsync({ tf: 4 }) @@ -1315,6 +1327,67 @@ describe('Database async', function () { assert.equal(d.indexes.planet.getAll().length, 2) }) + it('ensureIndex can be called twice on the same compound field, the second call will ahve no effect', async () => { + assert.equal(Object.keys(d.indexes).length, 1) + assert.equal(Object.keys(d.indexes)[0], '_id') + + await d.insertAsync({ star: 'sun', planet: 'Earth' }) + await d.insertAsync({ star: 'sun', planet: 'Mars' }) + const docs = await d.findAsync({}) + assert.equal(docs.length, 2) + + await d.ensureIndexAsync({ fieldName: ['star', 'planet'] }) + assert.equal(Object.keys(d.indexes).length, 2) + assert.equal(Object.keys(d.indexes)[0], '_id') + assert.equal(Object.keys(d.indexes)[1], 'planet,star') + + assert.equal(d.indexes['planet,star'].getAll().length, 2) + + // This second call has no effect, documents don't get inserted twice in the index + await d.ensureIndexAsync({ fieldName: ['star', 'planet'] }) + assert.equal(Object.keys(d.indexes).length, 2) + assert.equal(Object.keys(d.indexes)[0], '_id') + assert.equal(Object.keys(d.indexes)[1], 'planet,star') + + assert.equal(d.indexes['planet,star'].getAll().length, 2) + }) + + it('ensureIndex can be called twice on the same compound field with a different order, the second call will ahve no effect', async () => { + assert.equal(Object.keys(d.indexes).length, 1) + assert.equal(Object.keys(d.indexes)[0], '_id') + + await d.insertAsync({ star: 'sun', planet: 'Earth' }) + await d.insertAsync({ star: 'sun', planet: 'Mars' }) + const docs = await d.findAsync({}) + assert.equal(docs.length, 2) + + await d.ensureIndexAsync({ fieldName: ['star', 'planet'] }) + assert.equal(Object.keys(d.indexes).length, 2) + assert.equal(Object.keys(d.indexes)[0], '_id') + assert.equal(Object.keys(d.indexes)[1], 'planet,star') + + assert.equal(d.indexes['planet,star'].getAll().length, 2) + + // This second call has no effect, documents don't get inserted twice in the index + await d.ensureIndexAsync({ fieldName: ['planet', 'star'] }) + assert.equal(Object.keys(d.indexes).length, 2) + assert.equal(Object.keys(d.indexes)[0], '_id') + assert.equal(Object.keys(d.indexes)[1], 'planet,star') + + assert.equal(d.indexes['planet,star'].getAll().length, 2) + }) + + it('ensureIndex cannot be called with an illegal field name', async () => { + await assert.rejects(() => d.ensureIndexAsync({ fieldName: 'star,planet' }), err => { + assert.notEqual(err, null) + return true + }) + await assert.rejects(() => d.ensureIndexAsync({ fieldName: ['star,planet', 'other'] }), err => { + assert.notEqual(err, null) + return true + }) + }) + it('ensureIndex can be called after the data set was modified and the index still be correct', async () => { const rawData = model.serialize({ _id: 'aaa', z: '1', a: 2, ages: [1, 5, 12] }) + '\n' + model.serialize({ _id: 'bbb', z: '2', hello: 'world' }) diff --git a/test/db.test.js b/test/db.test.js index 70c9561..40d1ffa 100755 --- a/test/db.test.js +++ b/test/db.test.js @@ -490,6 +490,30 @@ describe('Database', function () { }) }) + it('Can use a compound index to get docs with a basic match', function (done) { + // eslint-disable-next-line node/handle-callback-err + d.ensureIndex({ fieldName: ['tf', 'tg'] }, function (err) { + d.insert({ tf: 4, tg: 0, foo: 1 }, function () { + d.insert({ tf: 6, tg: 0, foo: 2 }, function () { + // eslint-disable-next-line node/handle-callback-err + d.insert({ tf: 4, tg: 1, foo: 3 }, function (err, _doc1) { + d.insert({ tf: 6, tg: 1, foo: 4 }, function () { + // eslint-disable-next-line node/handle-callback-err + callbackify(query => d._getCandidatesAsync(query))({ tf: 4, tg: 1 }, function (err, data) { + const doc1 = data.find(function (d) { return d._id === _doc1._id }) + + data.length.should.equal(1) + assert.deepEqual(doc1, { _id: doc1._id, tf: 4, tg: 1, foo: 3 }) + + done() + }) + }) + }) + }) + }) + }) + }) + it('Can use an index to get docs with a $in match', function (done) { // eslint-disable-next-line node/handle-callback-err d.ensureIndex({ fieldName: 'tf' }, function (err) { @@ -2082,7 +2106,7 @@ describe('Database', function () { }) }) - it('ensureIndex can be called twice on the same field, the second call will ahve no effect', function (done) { + it('ensureIndex can be called twice on the same field, the second call will have no effect', function (done) { Object.keys(d.indexes).length.should.equal(1) Object.keys(d.indexes)[0].should.equal('_id') @@ -2117,6 +2141,86 @@ describe('Database', function () { }) }) + it('ensureIndex can be called twice on the same compound fields, the second call will have no effect', function (done) { + Object.keys(d.indexes).length.should.equal(1) + Object.keys(d.indexes)[0].should.equal('_id') + + d.insert({ star: 'sun', planet: 'Earth' }, function () { + d.insert({ star: 'sun', planet: 'Mars' }, function () { + // eslint-disable-next-line node/handle-callback-err + d.find({}, function (err, docs) { + docs.length.should.equal(2) + + d.ensureIndex({ fieldName: ['star', 'planet'] }, function (err) { + assert.isNull(err) + Object.keys(d.indexes).length.should.equal(2) + Object.keys(d.indexes)[0].should.equal('_id') + Object.keys(d.indexes)[1].should.equal('planet,star') + + d.indexes['planet,star'].getAll().length.should.equal(2) + + // This second call has no effect, documents don't get inserted twice in the index + d.ensureIndex({ fieldName: ['star', 'planet'] }, function (err) { + assert.isNull(err) + Object.keys(d.indexes).length.should.equal(2) + Object.keys(d.indexes)[0].should.equal('_id') + Object.keys(d.indexes)[1].should.equal('planet,star') + + d.indexes['planet,star'].getAll().length.should.equal(2) + + done() + }) + }) + }) + }) + }) + }) + + it('ensureIndex can be called twice on the same compound fields with a different order, the second call will have no effect', function (done) { + Object.keys(d.indexes).length.should.equal(1) + Object.keys(d.indexes)[0].should.equal('_id') + + d.insert({ star: 'sun', planet: 'Earth' }, function () { + d.insert({ star: 'sun', planet: 'Mars' }, function () { + // eslint-disable-next-line node/handle-callback-err + d.find({}, function (err, docs) { + docs.length.should.equal(2) + + d.ensureIndex({ fieldName: ['star', 'planet'] }, function (err) { + assert.isNull(err) + Object.keys(d.indexes).length.should.equal(2) + Object.keys(d.indexes)[0].should.equal('_id') + Object.keys(d.indexes)[1].should.equal('planet,star') + + d.indexes['planet,star'].getAll().length.should.equal(2) + + // This second call has no effect, documents don't get inserted twice in the index + d.ensureIndex({ fieldName: ['planet', 'star'] }, function (err) { + assert.isNull(err) + Object.keys(d.indexes).length.should.equal(2) + Object.keys(d.indexes)[0].should.equal('_id') + Object.keys(d.indexes)[1].should.equal('planet,star') + + d.indexes['planet,star'].getAll().length.should.equal(2) + + done() + }) + }) + }) + }) + }) + }) + + it('ensureIndex cannot be called with an illegal field name', function (done) { + d.ensureIndex({ fieldName: 'star,planet' }, function (err) { + assert.isNotNull(err) + d.ensureIndex({ fieldName: ['star,planet', 'other'] }, function (err) { + assert.isNotNull(err) + done() + }) + }) + }) + it('ensureIndex can be called after the data set was modified and the index still be correct', function (done) { const rawData = model.serialize({ _id: 'aaa', z: '1', a: 2, ages: [1, 5, 12] }) + '\n' + model.serialize({ _id: 'bbb', z: '2', hello: 'world' }) diff --git a/test/indexes.test.js b/test/indexes.test.js index ef45dba..3695204 100755 --- a/test/indexes.test.js +++ b/test/indexes.test.js @@ -29,6 +29,28 @@ describe('Indexes', function () { doc3.a.should.equal(42) }) + it('Can insert pointers to documents in the index correctly when they have compound fields', function () { + const idx = new Index({ fieldName: ['tf', 'tg'] }) + const doc1 = { a: 5, tf: 'hello', tg: 'world' } + const doc2 = { a: 8, tf: 'hello', tg: 'bloup' } + const doc3 = { a: 2, tf: 'bloup', tg: 'bloup' } + + idx.insert(doc1) + idx.insert(doc2) + idx.insert(doc3) + + // The underlying BST now has 3 nodes which contain the docs where it's expected + idx.tree.getNumberOfKeys().should.equal(3) + assert.deepEqual(idx.tree.search({ tf: 'hello', tg: 'world' }), [{ a: 5, tf: 'hello', tg: 'world' }]) + assert.deepEqual(idx.tree.search({ tf: 'hello', tg: 'bloup' }), [{ a: 8, tf: 'hello', tg: 'bloup' }]) + assert.deepEqual(idx.tree.search({ tf: 'bloup', tg: 'bloup' }), [{ a: 2, tf: 'bloup', tg: 'bloup' }]) + + // The nodes contain pointers to the actual documents + idx.tree.search({ tf: 'hello', tg: 'bloup' })[0].should.equal(doc2) + idx.tree.search({ tf: 'bloup', tg: 'bloup' })[0].a = 42 + doc3.a.should.equal(42) + }) + it('Inserting twice for the same fieldName in a unique index will result in an error thrown', function () { const idx = new Index({ fieldName: 'tf', unique: true }) const doc1 = { a: 5, tf: 'hello' } @@ -58,6 +80,25 @@ describe('Indexes', function () { idx.tree.getNumberOfKeys().should.equal(0) // Docs are not indexed }) + it('Inserting twice for the same compound fieldName in a unique index will result in an error thrown', function () { + const idx = new Index({ fieldName: ['tf', 'tg'], unique: true }) + const doc1 = { a: 5, tf: 'hello', tg: 'world' } + + idx.insert(doc1) + idx.tree.getNumberOfKeys().should.equal(1); + (function () { idx.insert(doc1) }).should.throw() + }) + + it('Inserting twice for a compound fieldName the docs dont have with a unique and sparse index will not throw, since the docs will be non indexed', function () { + const idx = new Index({ fieldName: ['nope', 'nopeNope'], unique: true, sparse: true }) + const doc1 = { a: 5, tf: 'hello' } + const doc2 = { a: 5, tf: 'world' } + + idx.insert(doc1) + idx.insert(doc2) + idx.tree.getNumberOfKeys().should.equal(0) // Docs are not indexed + }) + it('Works with dot notation', function () { const idx = new Index({ fieldName: 'tf.nested' }) const doc1 = { a: 5, tf: { nested: 'hello' } } @@ -202,6 +243,30 @@ describe('Indexes', function () { idx.getMatching('ee').length.should.equal(0) }) }) // ==== End of 'Array fields' ==== // + + describe('Compound Indexes', function () { + it('Supports arrays of fieldNames', function () { + const idx = new Index({ fieldName: ['tf', 'tf2'] }) + const doc1 = { a: 5, tf: 'hello', tf2: 7 } + const doc2 = { a: 8, tf: 'hello', tf2: 6 } + const doc3 = { a: 2, tf: 'bloup', tf2: 3 } + + idx.insert(doc1) + idx.insert(doc2) + idx.insert(doc3) + + // The underlying BST now has 3 nodes which contain the docs where it's expected + idx.tree.getNumberOfKeys().should.equal(3) + assert.deepEqual(idx.tree.search({ tf: 'hello', tf2: 7 }), [{ a: 5, tf: 'hello', tf2: 7 }]) + assert.deepEqual(idx.tree.search({ tf: 'hello', tf2: 6 }), [{ a: 8, tf: 'hello', tf2: 6 }]) + assert.deepEqual(idx.tree.search({ tf: 'bloup', tf2: 3 }), [{ a: 2, tf: 'bloup', tf2: 3 }]) + + // The nodes contain pointers to the actual documents + idx.tree.search({ tf: 'hello', tf2: 6 })[0].should.equal(doc2) + idx.tree.search({ tf: 'bloup', tf2: 3 })[0].a = 42 + doc3.a.should.equal(42) + }) + }) }) // ==== End of 'Insertion' ==== // describe('Removal', function () {