implement compound indexes

pull/27/head
Loïc Hermann 2 years ago
parent 1d5c3faec9
commit fbe2c828e7
  1. 16
      README.md
  2. 6
      index.d.ts
  3. 29
      lib/datastore.js
  4. 6
      lib/indexes.js
  5. 18
      lib/model.js
  6. 2
      lib/persistence.js
  7. 21
      lib/utils.js
  8. 73
      test/db.async.test.js
  9. 106
      test/db.test.js
  10. 65
      test/indexes.test.js

@ -144,7 +144,7 @@ If the document does not contain an `_id` field, NeDB will automatically
generate one for you (a 16-characters alphanumerical string). The `_id` of a
document, once set, cannot be modified.
Field names cannot begin by '$' or contain a '.'.
Field names cannot start with '$' or contain the characters '.' and ','.
```javascript
const doc = {
@ -698,6 +698,15 @@ fields in nested documents using the dot notation. For now, indexes are only
used to speed up basic queries and queries using `$in`, `$lt`, `$lte`, `$gt`
and `$gte`. The indexed values cannot be of type array of object.
**Breaking change**: [since v3.2.0](./CHANGELOG.md), comma can no longer be used in indexed field names.
The following is illegal:
```javascript
db.ensureIndexAsync({ fieldName: 'some,field' })
db.ensureIndexAsync({ fieldName: ['some,field', 'other,field'] })
```
This is a side effect of the compound index implementation.
To create an index, use [`datastore#ensureIndexAsync(options)`](./API.md#Datastore+ensureIndexAsync).
It resolves when the index is persisted on disk (if the database is persistent)
and may throw an Error (usually a unique constraint that was violated). It can
@ -705,7 +714,7 @@ be called when you want, even after some data was inserted, though it's best to
call it at application startup. The options are:
* **fieldName** (required): name of the field to index. Use the dot notation to
index a field in a nested document.
index a field in a nested document. For a compound index, use an array of field names.
* **unique** (optional, defaults to `false`): enforce field uniqueness.
* **sparse** (optional, defaults to `false`): don't index documents for which
the field is not defined.
@ -735,6 +744,9 @@ await db.ensureIndexAsync({
sparse: true
})
// Using a compound index
await db.ensureIndexAsync({ fieldName: ["field1", "field2"] });
try {
// Format of the error message when the unique constraint is not met
await db.insertAsync({ somefield: '@seald-io/nedb' })

6
index.d.ts vendored

@ -42,9 +42,9 @@ declare class Nedb<G = any> extends EventEmitter {
ensureIndexAsync(options: Nedb.EnsureIndexOptions): Promise<void>;
removeIndex(fieldName: string, callback?: (err: Error | null) => void): void;
removeIndex(fieldName: string | string[], callback?: (err: Error | null) => void): void;
removeIndexAsync(fieldName: string): Promise<void>;
removeIndexAsync(fieldName: string | string[]): Promise<void>;
insert<T extends G>(newDoc: T, callback?: (err: Error | null, document: T) => void): void;
insert<T extends G>(newDocs: T[], callback?: (err: Error | null, documents: T[]) => void): void;
@ -128,7 +128,7 @@ declare namespace Nedb {
}
interface EnsureIndexOptions {
fieldName: string;
fieldName: string | string[];
unique?: boolean;
sparse?: boolean;
expireAfterSeconds?: number;

@ -6,7 +6,7 @@ const Executor = require('./executor.js')
const Index = require('./indexes.js')
const model = require('./model.js')
const Persistence = require('./persistence.js')
const { isDate } = require('./utils.js')
const { isDate, pick, filterIndexNames } = require('./utils.js')
/**
* Callback with no parameter
@ -449,7 +449,7 @@ class Datastore extends EventEmitter {
* executor.
* @param {object} options
* @param {string|string[]} options.fieldName Name of the field to index. Use the dot notation to index a field in a nested
* document.
* document. For a compound index, use an array of field names. Using a comma in a field name is not permitted.
* @param {boolean} [options.unique = false] Enforce field uniqueness. Note that a unique index will raise an error
* if you try to index two documents for which the field is not defined.
* @param {boolean} [options.sparse = false] Don't index documents for which the field is not defined. Use this option
@ -465,6 +465,12 @@ class Datastore extends EventEmitter {
err.missingFieldName = true
throw err
}
if (Array.isArray(options.fieldName)) {
options.fieldName.sort()
}
if ([].concat(options.fieldName).some(field => field.includes(','))) {
throw new Error('Cannot use comma in index fieldName')
}
if (this.indexes[options.fieldName]) return
this.indexes[options.fieldName] = new Index(options)
@ -598,16 +604,27 @@ class Datastore extends EventEmitter {
*/
_getRawCandidates (query) {
const indexNames = Object.keys(this.indexes)
// STEP 1: get candidates list by checking indexes from most to least frequent usecase
// For a basic match
let usableQuery
usableQuery = Object.entries(query)
.filter(([k, v]) =>
!!(typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean' || isDate(v) || v === null) &&
indexNames.includes(k)
)
.filter(filterIndexNames(indexNames))
.pop()
if (usableQuery) return this.indexes[usableQuery[0]].getMatching(usableQuery[1])
// For a compound match
const compoundQueryKeys = indexNames
.filter(indexName => indexName.indexOf(',') !== -1)
.map(indexName => indexName.split(','))
.filter(subIndexNames =>
Object.entries(query)
.filter(filterIndexNames(subIndexNames)).length === subIndexNames.length
)
if (compoundQueryKeys.length > 0) return this.indexes[compoundQueryKeys[0]].getMatching(pick(query, compoundQueryKeys[0]))
// For a $in match
usableQuery = Object.entries(query)
.filter(([k, v]) =>

@ -99,10 +99,10 @@ class Index {
return
}
const key = model.getDotValue(doc, this.fieldName)
const key = model.getDotValues(doc, this.fieldName)
// We don't index documents that don't contain the field if the index is sparse
if (key === undefined && this.sparse) return
if ((key === undefined || (typeof key === 'object' && key !== null && Object.values(key).every(el => el === undefined))) && this.sparse) return
if (!Array.isArray(key)) this.tree.insert(key, doc)
else {
@ -171,7 +171,7 @@ class Index {
return
}
const key = model.getDotValue(doc, this.fieldName)
const key = model.getDotValues(doc, this.fieldName)
if (key === undefined && this.sparse) return

@ -503,6 +503,23 @@ const getDotValue = (obj, field) => {
} else return getDotValue(obj[fieldParts[0]], fieldParts.slice(1))
}
/**
 * Resolve one or several field paths against a document using getDotValue.
 *
 * @param {object} obj Object handed to getDotValue for every lookup.
 * @param {string|string[]} fields A single field path, or an array of field paths.
 * @return {*} When `fields` is an array, an object whose keys are the given
 * paths and whose values are the corresponding getDotValue results; otherwise
 * the getDotValue result for the single path.
 */
const getDotValues = (obj, fields) => {
  // Single field: nothing to aggregate, delegate directly
  if (!Array.isArray(fields)) return getDotValue(obj, fields)

  // Several fields: build one composite key, one entry per field path
  const key = {}
  for (const field of fields) key[field] = getDotValue(obj, field)
  return key
}
/**
* Check whether 'things' are equal
* Things are defined as any native types (string, number, boolean, null, date) and objects
@ -807,6 +824,7 @@ module.exports.checkObject = checkObject
module.exports.isPrimitiveType = isPrimitiveType
module.exports.modify = modify
module.exports.getDotValue = getDotValue
module.exports.getDotValues = getDotValues
module.exports.match = match
module.exports.areThingsEqual = areThingsEqual
module.exports.compareThings = compareThings

@ -107,7 +107,7 @@ class Persistence {
if (fieldName !== '_id') { // The special _id index is managed by datastore.js, the others need to be persisted
lines.push(this.afterSerialization(model.serialize({
$$indexCreated: {
fieldName: fieldName,
fieldName: this.db.indexes[fieldName].fieldName,
unique: this.db.indexes[fieldName].unique,
sparse: this.db.indexes[fieldName].sparse
}

@ -58,6 +58,27 @@ const isDate = d => isObject(d) && Object.prototype.toString.call(d) === '[objec
*/
const isRegExp = re => isObject(re) && Object.prototype.toString.call(re) === '[object RegExp]'
/**
 * Return a new object holding only the listed keys of `object`.
 * Keys that are not own properties of `object` are left out, and a falsy
 * `object` yields an empty result.
 *
 * @param {*} object Source to copy properties from.
 * @param {Array} keys Names of the properties to keep.
 * @returns {object} A fresh object with the selected own properties.
 */
const pick = (object, keys) => {
  const isOwnKey = key => Boolean(object) && Object.prototype.hasOwnProperty.call(object, key)
  const picked = {}
  keys.filter(isOwnKey).forEach(key => {
    picked[key] = object[key]
  })
  return picked
}
/**
 * Build a predicate over `[key, value]` query entries.
 * An entry is accepted when its value is a string, number, boolean, date or
 * null, and its key is listed in `indexNames`.
 *
 * @param {string[]} indexNames Names an entry key must belong to.
 * @returns {function(Array): boolean} Predicate suitable for filtering `Object.entries(query)`.
 */
const filterIndexNames = (indexNames) => (entry) => {
  const [field, value] = entry
  const valueType = typeof value
  const isPrimitive = valueType === 'string' || valueType === 'number' || valueType === 'boolean'
  // Only dates and null are accepted besides primitives
  if (!isPrimitive && !isDate(value) && value !== null) return false
  return indexNames.includes(field)
}
module.exports.uniq = uniq
module.exports.isDate = isDate
module.exports.isRegExp = isRegExp
module.exports.pick = pick
module.exports.filterIndexNames = filterIndexNames

@ -309,6 +309,18 @@ describe('Database async', function () {
assert.deepEqual(doc2, { _id: doc2._id, tf: 4, an: 'other' })
})
it('Can use a compound index to get docs with a basic match', async () => {
  await d.ensureIndexAsync({ fieldName: ['tf', 'tg'] })

  // Four docs covering every (tf, tg) combination; only one has tf = 4 and tg = 1
  await d.insertAsync({ tf: 4, tg: 0, foo: 1 })
  await d.insertAsync({ tf: 6, tg: 0, foo: 2 })
  const expected = await d.insertAsync({ tf: 4, tg: 1, foo: 3 })
  await d.insertAsync({ tf: 6, tg: 1, foo: 4 })

  const candidates = await d._getCandidatesAsync({ tf: 4, tg: 1 })
  const found = candidates.find(candidate => candidate._id === expected._id)
  assert.equal(candidates.length, 1)
  assert.deepEqual(found, { _id: found._id, tf: 4, tg: 1, foo: 3 })
})
it('Can use an index to get docs with a $in match', async () => {
await d.ensureIndexAsync({ fieldName: 'tf' })
await d.insertAsync({ tf: 4 })
@ -1315,6 +1327,67 @@ describe('Database async', function () {
assert.equal(d.indexes.planet.getAll().length, 2)
})
it('ensureIndex can be called twice on the same compound field, the second call will have no effect', async () => {
  // State expected after the compound index exists: '_id' plus one index
  // keyed 'planet,star' (field names sorted, comma-joined) holding both docs.
  const assertSingleCompoundIndex = () => {
    assert.equal(Object.keys(d.indexes).length, 2)
    assert.equal(Object.keys(d.indexes)[0], '_id')
    assert.equal(Object.keys(d.indexes)[1], 'planet,star')
    assert.equal(d.indexes['planet,star'].getAll().length, 2)
  }

  // Only the built-in _id index exists at the start
  assert.equal(Object.keys(d.indexes).length, 1)
  assert.equal(Object.keys(d.indexes)[0], '_id')

  await d.insertAsync({ star: 'sun', planet: 'Earth' })
  await d.insertAsync({ star: 'sun', planet: 'Mars' })
  const docs = await d.findAsync({})
  assert.equal(docs.length, 2)

  await d.ensureIndexAsync({ fieldName: ['star', 'planet'] })
  assertSingleCompoundIndex()

  // This second call has no effect, documents don't get inserted twice in the index
  await d.ensureIndexAsync({ fieldName: ['star', 'planet'] })
  assertSingleCompoundIndex()
})
it('ensureIndex can be called twice on the same compound field with a different order, the second call will have no effect', async () => {
  // State expected after the compound index exists: '_id' plus one index
  // keyed 'planet,star' (field names sorted, comma-joined) holding both docs.
  const assertSingleCompoundIndex = () => {
    assert.equal(Object.keys(d.indexes).length, 2)
    assert.equal(Object.keys(d.indexes)[0], '_id')
    assert.equal(Object.keys(d.indexes)[1], 'planet,star')
    assert.equal(d.indexes['planet,star'].getAll().length, 2)
  }

  // Only the built-in _id index exists at the start
  assert.equal(Object.keys(d.indexes).length, 1)
  assert.equal(Object.keys(d.indexes)[0], '_id')

  await d.insertAsync({ star: 'sun', planet: 'Earth' })
  await d.insertAsync({ star: 'sun', planet: 'Mars' })
  const docs = await d.findAsync({})
  assert.equal(docs.length, 2)

  await d.ensureIndexAsync({ fieldName: ['star', 'planet'] })
  assertSingleCompoundIndex()

  // Same fields in the opposite order: this second call has no effect,
  // documents don't get inserted twice in the index
  await d.ensureIndexAsync({ fieldName: ['planet', 'star'] })
  assertSingleCompoundIndex()
})
it('ensureIndex cannot be called with an illegal field name', async () => {
  // Validator for assert.rejects: any non-null rejection value is accepted
  const isNotNull = err => {
    assert.notEqual(err, null)
    return true
  }

  // A comma is illegal both in a single field name and inside a compound list
  const illegalFieldNames = ['star,planet', ['star,planet', 'other']]
  for (const fieldName of illegalFieldNames) {
    await assert.rejects(() => d.ensureIndexAsync({ fieldName }), isNotNull)
  }
})
it('ensureIndex can be called after the data set was modified and the index still be correct', async () => {
const rawData = model.serialize({ _id: 'aaa', z: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
model.serialize({ _id: 'bbb', z: '2', hello: 'world' })

@ -490,6 +490,30 @@ describe('Database', function () {
})
})
it('Can use a compound index to get docs with a basic match', function (done) {
// Callback-style test: create a compound index on (tf, tg), insert four docs
// covering every (tf, tg) combination, then check that the candidates for
// { tf: 4, tg: 1 } contain exactly the one matching document.
// eslint-disable-next-line node/handle-callback-err
d.ensureIndex({ fieldName: ['tf', 'tg'] }, function (err) {
d.insert({ tf: 4, tg: 0, foo: 1 }, function () {
d.insert({ tf: 6, tg: 0, foo: 2 }, function () {
// _doc1 is the only inserted document with tf = 4 and tg = 1
// eslint-disable-next-line node/handle-callback-err
d.insert({ tf: 4, tg: 1, foo: 3 }, function (err, _doc1) {
d.insert({ tf: 6, tg: 1, foo: 4 }, function () {
// _getCandidatesAsync returns a promise; callbackify adapts it to this callback chain
// eslint-disable-next-line node/handle-callback-err
callbackify(query => d._getCandidatesAsync(query))({ tf: 4, tg: 1 }, function (err, data) {
const doc1 = data.find(function (d) { return d._id === _doc1._id })
data.length.should.equal(1)
assert.deepEqual(doc1, { _id: doc1._id, tf: 4, tg: 1, foo: 3 })
done()
})
})
})
})
})
})
})
it('Can use an index to get docs with a $in match', function (done) {
// eslint-disable-next-line node/handle-callback-err
d.ensureIndex({ fieldName: 'tf' }, function (err) {
@ -2082,7 +2106,7 @@ describe('Database', function () {
})
})
it('ensureIndex can be called twice on the same field, the second call will ahve no effect', function (done) {
it('ensureIndex can be called twice on the same field, the second call will have no effect', function (done) {
Object.keys(d.indexes).length.should.equal(1)
Object.keys(d.indexes)[0].should.equal('_id')
@ -2117,6 +2141,86 @@ describe('Database', function () {
})
})
it('ensureIndex can be called twice on the same compound fields, the second call will have no effect', function (done) {
// Callback-style test: the datastore starts with only the _id index, two docs
// are inserted, and ensureIndex is then called twice with the same field list.
Object.keys(d.indexes).length.should.equal(1)
Object.keys(d.indexes)[0].should.equal('_id')
d.insert({ star: 'sun', planet: 'Earth' }, function () {
d.insert({ star: 'sun', planet: 'Mars' }, function () {
// eslint-disable-next-line node/handle-callback-err
d.find({}, function (err, docs) {
docs.length.should.equal(2)
d.ensureIndex({ fieldName: ['star', 'planet'] }, function (err) {
assert.isNull(err)
// The test expects the index to be stored under the key 'planet,star'
Object.keys(d.indexes).length.should.equal(2)
Object.keys(d.indexes)[0].should.equal('_id')
Object.keys(d.indexes)[1].should.equal('planet,star')
d.indexes['planet,star'].getAll().length.should.equal(2)
// This second call has no effect, documents don't get inserted twice in the index
d.ensureIndex({ fieldName: ['star', 'planet'] }, function (err) {
assert.isNull(err)
Object.keys(d.indexes).length.should.equal(2)
Object.keys(d.indexes)[0].should.equal('_id')
Object.keys(d.indexes)[1].should.equal('planet,star')
d.indexes['planet,star'].getAll().length.should.equal(2)
done()
})
})
})
})
})
})
it('ensureIndex can be called twice on the same compound fields with a different order, the second call will have no effect', function (done) {
// Callback-style test: the datastore starts with only the _id index, two docs
// are inserted, and ensureIndex is then called with ['star', 'planet'] followed
// by ['planet', 'star'].
Object.keys(d.indexes).length.should.equal(1)
Object.keys(d.indexes)[0].should.equal('_id')
d.insert({ star: 'sun', planet: 'Earth' }, function () {
d.insert({ star: 'sun', planet: 'Mars' }, function () {
// eslint-disable-next-line node/handle-callback-err
d.find({}, function (err, docs) {
docs.length.should.equal(2)
d.ensureIndex({ fieldName: ['star', 'planet'] }, function (err) {
assert.isNull(err)
// The test expects the index to be stored under the key 'planet,star'
Object.keys(d.indexes).length.should.equal(2)
Object.keys(d.indexes)[0].should.equal('_id')
Object.keys(d.indexes)[1].should.equal('planet,star')
d.indexes['planet,star'].getAll().length.should.equal(2)
// This second call has no effect, documents don't get inserted twice in the index
d.ensureIndex({ fieldName: ['planet', 'star'] }, function (err) {
assert.isNull(err)
// Same expectations as after the first call: still one compound index with two docs
Object.keys(d.indexes).length.should.equal(2)
Object.keys(d.indexes)[0].should.equal('_id')
Object.keys(d.indexes)[1].should.equal('planet,star')
d.indexes['planet,star'].getAll().length.should.equal(2)
done()
})
})
})
})
})
})
it('ensureIndex cannot be called with an illegal field name', function (done) {
  const illegalSingle = { fieldName: 'star,planet' }
  const illegalCompound = { fieldName: ['star,planet', 'other'] }

  // A comma in a plain field name must be reported through the callback...
  d.ensureIndex(illegalSingle, function (singleErr) {
    assert.isNotNull(singleErr)
    // ...and so must a comma in any member of a compound field list
    d.ensureIndex(illegalCompound, function (compoundErr) {
      assert.isNotNull(compoundErr)
      done()
    })
  })
})
it('ensureIndex can be called after the data set was modified and the index still be correct', function (done) {
const rawData = model.serialize({ _id: 'aaa', z: '1', a: 2, ages: [1, 5, 12] }) + '\n' +
model.serialize({ _id: 'bbb', z: '2', hello: 'world' })

@ -29,6 +29,28 @@ describe('Indexes', function () {
doc3.a.should.equal(42)
})
it('Can insert pointers to documents in the index correctly when they have compound fields', function () {
  const idx = new Index({ fieldName: ['tf', 'tg'] })
  const helloWorld = { a: 5, tf: 'hello', tg: 'world' }
  const helloBloup = { a: 8, tf: 'hello', tg: 'bloup' }
  const bloupBloup = { a: 2, tf: 'bloup', tg: 'bloup' }

  for (const doc of [helloWorld, helloBloup, bloupBloup]) idx.insert(doc)

  // The underlying BST now has 3 nodes which contain the docs where it's expected
  idx.tree.getNumberOfKeys().should.equal(3)
  assert.deepEqual(idx.tree.search({ tf: 'hello', tg: 'world' }), [{ a: 5, tf: 'hello', tg: 'world' }])
  assert.deepEqual(idx.tree.search({ tf: 'hello', tg: 'bloup' }), [{ a: 8, tf: 'hello', tg: 'bloup' }])
  assert.deepEqual(idx.tree.search({ tf: 'bloup', tg: 'bloup' }), [{ a: 2, tf: 'bloup', tg: 'bloup' }])

  // The nodes hold references to the inserted objects, not copies:
  // mutating a search result is visible on the original document
  idx.tree.search({ tf: 'hello', tg: 'bloup' })[0].should.equal(helloBloup)
  idx.tree.search({ tf: 'bloup', tg: 'bloup' })[0].a = 42
  bloupBloup.a.should.equal(42)
})
it('Inserting twice for the same fieldName in a unique index will result in an error thrown', function () {
const idx = new Index({ fieldName: 'tf', unique: true })
const doc1 = { a: 5, tf: 'hello' }
@ -58,6 +80,25 @@ describe('Indexes', function () {
idx.tree.getNumberOfKeys().should.equal(0) // Docs are not indexed
})
it('Inserting twice for the same compound fieldName in a unique index will result in an error thrown', function () {
  const idx = new Index({ fieldName: ['tf', 'tg'], unique: true })
  const doc1 = { a: 5, tf: 'hello', tg: 'world' }
  const insertDoc1 = function () { idx.insert(doc1) }

  // First insertion succeeds and creates a single key
  insertDoc1()
  idx.tree.getNumberOfKeys().should.equal(1)

  // Second insertion of the same (tf, tg) pair violates the unique constraint
  insertDoc1.should.throw()
})
it('Inserting twice for a compound fieldName the docs dont have with a unique and sparse index will not throw, since the docs will be non indexed', function () {
  const idx = new Index({ fieldName: ['nope', 'nopeNope'], unique: true, sparse: true })
  // Neither document defines 'nope' or 'nopeNope'
  const docs = [
    { a: 5, tf: 'hello' },
    { a: 5, tf: 'world' }
  ]

  docs.forEach(doc => idx.insert(doc))

  idx.tree.getNumberOfKeys().should.equal(0) // Docs are not indexed
})
it('Works with dot notation', function () {
const idx = new Index({ fieldName: 'tf.nested' })
const doc1 = { a: 5, tf: { nested: 'hello' } }
@ -202,6 +243,30 @@ describe('Indexes', function () {
idx.getMatching('ee').length.should.equal(0)
})
}) // ==== End of 'Array fields' ==== //
describe('Compound Indexes', function () {
  it('Supports arrays of fieldNames', function () {
    const idx = new Index({ fieldName: ['tf', 'tf2'] })
    const hello7 = { a: 5, tf: 'hello', tf2: 7 }
    const hello6 = { a: 8, tf: 'hello', tf2: 6 }
    const bloup3 = { a: 2, tf: 'bloup', tf2: 3 }

    for (const doc of [hello7, hello6, bloup3]) idx.insert(doc)

    // The underlying BST now has 3 nodes which contain the docs where it's expected
    idx.tree.getNumberOfKeys().should.equal(3)
    assert.deepEqual(idx.tree.search({ tf: 'hello', tf2: 7 }), [{ a: 5, tf: 'hello', tf2: 7 }])
    assert.deepEqual(idx.tree.search({ tf: 'hello', tf2: 6 }), [{ a: 8, tf: 'hello', tf2: 6 }])
    assert.deepEqual(idx.tree.search({ tf: 'bloup', tf2: 3 }), [{ a: 2, tf: 'bloup', tf2: 3 }])

    // The nodes hold references to the inserted objects, not copies:
    // mutating a search result is visible on the original document
    idx.tree.search({ tf: 'hello', tf2: 6 })[0].should.equal(hello6)
    idx.tree.search({ tf: 'bloup', tf2: 3 })[0].a = 42
    bloup3.a.should.equal(42)
  })
})
}) // ==== End of 'Insertion' ==== //
describe('Removal', function () {

Loading…
Cancel
Save