Commit cb78e69

fix(encryption): encryption uses smaller batch size

Part of NODE-2255

1 parent 036fdf2

4 files changed: +246 -12 lines changed

lib/bulk/common.js (+10 -4)

@@ -749,14 +749,19 @@ class BulkOperationBase {
 
     // Handle to the bson serializer, used to calculate running sizes
     const bson = topology.bson;
-
     // Set max byte size
     const isMaster = topology.lastIsMaster();
-    const maxBatchSizeBytes =
+
+    // If we have autoEncryption on, batch-splitting must be done on 2mb chunks, but single documents
+    // over 2mb are still allowed
+    const autoEncrypter = topology.s.options && topology.s.options.autoEncrypter;
+    const maxBsonObjectSize =
       isMaster && isMaster.maxBsonObjectSize ? isMaster.maxBsonObjectSize : 1024 * 1024 * 16;
+    const maxBatchSizeBytes = autoEncrypter ? 1024 * 1024 * 2 : maxBsonObjectSize;
     const maxWriteBatchSize =
       isMaster && isMaster.maxWriteBatchSize ? isMaster.maxWriteBatchSize : 1000;
 
+
     // Calculates the largest possible size of an Array key, represented as a BSON string
     // element. This calculation:
     // 1 byte for BSON type

@@ -805,8 +810,9 @@ class BulkOperationBase {
       // Write concern
       writeConcern: writeConcern,
       // Max batch size options
-      maxBatchSizeBytes: maxBatchSizeBytes,
-      maxWriteBatchSize: maxWriteBatchSize,
+      maxBsonObjectSize,
+      maxBatchSizeBytes,
+      maxWriteBatchSize,
       maxKeySize,
       // Namespace
       namespace: namespace,
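
Taken together, the two hunks above split one limit into two: maxBsonObjectSize (16 MiB by default) still caps a single document, while maxBatchSizeBytes drops to 2 MiB whenever an autoEncrypter is attached and now only controls where a bulk write is batch-split. A minimal sketch of that selection logic; the isMaster values below are hypothetical stand-ins for a real server handshake response:

// Minimal sketch of the limit selection above; the isMaster values are
// hypothetical stand-ins for a real server handshake response.
const isMaster = { maxBsonObjectSize: 16777216, maxWriteBatchSize: 100000 };
const autoEncrypter = true; // pretend autoEncryption is configured

const maxBsonObjectSize =
  isMaster && isMaster.maxBsonObjectSize ? isMaster.maxBsonObjectSize : 1024 * 1024 * 16;
// With encryption on, batches split at 2 MiB; otherwise they split at the BSON max.
const maxBatchSizeBytes = autoEncrypter ? 1024 * 1024 * 2 : maxBsonObjectSize;

console.log(maxBsonObjectSize); // 16777216 -- still the per-document ceiling
console.log(maxBatchSizeBytes); // 2097152  -- now only the batch-split threshold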

lib/bulk/ordered.js (+9 -4)

@@ -27,8 +27,8 @@ function addToOperationsList(bulkOperation, docType, document) {
   });
 
   // Throw error if the doc is bigger than the max BSON size
-  if (bsonSize >= bulkOperation.s.maxBatchSizeBytes)
-    throw toError('document is larger than the maximum size ' + bulkOperation.s.maxBatchSizeBytes);
+  if (bsonSize >= bulkOperation.s.maxBsonObjectSize)
+    throw toError('document is larger than the maximum size ' + bulkOperation.s.maxBsonObjectSize);
 
   // Create a new batch object if we don't have a current one
   if (bulkOperation.s.currentBatch == null)

@@ -38,9 +38,14 @@ function addToOperationsList(bulkOperation, docType, document) {
 
   // Check if we need to create a new batch
   if (
+    // New batch if we exceed the max batch op size
     bulkOperation.s.currentBatchSize + 1 >= bulkOperation.s.maxWriteBatchSize ||
-    bulkOperation.s.currentBatchSizeBytes + maxKeySize + bsonSize >=
-      bulkOperation.s.maxBatchSizeBytes ||
+    // New batch if we exceed the maxBatchSizeBytes. Only matters if the batch already has a doc,
+    // since we can't send an empty batch
+    (bulkOperation.s.currentBatchSize > 0 &&
+      bulkOperation.s.currentBatchSizeBytes + maxKeySize + bsonSize >=
+        bulkOperation.s.maxBatchSizeBytes) ||
+    // New batch if the new op does not have the same op type as the current batch
     bulkOperation.s.currentBatch.batchType !== docType
   ) {
     // Save the batch to the execution stack

lib/bulk/unordered.js (+9 -4)

@@ -26,8 +26,8 @@ function addToOperationsList(bulkOperation, docType, document) {
     ignoreUndefined: false
   });
   // Throw error if the doc is bigger than the max BSON size
-  if (bsonSize >= bulkOperation.s.maxBatchSizeBytes)
-    throw toError('document is larger than the maximum size ' + bulkOperation.s.maxBatchSizeBytes);
+  if (bsonSize >= bulkOperation.s.maxBsonObjectSize)
+    throw toError('document is larger than the maximum size ' + bulkOperation.s.maxBsonObjectSize);
   // Holds the current batch
   bulkOperation.s.currentBatch = null;
   // Get the right type of batch

@@ -47,9 +47,14 @@ function addToOperationsList(bulkOperation, docType, document) {
 
   // Check if we need to create a new batch
   if (
+    // New batch if we exceed the max batch op size
     bulkOperation.s.currentBatch.size + 1 >= bulkOperation.s.maxWriteBatchSize ||
-    bulkOperation.s.currentBatch.sizeBytes + maxKeySize + bsonSize >=
-      bulkOperation.s.maxBatchSizeBytes ||
+    // New batch if we exceed the maxBatchSizeBytes. Only matters if the batch already has a doc,
+    // since we can't send an empty batch
+    (bulkOperation.s.currentBatch.size > 0 &&
+      bulkOperation.s.currentBatch.sizeBytes + maxKeySize + bsonSize >=
+        bulkOperation.s.maxBatchSizeBytes) ||
+    // New batch if the new op does not have the same op type as the current batch
     bulkOperation.s.currentBatch.batchType !== docType
   ) {
     // Save the batch to the execution stack

test/functional/client_side_encryption_prose_tests.js (+218)

@@ -464,5 +464,223 @@ describe(
         });
       });
     });
+
+    describe('BSON size limits and batch splitting', function() {
+      const fs = require('fs');
+      const path = require('path');
+      const EJSON = require('mongodb-extjson');
+      function loadLimits(file) {
+        return EJSON.parse(
+          fs.readFileSync(path.resolve(__dirname, 'spec', 'client-side-encryption', 'limits', file))
+        );
+      }
+
+      const limitsSchema = loadLimits('limits-schema.json');
+      const limitsKey = loadLimits('limits-key.json');
+      const limitsDoc = loadLimits('limits-doc.json');
+
+      before(function() {
+        // First, perform the setup.
+
+        // #. Create a MongoClient without encryption enabled (referred to as ``client``).
+        this.client = this.configuration.newClient(
+          {},
+          { useNewUrlParser: true, useUnifiedTopology: true }
+        );
+
+        this.events = new Set();
+
+        return (
+          this.client
+            .connect()
+            // #. Using ``client``, drop and create the collection ``db.coll`` configured with the included JSON schema `limits/limits-schema.json <../limits/limits-schema.json>`_.
+            .then(() => {
+              return this.client
+                .db(dataDbName)
+                .dropCollection(dataCollName)
+                .catch(noop);
+            })
+            .then(() => {
+              return this.client.db(dataDbName).createCollection(dataCollName, {
+                validator: { $jsonSchema: limitsSchema }
+              });
+            })
+            // #. Using ``client``, drop the collection ``admin.datakeys``. Insert the document `limits/limits-key.json <../limits/limits-key.json>`_
+            .then(() => {
+              return this.client
+                .db(keyVaultDbName)
+                .dropCollection(keyVaultCollName)
+                .catch(noop);
+            })
+            .then(() => {
+              return this.client
+                .db(keyVaultDbName)
+                .collection(keyVaultCollName)
+                .insertOne(limitsKey);
+            })
+        );
+      });
+
+      beforeEach(function() {
+        // #. Create a MongoClient configured with auto encryption (referred to as ``client_encrypted``)
+        //    Configure with the ``local`` KMS provider as follows:
+        //    .. code:: javascript
+        //       { "local": { "key": <base64 decoding of LOCAL_MASTERKEY> } }
+        //    Configure with the ``keyVaultNamespace`` set to ``admin.datakeys``.
+        this.clientEncrypted = this.configuration.newClient(
+          {},
+          {
+            useNewUrlParser: true,
+            useUnifiedTopology: true,
+            monitorCommands: true,
+            autoEncryption: {
+              keyVaultNamespace,
+              kmsProviders
+            }
+          }
+        );
+        return this.clientEncrypted.connect().then(() => {
+          this.encryptedColl = this.clientEncrypted.db(dataDbName).collection(dataCollName);
+          this.events.clear();
+          this.clientEncrypted.on('commandStarted', e => {
+            if (e.commandName === 'insert') {
+              this.events.add(e);
+            }
+          });
+        });
+      });
+
+      afterEach(function() {
+        if (this.clientEncrypted) {
+          this.clientEncrypted.removeAllListeners('commandStarted');
+          return this.clientEncrypted.close();
+        }
+      });
+
+      after(function() {
+        return this.client && this.client.close();
+      });
+
+      // Using ``client_encrypted`` perform the following operations:
+
+      function repeatedChar(char, length) {
+        return Array.from({ length })
+          .map(() => char)
+          .join('');
+      }
+
+      const testCases = [
+        // #. Insert ``{ "_id": "over_2mib_under_16mib", "unencrypted": <the string "a" repeated 2097152 times> }``.
+        //    Expect this to succeed since this is still under the ``maxBsonObjectSize`` limit.
+        {
+          description: 'should succeed for over_2mib_under_16mib',
+          docs: () => [{ _id: 'over_2mib_under_16mib', unencrypted: repeatedChar('a', 2097152) }],
+          expectedEvents: [{ commandName: 'insert' }]
+        },
+        // #. Insert the document `limits/limits-doc.json <../limits/limits-doc.json>`_ concatenated with ``{ "_id": "encryption_exceeds_2mib", "unencrypted": < the string "a" repeated (2097152 - 2000) times > }``
+        //    Note: limits-doc.json is a 1005 byte BSON document that encrypts to a ~10,000 byte document.
+        //    Expect this to succeed since after encryption this still is below the normal maximum BSON document size.
+        //    Note, before auto encryption this document is under the 2 MiB limit. After encryption it exceeds the 2 MiB limit, but does NOT exceed the 16 MiB limit.
+        {
+          description: 'should succeed for encryption_exceeds_2mib',
+          docs: () => [
+            Object.assign({}, limitsDoc, {
+              _id: 'encryption_exceeds_2mib',
+              unencrypted: repeatedChar('a', 2097152 - 2000)
+            })
+          ],
+          expectedEvents: [{ commandName: 'insert' }]
+        },
+        // #. Bulk insert the following:
+        //    - ``{ "_id": "over_2mib_1", "unencrypted": <the string "a" repeated (2097152) times> }``
+        //    - ``{ "_id": "over_2mib_2", "unencrypted": <the string "a" repeated (2097152) times> }``
+        //    Expect the bulk write to succeed and split after first doc (i.e. two inserts occur). This may be verified using `command monitoring <https://github.com./mongodb/specifications/tree/master/source/command-monitoring/command-monitoring.rst>`_.
+        {
+          description: 'should succeed for bulk over_2mib',
+          docs: () => [
+            { _id: 'over_2mib_1', unencrypted: repeatedChar('a', 2097152) },
+            { _id: 'over_2mib_2', unencrypted: repeatedChar('a', 2097152) }
+          ],
+          expectedEvents: [{ commandName: 'insert' }, { commandName: 'insert' }]
+        },
+        // #. Bulk insert the following:
+        //    - The document `limits/limits-doc.json <../limits/limits-doc.json>`_ concatenated with ``{ "_id": "encryption_exceeds_2mib_1", "unencrypted": < the string "a" repeated (2097152 - 2000) times > }``
+        //    - The document `limits/limits-doc.json <../limits/limits-doc.json>`_ concatenated with ``{ "_id": "encryption_exceeds_2mib_2", "unencrypted": < the string "a" repeated (2097152 - 2000) times > }``
+        //    Expect the bulk write to succeed and split after first doc (i.e. two inserts occur). This may be verified using `command monitoring <https://github.com./mongodb/specifications/tree/master/source/command-monitoring/command-monitoring.rst>`_.
+        {
+          description: 'should succeed for bulk encryption_exceeds_2mib',
+          docs: () => [
+            Object.assign({}, limitsDoc, {
+              _id: 'encryption_exceeds_2mib_1',
+              unencrypted: repeatedChar('a', 2097152 - 2000)
+            }),
+            Object.assign({}, limitsDoc, {
+              _id: 'encryption_exceeds_2mib_2',
+              unencrypted: repeatedChar('a', 2097152 - 2000)
+            })
+          ],
+          expectedEvents: [{ commandName: 'insert' }, { commandName: 'insert' }]
+        },
+        // #. Insert ``{ "_id": "under_16mib", "unencrypted": <the string "a" repeated 16777216 - 2000 times> }``.
+        //    Expect this to succeed since this is still (just) under the ``maxBsonObjectSize`` limit.
+        {
+          description: 'should succeed for under_16mib',
+          docs: () => [{ _id: 'under_16mib', unencrypted: repeatedChar('a', 16777216 - 2000) }],
+          expectedEvents: [{ commandName: 'insert' }]
+        },
+        // #. Insert the document `limits/limits-doc.json <../limits/limits-doc.json>`_ concatenated with ``{ "_id": "encryption_exceeds_16mib", "unencrypted": < the string "a" repeated (16777216 - 2000) times > }``
+        //    Expect this to fail since encryption results in a document exceeding the ``maxBsonObjectSize`` limit.
+        {
+          description: 'should fail for encryption_exceeds_16mib',
+          docs: () => [
+            Object.assign({}, limitsDoc, {
+              _id: 'encryption_exceeds_16mib',
+              unencrypted: repeatedChar('a', 16777216 - 2000)
+            })
+          ],
+          error: true
+        }
+      ];
+
+      testCases.forEach(testCase => {
+        it(testCase.description, function() {
+          return this.encryptedColl.insertMany(testCase.docs()).then(
+            () => {
+              if (testCase.error) {
+                throw new Error('Expected this insert to fail, but it succeeded');
+              }
+              const expectedEvents = Array.from(testCase.expectedEvents);
+              const actualEvents = pruneEvents(this.events);
+
+              expect(actualEvents)
+                .to.have.a.lengthOf(expectedEvents.length)
+                .and.to.containSubset(expectedEvents);
+            },
+            err => {
+              if (!testCase.error) {
+                throw err;
+              }
+            }
+          );
+        });
+      });
+
+      function pruneEvents(events) {
+        return Array.from(events).map(event => {
+          // We are pruning out the bunch of repeating 'a's, mostly
+          // because a failure would otherwise try to print 2mb of 'a's
+          // and not have a good time.
+          event.command = Object.assign({}, event.command);
+          event.command.documents = event.command.documents.map(doc => {
+            doc = Object.assign({}, doc);
+            if (doc.unencrypted) {
+              doc.unencrypted = "Lots of repeating 'a's";
+            }
+            return doc;
+          });
+          return event;
+        });
+      }
+    });
   }
 );
