Files
mongo/jstests/aggregation/exec/query_limits_test.js

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

314 lines
9.8 KiB
JavaScript
Raw Normal View History

/**
* Test that larger queries do not fail. This includes larger aggregation pipelines, as well as
* large $match/$project stages, deeply nested paths, and many predicates in an $and/$or.
* The size of these queries was found by trial and error until we reach the BSON size limit.
*
* @tags: [
* # Can't wrap queries in facets without going past max BSON depth.
* do_not_wrap_aggregations_in_facets,
* not_allowed_with_signed_security_token,
* # Can't use multiplanning, as it leads to query serialization that fails because of max BSON
* # size.
SERVER-90484 Large increases in throughput for most expensive variants (#22196) [required variant with jstestshell changes](https://spruce.mongodb.com/version/6643e8bf8571a30007c6562d/tasks?sorts=STATUS%3AASC%3BBASE_STATUS%3ADESC) <- 242 hours [required variant without jstestshell changes](https://spruce.mongodb.com/version/6643e9dd8571a30007c6576b/tasks?sorts=STATUS%3AASC%3BBASE_STATUS%3ADESC) <- 301 hours 20% reduction in compute. [AUBSAN After](https://spruce.mongodb.com/version/6644ecfaf87baf0007e4b124/tasks): 412 hours [AUBSAN Before](https://spruce.mongodb.com/version/6644e4b131a6190007b0526c/tasks) 539 hours 24% reduction in compute [TSAN After](https://spruce.mongodb.com/version/6644ecabe2618d0007a54406/tasks) 356 hours [TSAN Before](https://spruce.mongodb.com/version/6644e47f9b110e0007e9820f/tasks) 580 hours 39% reduction in compute According to project outliers this should reduce our spend on mongodb-mongo-master by (20%*20.32%) = 4.06% (39%*14.82%) = 5.78% (24%*8.73%) = 2.10% **Total: 11.94%** In all these patches we use a jstest shell compiled without debug symbols and statically linked. This increased the startup speed from 1.2 seconds to .02 seconds. Since each javascript test is run with a separate invocation of the shell this speeds up ever javascript test by about 1.2 seconds. The tradeoff is that we are not going to catch tsan or aubsan bugs in the jstestshell and it makes our test running process just that much more complicated. Not included here is another optimization to run batches of javascript tests together to avoid having to re-handshake with the database. [Final PB](https://spruce.mongodb.com/version/664b780cceb3230007a77382/tasks?sorts=STATUS%3AASC%3BBASE_STATUS%3ADESC) showing mostly green. GitOrigin-RevId: db2f54f832512676f6f8159e1267d5ae69aa6b3d
2024-05-20 21:02:21 -07:00
* does_not_support_multiplanning_single_solutions,
* incompatible_aubsan,
* requires_profiling
* ]
*/
import {isSlowBuild} from "jstests/libs/query/aggregation_pipeline_utils.js";
// Only run this test for debug=off opt=on without sanitizers active. With any of these activated,
// the stack frames are larger and can more easily stack overflow.
if (isSlowBuild(db)) {
jsTestLog("Returning early because debug is on, opt is off, or a sanitizer is enabled.");
// Stop the script here so that none of the oversized queries below are issued.
quit();
}
// This test can cause lots of spam in the slow query logs due to the size of the queries. If an
// error happens, we'll have a backtrace and know which query is the issue, so slow query logs are
// not necessary.
// Level 0 turns the profiler off; slowms raises the slow-operation threshold to 10 seconds.
db.setProfilingLevel(0, {slowms: 10000});
// Collection that runAgg() aggregates over. It is dropped first so the test starts from exactly
// the one document inserted below.
const coll = db.query_limits_test;
coll.drop();
// Multikey ('a' and 'b' hold arrays) so we can't apply any non-multikey optimizations to stress as
// much as possible.
assert.commandWorked(coll.insert({_id: 0, a: [0, 1], b: [2, 3], c: 4, d: 5, object: {}}));
// Returns the array of consecutive integers [0, 1, ..., high - 1] (empty when 'high' is 0).
function range(high) {
    const indexIterator = new Array(high).keys();
    return Array.from(indexIterator);
}
// Runs 'pipeline' against the test collection 'coll' purely to check that it does not fail.
// Any error raised by aggregate() or while reading the cursor propagates to the caller.
//
// pipeline: array of aggregation stages.
// Returns nothing: the documents produced by the pipeline are deliberately discarded.
function runAgg(pipeline) {
    // toArray() reads the whole result set, so failures that only surface while iterating the
    // cursor are caught as well.
    coll.aggregate(pipeline).toArray();
}
// Runs {$match: {a: {$in: [0, 1, 2, ...]}}} with a very large $in list, once with NumberInt
// values and once with plain (double) values.
function testLargeIn() {
    jsTestLog("Testing large $in");

    // Int limit is different than double limit.
    const intCount = 1200000;
    const doubleCount = 1000000;

    const intValues = Array.from(range(intCount), (value) => NumberInt(value));
    runAgg([{$match: {a: {$in: intValues}}}]);

    const doubleValues = Array.from(range(doubleCount), (value) => value * 1.0);
    runAgg([{$match: {a: {$in: doubleValues}}}]);
}
// Runs a $project whose $switch has 150000 branches, ordered from the largest threshold down to
// the smallest.
function testLargeSwitch() {
    jsTestLog("Testing large $switch");

    const branches = [];
    for (const threshold of range(150000).reverse()) {
        branches.push({case: {$gt: ["$a", threshold]}, then: threshold});
    }

    const switchExpr = {$switch: {branches: branches, default: 345678}};
    runAgg([{$project: {b: switchExpr}}]);
}
// Runs a $bucket stage with 100000 boundaries (0, 1, ..., 99999).
function testLargeBucket() {
    jsTestLog("Testing large $bucket");

    const boundaryCount = 100000;
    const boundaries = Array.from({length: boundaryCount}, (_, index) => index);

    const bucketStage = {
        $bucket: {
            groupBy: "$a",
            boundaries,
            default: "default",
            output: {count: {$sum: 1}},
        },
    };
    runAgg([bucketStage]);
}
// Runs two large projections:
//   1. {$project: {a0: 1, a1: 1, ...}} with one million top-level fields.
//   2. {$project: {"a0.a1. ... .a194": 1}}, a single dotted path with 195 components.
function testLargeProject() {
    jsTestLog("Testing large $project");

    const wideProjection = {};
    for (const index of range(1000000)) {
        wideProjection["a" + index] = NumberInt(1);
    }
    runAgg([{$project: wideProjection}]);

    const pathSize = 195;
    const pathComponents = Array.from({length: pathSize}, (_, index) => "a" + index);
    const nestedProjectField = pathComponents.join(".");
    runAgg([{$project: {[nestedProjectField]: 1}}]);
}
// Runs a very wide implicit-AND $match, followed by $and and $or over several large lists of
// predicates of different kinds.
function testLargeAndOrPredicates() {
    jsTestLog("Testing large $and/$or predicates");

    // Large $match of the form {$match: {a0: 1, a1: 1, ...}}
    const wideMatch = {};
    for (const index of range(800000)) {
        wideMatch["a" + index] = NumberInt(1);
    }
    runAgg([{$match: wideMatch}]);

    // NumberInt(0), NumberInt(1), ..., NumberInt(count - 1).
    const numberInts = (count) => range(count).map((value) => NumberInt(value));
    // Field name for the i-th value. Plain '+' concatenation is kept on purpose: 'i' is a
    // NumberInt wrapper here and other stringification forms may not produce the same name.
    const fieldFor = (i) => "a" + i;
    // Alternates between $gt (even i) and $lt (odd i).
    const comparison = (i) => (i % 2 ? {$lt: i} : {$gt: i});

    const predicateLists = [
        // Plain a=i filter.
        numberInts(500000).map((i) => ({a: i})),
        // a_i = i filter. Different field for each value.
        numberInts(500000).map((i) => ({[fieldFor(i)]: i})),
        // Mix of lt and gt with the same field.
        numberInts(500000).map((i) => ({a: comparison(i)})),
        // Mix of lt and gt with different fields.
        numberInts(400000).map((i) => ({[fieldFor(i)]: comparison(i)})),
        // Mix of lt and gt wrapped in not with different fields.
        numberInts(300000).map((i) => ({[fieldFor(i)]: {$not: comparison(i)}})),
        // $exists: true on different fields.
        numberInts(400000).map((i) => ({[fieldFor(i)]: {$exists: true}})),
        // $exists: false on different fields.
        numberInts(400000).map((i) => ({[fieldFor(i)]: {$exists: false}})),
    ];

    // Each list is exercised first as a conjunction, then as a disjunction.
    predicateLists.forEach((predicates) => {
        runAgg([{$match: {$and: predicates}}]);
        runAgg([{$match: {$or: predicates}}]);
    });
}
// Runs $match on a 10,000,000-character field name, which is expected to succeed, and on a
// 17,000,000-character field name, which is expected to fail with error code 17260. Each name is
// tried bare, inside $and, and inside $or.
function testLongFieldNames() {
    jsTestLog("Testing $match with long field name");

    // Test with a long field name that's accepted by the server.
    {
        const acceptedName = "a".repeat(10_000_000);
        const predicate = {[acceptedName]: 1};
        const filters = [predicate, {$and: [predicate]}, {$or: [predicate]}];
        for (const filter of filters) {
            runAgg([{$match: filter}]);
        }
    }

    // Test with a field name that's too long, where the server rejects it.
    {
        const rejectedName = "a".repeat(17_000_000);
        const predicate = {[rejectedName]: 1};
        const filters = [predicate, {$and: [predicate]}, {$or: [predicate]}];
        for (const filter of filters) {
            assert.throwsWithCode(() => runAgg([{$match: filter}]), 17260);
        }
    }
}
// Runs a $match in which {a: {$eq: 1}} is wrapped in 72 levels of {a: {$elemMatch: ...}}.
function testDeeplyNestedPath() {
    jsTestLog("Testing deeply nested $match");

    const nestingDepth = 72;
    const innermost = {a: {$eq: 1}};
    // Each reduction step wraps the query built so far in one more $elemMatch level.
    const nestedQuery = Array.from({length: nestingDepth}).reduce(
        (inner) => ({a: {$elemMatch: inner}}),
        innermost,
    );

    runAgg([{$match: nestedQuery}]);
}
// For each of several stage types, runs a pipeline that repeats that one stage as many times as
// the server's 'internalPipelineLengthLimit' parameter reports.
function testPipelineLimits() {
    jsTestLog("Testing large agg pipelines");

    const getParameterReply = assert.commandWorked(
        db.adminCommand({getParameter: 1, internalPipelineLengthLimit: 1}),
    );
    const maxStages = getParameterReply.internalPipelineLengthLimit;

    const stageTemplates = [
        {$limit: 1},
        {$skip: 1},
        {$sort: {a: 1}},
        {$unwind: "$a"},
        {$match: {a: {$mod: [4, 2]}}},
        {$group: {_id: "$a"}},
        {$addFields: {c: {$add: ["$c", "$d"]}}},
        {$addFields: {a: 5}},
        {$project: {a: 1}},
        {$match: {a: 1}},
    ];

    stageTemplates.forEach((stage) => {
        // Every slot of the pipeline holds the same stage object.
        const repeatedStages = range(maxStages).fill(stage);
        jsTestLog(stage);
        runAgg(repeatedStages);
    });
}
// Counter used to give every leaf predicate a distinct field name and value. It is advanced by
// generateNestedAndOrHelper() each time a leaf is produced.
let fieldIndex = 0;

/*
 * Recursively builds a nested query whose levels alternate between $and and $or, e.g.
 * {$and: [{$or: [... $and ...]}, ... (length branchingFactor) ...]}
 *
 * type: operator used at this level, "$and" or "$or" (any value other than "$and" makes the
 *       children use "$and").
 * branchingFactor: number of children under every operator.
 * maxDepth: number of operator levels still to generate; at 0 a leaf {a<N>: NumberInt(<N>)} is
 *           returned, with N taken from 'fieldIndex'.
 */
function generateNestedAndOrHelper(type, branchingFactor, maxDepth) {
    if (maxDepth === 0) {
        const leaf = {["a" + fieldIndex]: NumberInt(fieldIndex)};
        fieldIndex += 1;
        return leaf;
    }

    const childType = type === "$and" ? "$or" : "$and";
    const children = [];
    while (children.length < branchingFactor) {
        children.push(generateNestedAndOrHelper(childType, branchingFactor, maxDepth - 1));
    }
    return {[type]: children};
}
// Builds a nested $and/$or query with 'type' as the top-level operator. 'fieldIndex' is reset
// first, so leaf field names in every generated query start again from "a0".
function generateNestedAndOr(type, branchingFactor, maxDepth) {
    fieldIndex = 0;
    const nestedQuery = generateNestedAndOrHelper(type, branchingFactor, maxDepth);
    return nestedQuery;
}
// Runs $match on generated nested $and/$or queries. For each top-level operator, a deep and
// narrow tree is run first, then a shallow and wide one.
function testNestedAndOr() {
    jsTestLog("Testing nested $and/$or");

    // [branchingFactor, maxDepth] pairs, in the order they are run.
    const treeShapes = [
        [3, 10], // deep and narrow
        [10, 5], // shallow and wide
    ];

    for (const topLevelType of ["$and", "$or"]) {
        for (const [branchingFactor, maxDepth] of treeShapes) {
            const nestedQuery = generateNestedAndOr(topLevelType, branchingFactor, maxDepth);
            runAgg([{$match: nestedQuery}]);
        }
    }
}
// Runs $setIntersection over 750000 field-path arguments ("$a1" ... "$a750000"), followed by a
// $group on the projected value.
function testLargeSetFunction() {
    jsTestLog("Testing large $setIntersection");

    const argumentCount = 750000;
    const fieldPaths = Array.from({length: argumentCount}, (_, index) => "$a" + (index + 1));

    const projectStage = {$project: {a: {$setIntersection: fieldPaths}}};
    const groupStage = {$group: {_id: "$a"}};
    runAgg([projectStage, groupStage]);
}
// Runs $concat over 750000 field-path arguments ("$a1" ... "$a750000").
function testLargeConcatFunction() {
    jsTestLog("Testing large $concat");

    const argumentCount = 750000;
    const fieldPaths = new Array(argumentCount);
    for (let slot = 0; slot < argumentCount; slot++) {
        // Field numbering is 1-based.
        fieldPaths[slot] = "$a" + (slot + 1);
    }

    runAgg([{$project: {a: {$concat: fieldPaths}}}]);
}
// Runs $arrayToObject on a literal list of 200000 [key, value] pairs: ["a1", 1] ... ["a200000",
// 200000].
function testLargeArrayToObjectFunction() {
    jsTestLog("Testing large $arrayToObject");

    const pairCount = 200000;
    const keyValuePairs = Array.from({length: pairCount}, (_, index) => {
        const ordinal = index + 1;
        return ["a" + ordinal, ordinal];
    });

    // The pair list is wrapped in one more array: it is the single argument to $arrayToObject.
    runAgg([{$project: {a: {$arrayToObject: [keyValuePairs]}}}]);
}
// Every test case in this file, listed in the order in which it is executed.
const tests = [
    testLargeIn,
    testLargeSwitch,
    testLargeBucket,
    testLargeProject,
    testLargeAndOrPredicates,
    testLongFieldNames,
    testDeeplyNestedPath,
    testNestedAndOr,
    testPipelineLimits,
    testLargeSetFunction,
    testLargeConcatFunction,
    testLargeArrayToObjectFunction,
];

// Run the cases one after another; the first one that throws aborts the remaining ones.
tests.forEach((runTestCase) => {
    runTestCase();
});