Co-authored-by: Christopher M. Wolff <chris.wolff@mongodb.com> GitOrigin-RevId: 9ed7774e1d1810dc90fd4a6ebd9b325a62758df5
226 lines
9.4 KiB
JavaScript
226 lines
9.4 KiB
JavaScript
/**
 * Tests that $setWindowFields stage reports memory footprint per function when explain is run
 * with verbosities "executionStats" and "allPlansExecution". Also tests that the explain output
 * includes a metric for peak memory usage across the entire stage, including each individual
 * function as well as any other internal state.
 *
 * TODO: Once SERVER-111899 is done and feature flag QueryMemoryTracking is removed, the
 * "multiversion_incompatible" tag can be removed, assuming that there is no further explain
 * output changes based on feature flags in the future.
 *
 * @tags: [
 *   multiversion_incompatible,
 *   assumes_against_mongod_not_mongos
 * ]
 */
|
|
import {getAggPlanStages} from "jstests/libs/query/analyze_plan.js";
|
|
import {
|
|
getTotalMemoryUsageFromStageExplainOutput,
|
|
assertExplainMemoryTracking,
|
|
} from "jstests/aggregation/extras/window_function_helpers.js";
|
|
|
|
// Work on a freshly dropped collection named after this test.
const coll = db[jsTestName()];
coll.drop();

// Payload string made of 1024 ',' characters (1KB).
const bigStr = ",".repeat(1024);
const nDocs = 1000;
const nPartitions = 50;
// Size was found through logging in 'SpillableDeque' class.
const docSize = 1292;

// Insert 'nDocs' documents with _id 1..nDocs; each document's partition key is _id % nPartitions.
const bulk = coll.initializeUnorderedBulkOp();
let nextId = 1;
while (nextId <= nDocs) {
    bulk.insert({_id: nextId, key: nextId % nPartitions, bigStr});
    nextId++;
}
assert.commandWorked(bulk.execute());
|
|
|
|
/**
 * Runs explain on 'pipeline' with the given verbosity and checks that the execution stats reported
 * by every $_internalSetWindowFields stage in the explain output are as expected.
 * - 'pipeline' is the aggregation pipeline to explain against 'coll'.
 * - 'expectedFunctionMemUsages' maps each output field name to the estimated memory footprint of
 *   its window function. Every function listed here must be reported by the stage.
 * - 'expectedTotalMemUsage' is used to check the peak memory footprint for the entire stage.
 * - 'verbosity' indicates the explain verbosity used.
 *
 * For "executionStats" and "allPlansExecution" each reported value must be at least 0.9x and less
 * than 2.2x of its estimate. For any other verbosity the stage must not report memory statistics.
 */
function checkExplainResult(pipeline, expectedFunctionMemUsages, expectedTotalMemUsage, verbosity) {
    const stages = getAggPlanStages(coll.explain(verbosity).aggregate(pipeline), "$_internalSetWindowFields");
    const reportsExecutionStats = verbosity === "executionStats" || verbosity === "allPlansExecution";

    for (const stage of stages) {
        assert(stage.hasOwnProperty("$_internalSetWindowFields"), stage);

        if (!reportsExecutionStats) {
            // Without execution stats none of the memory metrics may appear in the output.
            assert(!stage.hasOwnProperty("maxFunctionMemoryUsageBytes"), stage);
            assert(!stage.hasOwnProperty("maxTotalMemoryUsageBytes"), stage);
            assert(!stage.hasOwnProperty("peakTrackedMemBytes"), stage);
            continue;
        }

        assert(stage.hasOwnProperty("maxFunctionMemoryUsageBytes"), stage);
        const maxFunctionMemUsages = stage["maxFunctionMemoryUsageBytes"];

        // Walk the *expected* functions (not the reported ones) so that a function missing from
        // the explain output fails the test instead of being silently skipped.
        for (const field of Object.keys(expectedFunctionMemUsages)) {
            assert(
                maxFunctionMemUsages.hasOwnProperty(field),
                "missing memory usage for function '" + field + "': " + tojson(stage),
            );

            // The estimates being passed in are as close as possible to accurate. Accept anything
            // from 10% below the estimate up to (but excluding) 2.2x the estimate, to leave room
            // for variants that use different amounts of memory.
            assert.gte(
                maxFunctionMemUsages[field],
                expectedFunctionMemUsages[field] * 0.9,
                "mismatch for function '" + field + "': " + tojson(stage),
            );
            assert.lt(
                maxFunctionMemUsages[field],
                2.2 * expectedFunctionMemUsages[field],
                "mismatch for function '" + field + "': " + tojson(stage),
            );
        }

        // TODO: Once SERVER-111899 is done and feature flag QueryMemoryTracking is removed, remove this assertion.
        assertExplainMemoryTracking(stage);

        // The stage-wide peak gets the same tolerance band as the per-function values.
        const totalMemoryUsage = getTotalMemoryUsageFromStageExplainOutput(stage);
        assert.gt(totalMemoryUsage, expectedTotalMemUsage * 0.9, "Incorrect total mem usage: " + tojson(stage));
        assert.lt(totalMemoryUsage, 2.2 * expectedTotalMemUsage, "Incorrect total mem usage: " + tojson(stage));
    }
}
|
|
|
|
/**
 * Checks the explain output for "queryPlanner" verbosity. For this verbosity checkExplainResult()
 * asserts that the stage reports none of the memory usage fields, so the expected values passed
 * here are placeholders.
 */
(function testQueryPlannerVerbosity() {
    const pipeline = [
        {
            $setWindowFields: {output: {count: {$sum: 1}, push: {$push: "$bigStr"}, set: {$addToSet: "$bigStr"}}},
        },
    ];
    // checkExplainResult() runs the explain itself, so it must be given the pipeline rather than
    // explain stages that were already extracted from a previous explain call.
    checkExplainResult(pipeline, {}, 0, "queryPlanner");
})();
|
|
|
|
/**
 * Checks the reported memory usage for the default [unbounded, unbounded] window, first without
 * and then with partitioning.
 */
(function testUnboundedMemUsage() {
    const verbosities = ["executionStats", "allPlansExecution"];
    // Expected stage-wide peak: the buffered document bytes plus every function's own estimate.
    const estimateTotal = (bufferedDocBytes, functionMemUsages) =>
        Object.values(functionMemUsages).reduce((total, bytes) => total + bytes, bufferedDocBytes);

    // Case 1: no partitioning.
    {
        const pipeline = [
            {
                $setWindowFields: {output: {count: {$sum: 1}, push: {$push: "$bigStr"}, set: {$addToSet: "$bigStr"}}},
            },
        ];

        // The $setWindowFields stage "streams" one partition at a time, so there's only one
        // instance of each function. For the default [unbounded, unbounded] window type, each
        // function uses memory comparable to its $group counterpart.
        const expectedFunctionMemUsages = {
            count: 176,
            push: nDocs * 1024,
            set: 1024,
        };

        // The total mem usage for unbounded windows is the total from each function as well as
        // the size of all documents in the partition.
        const expectedTotal = estimateTotal(nDocs * docSize, expectedFunctionMemUsages);

        for (const verbosity of verbosities) {
            checkExplainResult(pipeline, expectedFunctionMemUsages, expectedTotal, verbosity);
        }
    }

    // Case 2: the memory footprint is reduced with partitioning.
    {
        const pipeline = [
            {
                $setWindowFields: {
                    partitionBy: "$key",
                    output: {count: {$sum: 1}, push: {$push: "$bigStr"}, set: {$addToSet: "$bigStr"}},
                },
            },
        ];
        const docsPerPartition = nDocs / nPartitions;
        const expectedFunctionMemUsages = {
            count: 176,
            push: docsPerPartition * 1056 + 56, // 56 constant state size. Uses 1056 per document.
            set: 1144, // 1024 for the string, rest is constant state.
        };

        // Add one document for the NextPartitionState structure.
        const expectedTotal = estimateTotal((docsPerPartition + 1) * docSize, expectedFunctionMemUsages);

        for (const verbosity of verbosities) {
            checkExplainResult(pipeline, expectedFunctionMemUsages, expectedTotal, verbosity);
        }
    }
})();
|
|
|
|
/**
 * Checks the reported memory usage for document-based sliding windows of ten documents: a purely
 * forward-looking window, a window that also looks behind, and the latter with partitioning.
 */
(function testSlidingWindowMemUsage() {
    const windowSize = 10;
    const verbosities = ["executionStats", "allPlansExecution"];

    // Builds a $sum-over-_id pipeline for the given document window, optionally partitioned.
    const slidingSumPipeline = (documents, partitionBy) => {
        const spec = {
            sortBy: {_id: 1},
            output: {runningSum: {$sum: "$_id", window: {documents}}},
        };
        if (partitionBy !== undefined) {
            spec.partitionBy = partitionBy;
        }
        return [{$setWindowFields: spec}];
    };

    // Expected stage-wide peak: the buffered document bytes plus every function's own estimate.
    const estimateTotal = (bufferedDocBytes, functionMemUsages) =>
        Object.values(functionMemUsages).reduce((total, bytes) => total + bytes, bufferedDocBytes);

    const runChecks = (pipeline, functionMemUsages, total) => {
        for (const verbosity of verbosities) {
            checkExplainResult(pipeline, functionMemUsages, total, verbosity);
        }
    };

    const expectedFunctionMemUsages = {
        // 10 integer values per window, with some extra to account for the $sum accumulator and
        // executor state.
        runningSum: windowSize * 16 + 120,
    };

    // Forward-looking window [0, 9]: the estimate counts all 'windowSize' documents.
    // NOTE(review): the previous comment here said the partition iterator only holds five
    // documents at once and releases them once they are added to the removable document executor;
    // that does not match the windowSize * docSize estimate used for this case - confirm.
    let expectedTotal = estimateTotal(windowSize * docSize, expectedFunctionMemUsages);
    runChecks(slidingSumPipeline([0, 9]), expectedFunctionMemUsages, expectedTotal);

    // Test that a window which also looks behind is able to release documents that are no longer
    // needed, thus reducing the total memory footprint. In this example, only half of the window
    // will be in memory at any point in time.
    expectedTotal = estimateTotal((windowSize / 2) * docSize, expectedFunctionMemUsages);
    runChecks(slidingSumPipeline([-5, 4]), expectedFunctionMemUsages, expectedTotal);

    // Adding partitioning doesn't change the peak memory usage.
    runChecks(slidingSumPipeline([-5, 4], "$key"), expectedFunctionMemUsages, expectedTotal);
})();
|
|
|
|
/**
 * Checks the reported memory usage for a range-based window that pushes the large string of
 * every document in range into an array.
 */
(function testRangeBasedWindowMemUsage() {
    const maxDocsInWindow = 20;
    const rangeWindowStage = {
        $setWindowFields: {
            sortBy: {_id: 1},
            output: {pushArray: {$push: "$bigStr", window: {range: [-10, 9]}}},
        },
    };
    const pipeline = [rangeWindowStage];

    // The memory usage is doubled since both the executor and the function state have copies of
    // the large string.
    const expectedFunctionMemUsages = {pushArray: 1024 * maxDocsInWindow * 2};

    // Expected stage-wide peak: the documents in the window plus every function's own estimate.
    const expectedTotal = Object.values(expectedFunctionMemUsages).reduce(
        (total, bytes) => total + bytes,
        maxDocsInWindow * docSize,
    );

    ["executionStats", "allPlansExecution"].forEach((verbosity) => {
        checkExplainResult(pipeline, expectedFunctionMemUsages, expectedTotal, verbosity);
    });
})();
|