Files
mongo/jstests/with_mongot/e2e_lib/explain_utils.js
Zac 591928c619 SERVER-108478 JS formatted by prettier and remove clang-format (#39656)
GitOrigin-RevId: 6c8f6aded47f260aa4f7c231b17dae3302cb1e04
2025-08-21 17:27:09 +00:00

380 lines
18 KiB
JavaScript

/**
* Utility functions for explain() results of search + non-search queries.
*/
import {arrayEq} from "jstests/aggregation/extras/utils.js";
import {getAggPlanStages, getLookupStage, getUnionWithStage} from "jstests/libs/query/analyze_plan.js";
import {
prepareUnionWithExplain,
validateMongotStageExplainExecutionStats,
verifyShardsPartExplainOutput,
} from "jstests/with_mongot/common_utils.js";
/**
* This function checks that the explain output contains the view pipeline $_internalSearchIdLookup.
* This should only be the case when there is a $search stage on the top-level of the user
* pipeline.
*
* @param {Object} explainOutput The explain output from the aggregation.
* @param {Array} viewPipeline The view pipeline to verify existence of in $_internalSearchIdLookup.
*/
function assertIdLookupContainsViewPipeline(explainOutput, viewPipeline) {
const stages = getAggPlanStages(explainOutput, "$_internalSearchIdLookup");
assert(
stages.length > 0,
"There should be at least one stage corresponding to $_internalSearchIdLookup in the explain output. " +
tojson(explainOutput),
);
for (let stage of stages) {
const idLookupFullSubPipe = stage["$_internalSearchIdLookup"]["subPipeline"];
// The _idLookup subPipeline should be a $match on _id followed by the view stages.
const idLookupStage = {"$match": {"_id": {"$eq": "_id placeholder"}}};
assert.eq(idLookupFullSubPipe[0], idLookupStage);
// Make sure that idLookup subpipeline contains all of the view stages.
const idLookupViewStages = idLookupFullSubPipe.length > 1 ? idLookupFullSubPipe.slice(1) : [];
assert.eq(idLookupViewStages.length, viewPipeline.length);
for (let i = 0; i < idLookupViewStages.length; i++) {
const stageName = Object.keys(viewPipeline[i])[0];
assert(idLookupViewStages[i].hasOwnProperty(stageName));
}
}
}
/**
* If the top-level aggregation contains a mongot stage, it asserts that the view transforms are
* contained in _idLookup's subpipeline. If the top-level aggregation doesn't have a mongot stage,
* it asserts that the view stages were applied to the beginning of the top-level pipeline.
*
* @param {Array} explainOutput The explain output from the aggregation.
* @param {Array} userPipeline The request/query that was run on the view.
* @param {Array} viewPipeline The pipeline used to define the view.
*/
export function assertViewAppliedCorrectly(explainOutput, userPipeline, viewPipeline) {
if (
userPipeline.length > 0 &&
(userPipeline[0].hasOwnProperty("$search") || userPipeline[0].hasOwnProperty("$vectorSearch"))
) {
// The view pipeline is pushed down to a desugared stage, $_internalSearchdLookup. Therefore
// we inspect the stages (which represent the fully desugared pipeline from the user) to
// ensure the view was successfully pushed down.
assertIdLookupContainsViewPipeline(explainOutput, viewPipeline);
} else {
// Whereas view transforms for mongot queries happen after desugaring, regular queries apply
// the view transforms during query expansion. For this reason, we can just inspect the
// explain's command obj, which represents the expanded query (eg the query after
// ResolvedView::asExpandedViewAggregation() was called). This also makes it easier to keep
// explain checks simpler/more consistent between variants that run with SBE turned on and
// SBE turned off, as SBE greatly changes how the stages are portrayed.
assert.eq(explainOutput.command.pipeline.slice(0, viewPipeline.length), viewPipeline);
}
}
/**
* This helper inspects explain.stages to ensure the view pipeline wasn't applied to the final
* execution pipeline.
*
* @param {Object} explainOutput The explain output from the aggregation.
* @param {Array} userPipeline The user pipeline ran on the view. This is only used to check if the
* pipeline contains a top level $search/$vectorSearch stage.
* @param {Array} viewPipeline The view pipeline to verify non-existence of in the explain output.
*/
export function assertViewNotApplied(explainOutput, userPipeline, viewPipeline) {
// Assert that the view pipeline wasn't pushed down to $_internalSearchIdLookup by ensuring
// there are no $_internalSearchIdLookup stages.
let stages = getAggPlanStages(explainOutput, "$_internalSearchIdLookup");
assert(
stages.length == 0,
"There should not be any stages corresponding to $_internalSearchIdLookup in the explain output. " +
tojson(explainOutput),
);
// If a view pipeline isn't pushed down to idLookup, there is a risk it was appended to the user
// pipeline (as is the case for non-search queries on views). It's important to call out that
// this check investigates the most basic case for non-search queries on views, where the view
// pipeline isn't desugared and none of the view stages are pushed down or otherwise rearranged
// during optimization.
if (
userPipeline.length > 0 &&
(userPipeline[0].hasOwnProperty("$search") || userPipeline[0].hasOwnProperty("$vectorSearch"))
) {
assert.neq(explainOutput.command.pipeline.slice(0, viewPipeline.length), viewPipeline);
}
}
/**
* This function is used to assert that the view definition from the search stage *inside* of
* the $unionWith is applied as intended to $_internalSearchIdLookup.
*
* @param {Object} explainOutput The explain output from the whole aggregation.
* @param {Object} collNss The underlying collection namespace of the view that the $unionWith stage
* is run on.
* @param {Object} viewName The view name that the $unionWith stage is run on.
* @param {Array} viewPipeline The view pipeline referenced by the search query inside of the
* $unionWith.
* @param {boolean} isStoredSource Whether the $search query is storedSource or not.
*/
export function assertUnionWithSearchSubPipelineAppliedViews(
explainOutput,
collNss,
viewName,
viewPipeline,
isStoredSource = false,
) {
const unionWithStage = getUnionWithStage(explainOutput);
const unionWithExplain = prepareUnionWithExplain(unionWithStage.$unionWith.pipeline);
if (!isStoredSource) {
// In fully-sharded environments, check for the correct view name on the view object and
// $unionWith stage. For single node and single shard environments, there is no view object
// that will exist on the $unionWith explain output. We can still assert that the
// $unionWith.coll is "resolved" to its collNss. Note that the view name is not resolved to
// its collNss in full-sharded environments, and this is intended behavior.
if (unionWithExplain.hasOwnProperty("splitPipeline") && unionWithExplain["splitPipeline"] !== null) {
const firstStage = unionWithExplain.splitPipeline.shardsPart[0];
print("explainOutput: " + tojson(explainOutput));
// The first stage is either $search or $vectorSearch.
if (firstStage.hasOwnProperty("$search")) {
assert.eq(firstStage.$search.view.name, viewName);
} else if (firstStage.hasOwnProperty("$vectorSearch")) {
assert.eq(firstStage.$vectorSearch.view.name, viewName);
} else {
assert.fail(
"Expected first stage to have either $search or $vectorSearch, but found neither: " +
tojson(firstStage),
);
}
assert.eq(unionWithStage.$unionWith.coll, viewName);
} else {
assert.eq(unionWithStage.$unionWith.coll, collNss.getName());
}
assertIdLookupContainsViewPipeline(unionWithExplain, viewPipeline);
} else {
// Because we are passing through an inner $unionWith explain (i.e. not a normal explain),
// there is no userPipeline check.
assertViewNotApplied(unionWithExplain, [], viewPipeline);
}
}
/**
* This function is used to assert that the $lookup stage exists as expected in the explain
* output. Note that $lookup doesn't apply the view definition in explain like $unionWith does.
*
* @param {Object} explainOutput The explain output from the whole aggregation.
* @param {Object} lookupStage The lookup stage to be searched for in the explain output.
*/
export function assertLookupInExplain(explainOutput, lookupStage) {
// Find the lookup stage in the explain output and assert that it matches the lookup passed
// to the function.
const stage = getLookupStage(explainOutput);
assert(stage, "There should be one $lookup stage in the explain output. " + tojson(explainOutput));
// The explain might add extra stages to the $lookup in its output which is why we can't
// simply assert that the two BSON objects match each other.
Object.keys(lookupStage["$lookup"]).forEach((lookupKey) => {
assert(
stage["$lookup"].hasOwnProperty(lookupKey),
'There should be a key "' + lookupKey + '" in the lookup stage from the explain output.' + tojson(stage),
);
// On the "pipeline" key, simply make sure that the two pipelines have the same length.
// There are some optimizations in various environment configurations that will make the
// actual content of the pipeline different, but the length should stay the same.
if (lookupKey == "pipeline") {
assert.eq(
stage["$lookup"][lookupKey].length,
lookupStage["$lookup"][lookupKey].length,
"The $lookup stage in the explain output should have the same number of " +
"stages as the lookup stage passed to the function." +
tojson(stage),
);
}
});
}
/**
* This function checks that the explain output for $search queries from an e2e test contains
* the information that it should.
* @param {Object} explainOutput the results from running coll.explain().aggregate([[$search:
* ....],
* ...])
* @param {string} stageType ex. "$_internalSearchMongotRemote" , "$_internalSearchIdLookup "
* @param {string} verbosity The verbosity of explain. "nReturned" and
* "executionTimeMillisEstimate" will not be checked for 'queryPlanner' verbosity "
* @param {NumberLong} nReturned not needed if verbosity is 'queryPlanner'. For a sharded
* scenario, this should be the total returned across all shards.
*/
export function verifyE2ESearchExplainOutput({explainOutput, stageType, verbosity, nReturned = null}) {
if (explainOutput.hasOwnProperty("splitPipeline") && explainOutput["splitPipeline"] !== null) {
// We check metadata and protocol version for sharded $search.
verifyShardsPartExplainOutput({result: explainOutput, searchType: "$search"});
}
let totalNReturned = 0;
let stages = getAggPlanStages(explainOutput, stageType);
assert(
stages.length > 0,
"There should be at least one stage corresponding to " +
stageType +
" in the explain output. " +
tojson(explainOutput),
);
// In a sharded scenario, there may be multiple stages. For an unsharded scenario, there is
// only one stage.
for (let stage of stages) {
if (verbosity != "queryPlanner") {
assert(stage.hasOwnProperty("nReturned"));
assert(stage.hasOwnProperty("executionTimeMillisEstimate"));
totalNReturned += stage["nReturned"];
}
// Non $_internalSearchIdLookup stages must contain an explain object.
if (stageType != "$_internalSearchIdLookup") {
const explainStage = stage[stageType];
assert(explainStage.hasOwnProperty("explain"), explainStage);
}
}
if (verbosity != "queryPlanner") {
assert.eq(totalNReturned, nReturned);
}
}
/**
* This function checks that the explain output for $searchMeta queries from an e2e test
* contains the information that it should.
* @param {Object} explainOutput the results from running
* coll.explain().aggregate([[$searchMeta: ....], ...])
* @param {NumberLong} numFacetBucketsAndCount The number of documents mongot returns for the
* query in the sharded scenario. This should be the number of facet buckets involved in the
* query and one more for count.
* @param {string} verbosity The verbosity of explain. "nReturned" and
* "executionTimeMillisEstimate" will not be checked for 'queryPlanner' verbosity "
*/
export function verifyE2ESearchMetaExplainOutput({explainOutput, numFacetBucketsAndCount, verbosity}) {
// In an unsharded scenario, $searchMeta returns one document with all of the facet values.
let nReturned = 1;
if (explainOutput.hasOwnProperty("splitPipeline") && explainOutput["splitPipeline"] !== null) {
// We check metadata and protocol version for sharded $search.
verifyShardsPartExplainOutput({result: explainOutput, searchType: "$searchMeta"});
// In the sharded scenario, $searchMeta returns one document per facet bucket + count,
// as it needs to be merged in the merging pipeline.
nReturned = numFacetBucketsAndCount;
}
let stages = getAggPlanStages(explainOutput, "$searchMeta");
for (let stage of stages) {
validateMongotStageExplainExecutionStats({
stage: stage,
stageType: "$searchMeta",
nReturned: nReturned,
verbosity: verbosity,
isE2E: true,
});
}
}
/**
* This function checks that the explain output for $vectorSearch queries from an e2e test
* contains the information that it should.
*
* In the sharded scenario, make sure the collection has enough documents to return the limit for
* each shard.
* @param {Object} explainOutput the results from running
* coll.explain().aggregate([[$vectorSearch:....], ...])
* @param {string} stageType ex. "$vectorSearch", $_internalSearchMongotRemote" ,
* @param {NumberLong} limit The limit for the $vectorSearch query. In a sharded scenario, this
* applies per shard.
* @param {string} verbosity The verbosity of explain. "nReturned" and
* "executionTimeMillisEstimate" will not be checked for 'queryPlanner' verbosity "
*/
export function verifyE2EVectorSearchExplainOutput({explainOutput, stageType, limit, verbosity}) {
let stages = getAggPlanStages(explainOutput, stageType);
// For $vectorSearch, the limit in the query is applied per shard. This means that nReturned
// for each shard should match the limit. We don't need to differentiate between the sharded
// and unsharded scenario.
for (let stage of stages) {
validateMongotStageExplainExecutionStats({
stage: stage,
stageType: stageType,
nReturned: limit,
verbosity: verbosity,
isE2E: true,
});
}
}
/**
* This function checks that the explain output for two queries contains the same stages, regardless
* of whether the test is sharded and ignoring irrelevant fields like 'optimizationTimeMillis'.
*
* Note that we cannot check that the stages entirely exactly match. In a single node $unionWith on
* a view, the $unionWith explain is related to the resolved namespace / view pipeline, and in
* sharded clusters, the $unionWith explain is related to the user namespace / pipeline.
*
* Therefore, we just check that the stage names match for each stage in a sharded scenario.
*
* @param {Object} realExplainOutput The results from running explain() on the feature being tested,
* e.g. $rankFusion.
* @param {string} expectedExplainOutput The results from running explain() on the baseline query,
* e.g. the verbose version of a ranked fusion query.
*/
export function verifyExplainStagesAreEqual(realExplainOutput, expectedExplainOutput) {
let realExplainOutputStages, expectedExplainStages;
let canCompareStagesExactly = true;
if (realExplainOutput.hasOwnProperty("splitPipeline")) {
if (realExplainOutput["splitPipeline"] != null) {
// Case for sharded collection.
realExplainOutputStages = realExplainOutput["splitPipeline"]["shardsPart"].concat(
realExplainOutput["splitPipeline"]["mergerPart"],
);
expectedExplainStages = expectedExplainOutput["splitPipeline"]["shardsPart"].concat(
expectedExplainOutput["splitPipeline"]["mergerPart"],
);
canCompareStagesExactly = false;
} else {
// Case for single shard.
const firstShard = Object.keys(realExplainOutput["shards"])[0];
realExplainOutputStages = realExplainOutput["shards"][firstShard]["stages"];
expectedExplainStages = expectedExplainOutput["shards"][firstShard]["stages"];
}
} else {
realExplainOutputStages = realExplainOutput["stages"];
expectedExplainStages = expectedExplainOutput["stages"];
}
if (canCompareStagesExactly) {
assert(
arrayEq(
realExplainOutputStages,
expectedExplainStages,
false /* verbose */,
null /* valueComparator */,
["optimizationTimeMillis"] /* fieldsToSkip */,
),
"Explains did not match in 'stages'. Expected:\n" +
tojson(expectedExplainOutput) +
"\nView:\n" +
tojson(realExplainOutput),
);
} else {
// This branch extracts the top-level stage names from the explain pipeline and ensures that
// they are equal - it *does not* compare the stage specs themselves.
function getStageNamesFromExplain(pipeline) {
return pipeline.map((stage) => Object.keys(stage)[0]);
}
const realExplainOutputStageNames = getStageNamesFromExplain(realExplainOutputStages);
const expectedExplainOutputStageNames = getStageNamesFromExplain(expectedExplainStages);
assert(
arrayEq(
realExplainOutputStageNames,
expectedExplainOutputStageNames,
false /* verbose */,
null /* valueComparator */,
[] /* fieldsToSkip */,
),
"Explains did not match in stage names. Expected: \n" +
tojson(expectedExplainOutputStageNames) +
"\nView:\n" +
tojson(realExplainOutputStageNames),
);
}
}