199 lines
9.0 KiB
JavaScript
199 lines
9.0 KiB
JavaScript
/**
|
|
* Test distinct() on multikey indexes using a dotted path.
|
|
*
|
|
* Assumes the collection is not sharded, because sharding the collection could result in different
|
|
* plans being chosen on different shards (for example, if an index is multikey on one shard but
|
|
* not another).
|
|
* Doesn't support stepdowns because it runs explain() on an aggregation (which can apparently
|
|
* return incomplete results).
|
|
* @tags: [assumes_unsharded_collection, does_not_support_stepdowns]
|
|
*/
|
|
(function() {
|
|
"use strict";
|
|
load("jstests/libs/analyze_plan.js"); // For planHasStage().
|
|
|
|
const coll = db.distinct_multikey;
|
|
coll.drop();
|
|
assert.commandWorked(coll.createIndex({"a.b.c": 1}));
|
|
|
|
assert.commandWorked(coll.insert({a: {b: {c: 1}}}));
|
|
assert.commandWorked(coll.insert({a: {b: {c: 2}}}));
|
|
assert.commandWorked(coll.insert({a: {b: {c: 3}}}));
|
|
assert.commandWorked(coll.insert({a: {b: {notRelevant: 3}}}));
|
|
assert.commandWorked(coll.insert({a: {notRelevant: 3}}));
|
|
|
|
const numPredicate = {"a.b.c": {$gt: 0}};
|
|
|
|
function getAggPipelineForDistinct(path) {
|
|
return [{$group: {_id: "$" + path}}];
|
|
}
|
|
|
|
// Run an agg pipeline with a $group, and convert the results so they're equivalent
|
|
// to what a distinct() would return.
|
|
// Note that $group will treat an array as its own key rather than unwinding it. This means
|
|
// that a $group on a field that's multikey will have different behavior than a distinct(), so
|
|
// we only use this function for non-multikey fields.
|
|
function distinctResultsFromPipeline(pipeline) {
|
|
const res = coll.aggregate(pipeline).toArray();
|
|
return res.map((x) => x._id);
|
|
}
|
|
|
|
// Be sure a distinct scan is used when the index is not multi key.
|
|
(function testDistinctWithNonMultikeyIndex() {
|
|
const results = coll.distinct("a.b.c");
|
|
// TODO SERVER-14832: Returning 'null' here is inconsistent with the behavior when no index
|
|
// is present.
|
|
assert.sameMembers([1, 2, 3, null], results);
|
|
|
|
const expl = coll.explain().distinct("a.b.c");
|
|
assert.eq(true, planHasStage(db, expl.queryPlanner.winningPlan, "DISTINCT_SCAN"), expl);
|
|
|
|
// Do an equivalent query using $group.
|
|
const pipeline = getAggPipelineForDistinct("a.b.c");
|
|
const aggResults = distinctResultsFromPipeline(pipeline);
|
|
assert.sameMembers(aggResults, results);
|
|
const aggExpl = assert.commandWorked(coll.explain().aggregate(pipeline));
|
|
assert.gt(getAggPlanStages(aggExpl, "DISTINCT_SCAN").length, 0);
|
|
})();
|
|
|
|
// Distinct with a predicate.
|
|
(function testDistinctWithPredWithNonMultikeyIndex() {
|
|
const results = coll.distinct("a.b.c", numPredicate);
|
|
assert.sameMembers([1, 2, 3], results);
|
|
|
|
const expl = coll.explain().distinct("a.b.c", numPredicate);
|
|
|
|
assert.eq(true, planHasStage(db, expl.queryPlanner.winningPlan, "DISTINCT_SCAN"), expl);
|
|
|
|
const pipeline = [{$match: numPredicate}].concat(getAggPipelineForDistinct("a.b.c"));
|
|
const aggResults = distinctResultsFromPipeline(pipeline);
|
|
assert.sameMembers(aggResults, results);
|
|
const aggExpl = assert.commandWorked(coll.explain().aggregate(pipeline));
|
|
assert.gt(getAggPlanStages(aggExpl, "DISTINCT_SCAN").length, 0);
|
|
})();
|
|
|
|
// Make the index multi key.
|
|
assert.commandWorked(coll.insert({a: {b: [{c: 4}, {c: 5}]}}));
|
|
assert.commandWorked(coll.insert({a: {b: [{c: 4}, {c: 6}]}}));
|
|
// Empty array is indexed as 'undefined'.
|
|
assert.commandWorked(coll.insert({a: {b: {c: []}}}));
|
|
|
|
// We should still use the index as long as the path we distinct() on is never an array
|
|
// index.
|
|
(function testDistinctWithMultikeyIndex() {
|
|
const multiKeyResults = coll.distinct("a.b.c");
|
|
// TODO SERVER-14832: Returning 'null' and 'undefined' here is inconsistent with the
|
|
// behavior when no index is present.
|
|
assert.sameMembers([1, 2, 3, 4, 5, 6, null, undefined], multiKeyResults);
|
|
const expl = coll.explain().distinct("a.b.c");
|
|
|
|
assert.eq(true, planHasStage(db, expl.queryPlanner.winningPlan, "DISTINCT_SCAN"));
|
|
|
|
// Not running same query with $group now that the field is multikey. See comment above.
|
|
})();
|
|
|
|
// We cannot use the DISTINCT_SCAN optimization when there is a multikey path in the key and
|
|
// there is a predicate. The reason is that we may have a predicate like {a: 4}, and two
|
|
// documents: {a: [4, 5]}, {a: [4, 6]}. With a DISTINCT_SCAN, we would "skip over" one of the
|
|
// documents, and leave out either '5' or '6', rather than providing the correct result of
|
|
// [4, 5, 6]. The test below is for a similar case.
|
|
(function testDistinctWithPredWithMultikeyIndex() {
|
|
const pred = {"a.b.c": 4};
|
|
const results = coll.distinct("a.b.c", pred);
|
|
assert.sameMembers([4, 5, 6], results);
|
|
|
|
const expl = coll.explain().distinct("a.b.c", pred);
|
|
assert.eq(false, planHasStage(db, expl.queryPlanner.winningPlan, "DISTINCT_SCAN"), expl);
|
|
assert.eq(true, planHasStage(db, expl.queryPlanner.winningPlan, "IXSCAN"), expl);
|
|
|
|
// Not running same query with $group now that the field is multikey. See comment above.
|
|
})();
|
|
|
|
// Perform a distinct on a path where the last component is multikey.
|
|
(function testDistinctOnPathWhereLastComponentIsMultiKey() {
|
|
assert.commandWorked(coll.createIndex({"a.b": 1}));
|
|
const multiKeyResults = coll.distinct("a.b");
|
|
assert.sameMembers(
|
|
[
|
|
null, // From the document with no 'b' field. TODO SERVER-14832: this is
|
|
// inconsistent with behavior when no index is present.
|
|
{c: 1},
|
|
{c: 2},
|
|
{c: 3},
|
|
{c: 4},
|
|
{c: 5},
|
|
{c: 6},
|
|
{c: []},
|
|
{notRelevant: 3}
|
|
],
|
|
multiKeyResults);
|
|
|
|
const expl = coll.explain().distinct("a.b");
|
|
assert.eq(true, planHasStage(db, expl.queryPlanner.winningPlan, "DISTINCT_SCAN"));
|
|
|
|
// Not running same query with $group now that the field is multikey. See comment above.
|
|
})();
|
|
|
|
(function testDistinctOnPathWhereLastComponentIsMultiKeyWithPredicate() {
|
|
assert.commandWorked(coll.createIndex({"a.b": 1}));
|
|
const pred = {"a.b": {$type: "array"}};
|
|
const multiKeyResults = coll.distinct("a.b", pred);
|
|
assert.sameMembers(
|
|
[
|
|
{c: 4},
|
|
{c: 5},
|
|
{c: 6},
|
|
],
|
|
multiKeyResults);
|
|
|
|
const expl = coll.explain().distinct("a.b", pred);
|
|
assert.eq(false, planHasStage(db, expl.queryPlanner.winningPlan, "DISTINCT_SCAN"));
|
|
assert.eq(true, planHasStage(db, expl.queryPlanner.winningPlan, "IXSCAN"));
|
|
|
|
// Not running same query with $group now that the field is multikey. See comment above.
|
|
})();
|
|
|
|
// If the path we distinct() on includes an array index, a COLLSCAN should be used,
|
|
// even if an index is available on the prefix to the array component ("a.b" in this case).
|
|
(function testDistinctOnNumericMultikeyPathNoIndex() {
|
|
const res = coll.distinct("a.b.0");
|
|
assert.eq(res, [{c: 4}]);
|
|
|
|
const expl = coll.explain().distinct("a.b.0");
|
|
assert.eq(true, planHasStage(db, expl.queryPlanner.winningPlan, "COLLSCAN"), expl);
|
|
|
|
// Will not attempt the equivalent query with aggregation, since $group by "a.b.0" will
|
|
// only treat '0' as a field name (not array index).
|
|
})();
|
|
|
|
// Creating an index on "a.b.0" and doing a distinct on it should be able to use DISTINCT_SCAN.
|
|
(function testDistinctOnNumericMultikeyPathWithIndex() {
|
|
assert.commandWorked(coll.createIndex({"a.b.0": 1}));
|
|
assert.commandWorked(coll.insert({a: {b: {0: "hello world"}}}));
|
|
const res = coll.distinct("a.b.0");
|
|
assert.sameMembers(res, [{c: 4}, "hello world"]);
|
|
|
|
const expl = coll.explain().distinct("a.b.0");
|
|
assert.eq(true, planHasStage(db, expl.queryPlanner.winningPlan, "DISTINCT_SCAN"), expl);
|
|
|
|
// Will not attempt the equivalent query with aggregation, since $group by "a.b.0" will
|
|
// only treat '0' as a field name (not array index).
|
|
})();
|
|
|
|
// Creating an index on "a.b.0" and doing a distinct on it should use an IXSCAN, as "a.b" is
|
|
// multikey. See explanation above about why a DISTINCT_SCAN cannot be used when the path
|
|
// given is multikey.
|
|
(function testDistinctWithPredOnNumericMultikeyPathWithIndex() {
|
|
const pred = {"a.b.0": {$type: "object"}};
|
|
const res = coll.distinct("a.b.0", pred);
|
|
assert.sameMembers(res, [{c: 4}]);
|
|
|
|
const expl = coll.explain().distinct("a.b.0", pred);
|
|
assert.eq(false, planHasStage(db, expl.queryPlanner.winningPlan, "DISTINCT_SCAN"), expl);
|
|
assert.eq(true, planHasStage(db, expl.queryPlanner.winningPlan, "IXSCAN"), expl);
|
|
|
|
// Will not attempt the equivalent query with aggregation, since $group by "a.b.0" will
|
|
// only treat '0' as a field name (not array index).
|
|
})();
|
|
})();
|