Previously, when upgrading, resharding operations were aborted after the request to finalize the FCV had been sent to the shards, making it possible for a resharding recipient to update its FCV to its target value prior to aborting. It was therefore not possible to differentiate a resharding operation that had upgraded to the latest version since starting from one that had been on the latest version throughout. For this reason, resharding operations are now aborted first. Furthermore, the resharding command already ensures that the FCV cannot change while the coordinator is being set up. However, it did not check that the current FCV is not in an upgrading or downgrading state. After the above change, this would make it possible for a new resharding operation to begin during an FCV upgrade, after resharding operations are aborted but before the shards complete the FCV upgrade. As a consequence, the operation would run across FCVs without being aborted. As such, the reshard command now fails if the current FCV is either upgrading or downgrading. In combination, these changes should guarantee that during a version change, a new resharding operation cannot begin and a previously running resharding operation always aborts completely before reaching the target version. Note that it is still possible for a resharding operation to reach an upgrading or downgrading FCV before being aborted. These changes were made in the interest of being able to assert that newly added optional fields that should always be set were indeed set. As such, this change also enables the assertion disabled by SERVER-65039.
130 lines
4.7 KiB
JavaScript
130 lines
4.7 KiB
JavaScript
(function() {
|
|
"use strict";
|
|
|
|
load("jstests/sharding/libs/resharding_test_fixture.js");
|
|
load('jstests/libs/discover_topology.js');
|
|
load('jstests/libs/fail_point_util.js');
|
|
load('jstests/sharding/libs/sharded_transactions_helpers.js');
|
|
|
|
/**
 * Starts a resharding operation on "reshardingDb.testColl" (two donors, two recipients, resharding
 * in place), issues setFeatureCompatibilityVersion while the operation is in the background, and
 * expects the operation to end with one of the listed error codes. This is done twice: first
 * setting the FCV to lastLTSFCV, then setting it to latestFCV.
 *
 * @param {boolean} forcePooledConnectionsDropped When truthy, failpoints on the config server
 *     primary are used to step the setFCV command and the resharding coordinator through a fixed
 *     sequence during the first resharding operation. When falsy, no failpoints are configured.
 */
function runTest(forcePooledConnectionsDropped) {
    const fixture = new ReshardingTest({numDonors: 2, numRecipients: 2, reshardInPlace: true});
    fixture.setup();

    // Create the source collection, split at oldKey 0 with one chunk on each donor.
    const donors = fixture.donorShardNames;
    const sourceColl = fixture.createShardedCollection({
        ns: "reshardingDb.testColl",
        shardKeyPattern: {oldKey: 1},
        chunks: [
            {min: {oldKey: MinKey}, max: {oldKey: 0}, shard: donors[0]},
            {min: {oldKey: 0}, max: {oldKey: MaxKey}, shard: donors[1]},
        ],
    });

    const router = sourceColl.getMongo();

    // Seed 1000 documents; newKey is the negation of oldKey.
    for (let docIdx = 0; docIdx < 1000; ++docIdx) {
        assert.commandWorked(sourceColl.insert({oldKey: docIdx, newKey: -1 * docIdx}));
    }

    const clusterNodes = DiscoverTopology.findConnectedNodes(router);
    const configPrimary = new Mongo(clusterNodes.configsvr.primary);

    // These two failpoints are only configured in the forced variant; otherwise both handles stay
    // undefined and are never touched.
    const tellDonorToRefreshFp = forcePooledConnectionsDropped
        ? configureFailPoint(configPrimary, "pauseBeforeTellDonorToRefresh")
        : undefined;
    const closeCxnsFp = forcePooledConnectionsDropped
        ? configureFailPoint(configPrimary, "pauseBeforeCloseCxns")
        : undefined;

    const recipients = fixture.recipientShardNames;

    // Both resharding operations use the same target shard key and chunk layout; a fresh options
    // object is built for each call.
    const makeReshardOptions = () => ({
        newShardKeyPattern: {newKey: 1},
        newChunks: [
            {min: {newKey: MinKey}, max: {newKey: 0}, shard: recipients[0]},
            {min: {newKey: 0}, max: {newKey: MaxKey}, shard: recipients[1]},
        ],
    });

    // Callback for the first resharding operation: set the FCV to lastLTSFCV from a parallel shell.
    const downgradeFcvDuringResharding = () => {
        // Do not send setFCV until the config server has started resharding; otherwise setFCV
        // could race ahead of configsvrReshard on the config server.
        assert.soon(
            () => router.getDB('config').reshardingOperations.findOne() != null,
            "timed out waiting for coordinator doc to be written",
            30 * 1000);

        if (forcePooledConnectionsDropped) {
            tellDonorToRefreshFp.wait();
        }

        const setFcvSnippet =
            `{
assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV}));
}`;

        const joinSetFcvShell = startParallelShell(setFcvSnippet, router.port);

        if (forcePooledConnectionsDropped) {
            // The order of the failpoint operations below is significant; keep it as is.
            closeCxnsFp.wait();

            const markKeepOpenFp = configureFailPoint(configPrimary, "pauseBeforeMarkKeepOpen");

            tellDonorToRefreshFp.off();

            jsTestLog("Wait to hit pauseBeforeMarkKeepOpen failpoint");
            markKeepOpenFp.wait();

            jsTestLog("Set hitDropConnections failpoint");
            const droppedCxnsFp = configureFailPoint(configPrimary, "finishedDropConnections");
            closeCxnsFp.off();

            waitForFailpoint("Hit finishedDropConnections", 1);
            clearRawMongoProgramOutput();

            jsTestLog("Turn off hitDropConnections failpoint");
            droppedCxnsFp.off();

            jsTestLog("Turn off pause before pauseBeforeMarkKeepOpen failpoint");
            markKeepOpenFp.off();
        }

        joinSetFcvShell();
    };

    fixture.withReshardingInBackground(makeReshardOptions(), downgradeFcvDuringResharding, {
        expectedErrorCode: [
            ErrorCodes.ReshardCollectionAborted,
            ErrorCodes.Interrupted,
        ]
    });

    // Second resharding operation: set the FCV to latestFCV directly through the router.
    const upgradeFcvDuringResharding = () => {
        assert.commandWorked(router.adminCommand({setFeatureCompatibilityVersion: latestFCV}));
    };

    fixture.withReshardingInBackground(makeReshardOptions(), upgradeFcvDuringResharding, {
        expectedErrorCode: [
            ErrorCodes.CommandNotSupported,
            ErrorCodes.ReshardCollectionAborted,
            ErrorCodes.Interrupted,
        ]
    });

    fixture.teardown();
}
|
|
|
|
// Run both variants, forced first. With forcePooledConnectionsDropped=true the setFCV command is
// forced to drop connections while the coordinator is still establishing connections to the
// participant shards, to ensure that the resharding operation does not stall. With false, the same
// scenario runs without that failpoint sequencing.
for (const forcePooledConnectionsDropped of [true, false]) {
    runTest(forcePooledConnectionsDropped);
}
|
|
})();
|