320 lines
13 KiB
JavaScript
320 lines
13 KiB
JavaScript
/**
 * Wrapper around ReplSetTest for testing rollback behavior. It allows the caller to easily
 * transition between stages of a rollback without having to manually operate on the replset.
 *
 * This library exposes the following 5 sequential stages of rollback:
 * 1. RollbackTest starts in kSteadyStateOps: the replica set is in steady state replication.
 *        Operations applied will be replicated.
 * 2. kRollbackOps: operations applied during this phase will not be replicated and eventually be
 *        rolled back.
 * 3. kSyncSourceOpsBeforeRollback: apply operations on the sync source before rollback begins.
 * 4. kSyncSourceOpsDuringRollback: apply operations on the sync source after rollback has begun.
 * 5. kSteadyStateOps: (same as stage 1) with the option of waiting for the rollback to finish.
 *
 * Please refer to the various `transition*` functions for more information on the behavior
 * of each stage.
 */
|
|
|
|
"use strict";
|
|
|
|
load("jstests/replsets/rslib.js");
|
|
load("jstests/hooks/validate_collections.js");
|
|
|
|
/**
 * Test fixture that drives a three-node replica set through the stages of a rollback.
 *
 * This fixture allows the user to optionally pass in a custom ReplSetTest
 * to be used for the test. The underlying replica set must meet the following
 * requirements:
 *      1. It must have exactly three nodes: a primary, a secondary and an arbiter.
 *      2. It must be running with mongobridge.
 *
 * If the caller does not provide their own replica set, a standard three-node
 * replset will be initialized instead, with all nodes running the latest version.
 *
 * @param {string} [name="RollbackTest"] the name of the test being run; used to prefix log
 *     output and to name the replica set created by the standard setup
 * @param {Object} [replSet] the ReplSetTest instance to adopt
 */
function RollbackTest(name = "RollbackTest", replSet) {
    const State = {
        kStopped: "kStopped",
        kRollbackOps: "kRollbackOps",
        // DEPRECATED: Remove this line after TIG-680.
        kSyncSourceOps: "kSyncSourceOpsBeforeRollback",
        kSyncSourceOpsBeforeRollback: "kSyncSourceOpsBeforeRollback",
        kSyncSourceOpsDuringRollback: "kSyncSourceOpsDuringRollback",
        kSteadyStateOps: "kSteadyStateOps",
    };

    // Maps each state to the list of states that may directly follow it.
    const AcceptableTransitions = {
        [State.kStopped]: [],
        [State.kRollbackOps]: [State.kSyncSourceOpsBeforeRollback],
        // DEPRECATED: remove transition to State.kSteadyStateOps after TIG-680.
        [State.kSyncSourceOpsBeforeRollback]:
            [State.kSyncSourceOpsDuringRollback, State.kSteadyStateOps],
        [State.kSyncSourceOpsDuringRollback]: [State.kSteadyStateOps],
        [State.kSteadyStateOps]: [State.kStopped, State.kRollbackOps],
    };

    const collectionValidator = new CollectionValidator();

    // Topology bookkeeping; all of these are assigned by validateAndUseSetup().
    let rst;
    let curPrimary;
    let curSecondary;
    let arbiter;

    let curState = State.kSteadyStateOps;
    // RBID last observed on the node that is expected to roll back.
    let lastRBID;

    // Make sure we have a replica set up and running. The helpers below are function
    // declarations, so they are hoisted and may be called here.
    validateAndUseSetup(replSet === undefined ? performStandardSetup() : replSet);

    /**
     * Validate and use the provided replica set.
     *
     * Fails fast if the set is not a bridged, three-node ReplSetTest with an arbiter and a
     * data-bearing secondary, rather than failing later while waiting on a missing node.
     *
     * @param {Object} replSet the ReplSetTest instance to adopt
     */
    function validateAndUseSetup(replSet) {
        assert.eq(true,
                  replSet instanceof ReplSetTest,
                  `Must provide an instance of ReplSetTest. Have: ${tojson(replSet)}`);

        assert.eq(true, replSet.usesBridge(), "Must set up ReplSetTest with mongobridge enabled.");
        assert.eq(3, replSet.nodes.length, "Replica set must contain exactly three nodes.");

        // Make sure we have a primary.
        curPrimary = replSet.getPrimary();

        // Extract the other two nodes.
        const secondaries = replSet.getSecondaries();
        arbiter = replSet.getArbiter();
        curSecondary = (secondaries[0] === arbiter) ? secondaries[1] : secondaries[0];

        // The node count alone does not guarantee the required topology.
        assert(arbiter, "Replica set must contain an arbiter.");
        assert(curSecondary, "Replica set must contain a data-bearing secondary.");

        // Wait for both nodes to be ready.
        waitForState(curSecondary, ReplSetTest.State.SECONDARY);
        waitForState(arbiter, ReplSetTest.State.ARBITER);

        rst = replSet;
        lastRBID = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid;
    }

    /**
     * Return an instance of ReplSetTest initialized with a standard
     * three-node replica set running with the latest version.
     *
     * The third node is configured as an arbiter.
     */
    function performStandardSetup() {
        const standardSet = new ReplSetTest({name, nodes: 3, useBridge: true});
        standardSet.startSet();

        const nodes = standardSet.nodeList();
        standardSet.initiate({
            _id: name,
            members: [
                {_id: 0, host: nodes[0]},
                {_id: 1, host: nodes[1]},
                {_id: 2, host: nodes[2], arbiterOnly: true}
            ]
        });
        return standardSet;
    }

    /**
     * Check oplogs, replicated data hashes and collection validation across the set.
     * Only legal while in kSteadyStateOps.
     */
    function checkDataConsistency() {
        assert.eq(curState,
                  State.kSteadyStateOps,
                  "Not in kSteadyStateOps state, cannot check data consistency");
        const replSetName = rst.name;
        rst.checkOplogs(replSetName);
        rst.checkReplicatedDataHashes(replSetName);
        collectionValidator.validateNodes(rst.nodeList());
    }

    /**
     * Print `msg` prefixed with the test name.
     *
     * @param {string} msg the message to print
     * @param {boolean} [important=false] when true, use jsTestLog() instead of print()
     */
    function log(msg, important = false) {
        const line = `[${name}] ${msg}`;
        if (important) {
            jsTestLog(line);
        } else {
            print(line);
        }
    }

    /**
     * Move the fixture to `newState` if that is an acceptable transition from the current
     * state. Returns nothing.
     *
     * @throws {Error} if the transition is not listed in AcceptableTransitions
     * @private
     */
    function transitionIfAllowed(newState) {
        if (!AcceptableTransitions[curState].includes(newState)) {
            // Transitioning to a disallowed State is likely a bug in the code, so we throw an
            // error here instead of silently failing.
            throw new Error(`Can't transition to State "${newState}" from State "${curState}"`);
        }

        log(`Transitioning to: "${newState}"`, true);
        curState = newState;
    }

    /**
     * Transition from a rollback state to a steady state. Operations applied in this phase will
     * be replicated to all nodes and should not be rolled back.
     *
     * Reconnects the secondary, waits until its RBID is exactly one greater than the RBID
     * recorded before the rollback, awaits replication, and then checks data consistency.
     *
     * @returns {Object} the current primary
     */
    this.transitionToSteadyStateOperations = function() {
        // Ensure the secondary is connected. It may already have been connected from a previous
        // stage.
        log(`Ensuring the secondary ${curSecondary.host} is connected to the other nodes`);
        curSecondary.reconnect([curPrimary, arbiter]);

        log("Waiting for rollback to complete", true);
        let rbid = -1;
        assert.soon(() => {
            try {
                rbid = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid;
            } catch (e) {
                // Command can fail when sync source is being cleared. Keep polling with the
                // previously observed value, but leave a trace of the failure.
                log(`replSetGetRBID failed, will retry: ${e}`);
            }
            // Fail early if the rbid is greater than lastRBID+1.
            assert.lte(rbid,
                       lastRBID + 1,
                       `RBID is too large. current RBID: ${rbid}, last RBID: ${lastRBID}`);

            return rbid === lastRBID + 1;
        }, "Timed out waiting for RBID to increment");

        rst.awaitSecondaryNodes();
        rst.awaitReplication();

        log("Rollback and awaitReplication completed", true);

        // We call transition to steady state ops after awaiting replication has finished,
        // otherwise it could be confusing to see operations being replicated when we're already
        // in rollback complete state.
        transitionIfAllowed(State.kSteadyStateOps);

        // After the previous rollback (if any) has completed and await replication has finished,
        // the replica set should be in a consistent and "fresh" state. We now prepare for the next
        // rollback.
        checkDataConsistency();

        return curPrimary;
    };

    /**
     * Transition to kRollbackOps, where we isolate the current primary from the secondary so
     * its operations will eventually be rolled back.
     *
     * @returns {Object} the current primary
     */
    this.transitionToRollbackOperations = function() {
        // Ensure previous operations are replicated. The current secondary will be used as the sync
        // source later on, so it must be up-to-date to prevent any previous operations from being
        // rolled back.
        rst.awaitSecondaryNodes();
        rst.awaitReplication();

        transitionIfAllowed(State.kRollbackOps);

        // Disconnect the current primary from the secondary so operations on it will eventually be
        // rolled back. But do not disconnect it from the arbiter so it can stay as the primary.
        log(`Isolating the primary ${curPrimary.host} from the secondary ${curSecondary.host}`);
        curPrimary.disconnect([curSecondary]);

        return curPrimary;
    };

    /**
     * Transition to kSyncSourceOpsBeforeRollback, where we isolate the old primary and
     * elect the old secondary as the new primary. Then, operations can be performed on the new
     * primary so that optimes diverge and previous operations on the old primary will be
     * rolled back.
     *
     * @returns {Object} the new primary (the former secondary)
     */
    this.transitionToSyncSourceOperationsBeforeRollback = function() {
        transitionIfAllowed(State.kSyncSourceOpsBeforeRollback);

        // Insert one document to ensure rollback will not be skipped.
        const rollbackDbName = "EnsureThereIsAtLeastOneOperationToRollback";
        assert.writeOK(curPrimary.getDB(rollbackDbName).ensureRollback.insert(
            {thisDocument: 'is inserted to ensure rollback is not skipped'}));

        log(`Isolating the primary ${curPrimary.host} so it will step down`);
        curPrimary.disconnect([curSecondary, arbiter]);

        log(`Waiting for the primary ${curPrimary.host} to step down`);
        try {
            // The stepdown freeze period is short because the node is disconnected from
            // the rest of the replica set, so it physically can't become the primary.
            curPrimary.adminCommand({replSetStepDown: 1, force: true});
        } catch (e) {
            // Stepdown may fail if the node has already started stepping down.
            log(`Caught exception from replSetStepDown: ${e}`);
        }

        waitForState(curPrimary, ReplSetTest.State.SECONDARY);

        log(`Reconnecting the secondary ${curSecondary.host} to the arbiter so it can be elected`);
        curSecondary.reconnect([arbiter]);

        log(`Waiting for the new primary ${curSecondary.host} to be elected`);
        // Read once; both protocol-version-specific steps below use the same value.
        const protocolVersion = rst.getReplSetConfig().protocolVersion;
        if (protocolVersion > 0) {
            assert.soonNoExcept(() => {
                const res = curSecondary.adminCommand({replSetStepUp: 1});
                return res.ok;
            });
        }

        const newPrimary = rst.getPrimary();

        // As a sanity check, ensure the new primary is the old secondary. The opposite scenario
        // should never be possible with 2 electable nodes and the sequence of operations thus far.
        assert.eq(newPrimary, curSecondary, "Did not elect a new node as primary");
        log(`Elected the old secondary ${newPrimary.host} as the new primary`);

        if (protocolVersion === 0) {
            // Add a sleep and a dummy write to ensure the new primary has an optime greater than
            // the last optime on the node that will undergo rollback. This greater optime ensures
            // that the new primary is eligible to become a sync source in pv0.
            sleep(1000);
            const pv0DbName = "ensureEligiblePV0";
            assert.writeOK(newPrimary.getDB(pv0DbName).testColl.insert({id: 0}));
        }

        // The old primary is the new secondary; the old secondary just got elected as the new
        // primary, so we update the topology to reflect this change.
        curSecondary = curPrimary;
        curPrimary = newPrimary;

        // Record the rollback node's RBID so the steady-state transition can detect exactly one
        // increment.
        lastRBID = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid;

        return curPrimary;
    };

    // DEPRECATED: remove this line after TIG-680.
    this.transitionToSyncSourceOperations = this.transitionToSyncSourceOperationsBeforeRollback;

    /**
     * Transition to kSyncSourceOpsDuringRollback, where we reconnect the rollback node so
     * it will start rolling back.
     *
     * Note that there is no guarantee that operations performed on the sync source while in this
     * state will actually occur *during* the rollback process. They may happen before the rollback
     * is finished or after the rollback is done. We provide this state, though, as an attempt to
     * provide a way to test this behavior, even if it's non-deterministic.
     *
     * @returns {Object} the current primary
     */
    this.transitionToSyncSourceOperationsDuringRollback = function() {
        transitionIfAllowed(State.kSyncSourceOpsDuringRollback);

        log(`Reconnecting the secondary ${curSecondary.host} so it'll go into rollback`);
        curSecondary.reconnect([curPrimary, arbiter]);

        return curPrimary;
    };

    /**
     * Check data consistency, move to kStopped and stop the replica set.
     * Must be called while in kSteadyStateOps.
     *
     * @returns whatever ReplSetTest.stopSet() returns
     */
    this.stop = function() {
        checkDataConsistency();
        transitionIfAllowed(State.kStopped);
        return rst.stopSet();
    };

    /** @returns {Object} the node currently tracked as primary */
    this.getPrimary = function() {
        return curPrimary;
    };

    /** @returns {Object} the node currently tracked as the data-bearing secondary */
    this.getSecondary = function() {
        return curSecondary;
    };
}
|