Catalog epoch changes happen during rollback. This fixes a bug where a getMore against an SBE cursor could crash the server if the cursor survived a rollback.
120 lines
5.2 KiB
JavaScript
120 lines
5.2 KiB
JavaScript
/*
 * This test makes sure 'find' and 'getMore' commands fail correctly during rollback.
 *
 * @tags: [
 *   # The 'getMoreHangAfterPinCursor' failpoint is not present in 4.4.
 *   requires_fcv_49,
 *   requires_majority_read_concern,
 * ]
 */
(function() {
"use strict";

// Shell libraries used by this test.
// NOTE(review): 'RollbackTest' and the 'setFailPoint' / 'clearFailPoint' / 'reconnect' helpers are
// presumably defined by these two files -- confirm.
load("jstests/replsets/libs/rollback_test.js");
load("jstests/replsets/rslib.js");

const dbName = "test";
const collName = "coll";

// Set up Rollback Test.
const rollbackTest = new RollbackTest();

// Insert documents to be read later.
assert.commandWorked(rollbackTest.getPrimary().getDB(dbName)[collName].insert([{}, {}, {}]));

const rollbackNode = rollbackTest.transitionToRollbackOperations();

// Open a cursor on 'rollbackNode' which returns partial results (2 of the 3 documents), but will
// remain open and idle during the rollback process.
const findCmdRes =
    assert.commandWorked(rollbackNode.getDB(dbName).runCommand({"find": collName, batchSize: 2}));
assert.eq(2, findCmdRes.cursor.firstBatch.length, findCmdRes);
const idleCursorId = findCmdRes.cursor.id;
// A cursor id of 0 would mean the cursor was already exhausted and closed.
assert.neq(0, idleCursorId, findCmdRes);

// Make the node pause once it has transitioned to rollback, so reads can be issued against it
// while it is still in that state.
setFailPoint(rollbackNode, "rollbackHangAfterTransitionToRollback");

// Make any 'getMore' hang after pinning its cursor, so that one is in flight when rollback starts.
setFailPoint(rollbackNode, "getMoreHangAfterPinCursor");

const joinGetMoreThread = startParallelShell(() => {
    // NOTE(review): the collection name is spelled out literally in this function, presumably
    // because the outer 'collName' constant is not visible in the parallel shell -- confirm.
    db.getMongo().setSecondaryOk();
    const cursorID = assert.commandWorked(db.runCommand({"find": "coll", batchSize: 0})).cursor.id;

    // Make sure an outstanding read operation gets killed during rollback even though the read
    // was started before rollback. Outstanding read operations are killed during rollback and
    // their connections are closed shortly after. So we would get either an error
    // (InterruptedDueToReplStateChange) if the error message is sent out and received before
    // the connection is closed or a network error exception.
    let getMoreRes;
    try {
        getMoreRes = db.runCommand({"getMore": cursorID, collection: "coll"});
    } catch (e) {
        // Only the command itself is guarded, so an assertion failure about an unexpected
        // response below is reported as-is instead of as a "network error" mismatch.
        assert.includes(e.toString(), "network error while attempting to run command");
        return;
    }
    assert.commandFailedWithCode(getMoreRes, ErrorCodes.InterruptedDueToReplStateChange);
}, rollbackNode.port);

// Open a second cursor (empty first batch) now; it is read from while the node is in rollback.
const cursorIdToBeReadDuringRollback =
    assert.commandWorked(rollbackNode.getDB(dbName).runCommand({"find": collName, batchSize: 0}))
        .cursor.id;

// Wait for 'getMore' to hang on the test collection.
assert.soonNoExcept(() => {
    const filter = {"command.getMore": {$exists: true}, "command.collection": collName};
    return rollbackNode.getDB(dbName).adminCommand("currentOp", filter).inprog.length === 1;
});

// Start rollback.
rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
rollbackTest.transitionToSyncSourceOperationsDuringRollback();

jsTestLog(`Reconnecting to ${rollbackNode.host} after rollback`);
reconnect(rollbackNode.getDB(dbName));

// Wait for rollback to hang.
checkLog.contains(rollbackNode, "rollbackHangAfterTransitionToRollback fail point enabled.");

// Release the hung 'getMore' so the parallel shell can observe its outcome and exit.
clearFailPoint(rollbackNode, "getMoreHangAfterPinCursor");

jsTestLog("Wait for 'getMore' thread to join.");
joinGetMoreThread();

jsTestLog("Reading during rollback.");
// Make sure that read operations fail during rollback.
assert.commandFailedWithCode(rollbackNode.getDB(dbName).runCommand({"find": collName}),
                             ErrorCodes.NotPrimaryOrSecondary);
assert.commandFailedWithCode(rollbackNode.getDB(dbName).runCommand(
                                 {"getMore": cursorIdToBeReadDuringRollback, collection: collName}),
                             ErrorCodes.NotPrimaryOrSecondary);

// Disable the best-effort check for primary-ness in the service entry point, so that we
// exercise the real check for primary-ness in 'find' and 'getMore' commands.
setFailPoint(rollbackNode, "skipCheckingForNotPrimaryInCommandDispatch");
jsTestLog("Reading during rollback (again with command dispatch checks disabled).");
assert.commandFailedWithCode(rollbackNode.getDB(dbName).runCommand({"find": collName}),
                             ErrorCodes.NotPrimaryOrSecondary);
assert.commandFailedWithCode(rollbackNode.getDB(dbName).runCommand(
                                 {"getMore": cursorIdToBeReadDuringRollback, collection: collName}),
                             ErrorCodes.NotPrimaryOrSecondary);

// Let the rollback proceed.
clearFailPoint(rollbackNode, "rollbackHangAfterTransitionToRollback");

rollbackTest.transitionToSteadyStateOperations();

// The serverStatus replication metrics should record the rollback state transition and carry the
// counters for user operations that were running / killed during it.
const replMetrics = assert.commandWorked(rollbackNode.adminCommand({serverStatus: 1})).metrics.repl;
assert.eq(replMetrics.stateTransition.lastStateTransition, "rollback");
assert(replMetrics.stateTransition.userOperationsRunning,
       () => "Response should have a 'stateTransition.userOperationsRunning' field: " +
           tojson(replMetrics));
assert(replMetrics.stateTransition.userOperationsKilled,
       () => "Response should have a 'stateTransition.userOperationsKilled' field: " +
           tojson(replMetrics));

// Run a getMore against the idle cursor that remained open throughout the rollback. The getMore
// should fail since the cursor has been invalidated by the rollback.
assert.commandFailedWithCode(
    rollbackNode.getDB(dbName).runCommand({"getMore": idleCursorId, collection: collName}),
    ErrorCodes.QueryPlanKilled);

// Check the replica set.
rollbackTest.stop();
}());