Files
mongo/jstests/replsets/read_operations_during_rollback.js
David Storch c54afe747a SERVER-54624 make SBE check for a change in catalog epoch during yield recovery
Catalog epoch changes happen during rollback. This fixes a
bug where a getMore against an SBE cursor could crash the
server if the cursor survived a rollback.
2021-03-25 15:56:04 +00:00

120 lines
5.2 KiB
JavaScript

/*
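* This test makes sure 'find' and 'getMore' commands fail correctly during rollback: a 'getMore'
* already in progress is interrupted, new reads on the rolling-back node are rejected with
* NotPrimaryOrSecondary, and a cursor left idle across the rollback fails with QueryPlanKilled.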
*
* @tags: [
* # The 'getMoreHangAfterPinCursor' failpoint is not present in 4.4.
* requires_fcv_49,
* requires_majority_read_concern,
* ]
*/
(function() {
    "use strict";

    load("jstests/replsets/libs/rollback_test.js");
    load("jstests/replsets/rslib.js");

    const dbName = "test";
    const collName = "coll";

    // Bring up the rollback fixture and seed three documents that the reads below will target.
    const rollbackTest = new RollbackTest();
    assert.commandWorked(rollbackTest.getPrimary().getDB(dbName)[collName].insert([{}, {}, {}]));

    const rollbackNode = rollbackTest.transitionToRollbackOperations();

    // Every read in this test goes to the test database on 'rollbackNode'. The handle is fetched
    // anew on each use rather than cached.
    const testDB = () => rollbackNode.getDB(dbName);

    // With three documents and a batch size of two, this cursor hands back a partial result and
    // stays open. Nothing touches it again until the rollback has finished.
    const idleFindRes = assert.commandWorked(testDB().runCommand({find: collName, batchSize: 2}));
    assert.eq(2, idleFindRes.cursor.firstBatch.length, idleFindRes);
    const idleCursorId = idleFindRes.cursor.id;
    assert.neq(0, idleCursorId, idleFindRes);

    // Arm both failpoints up front: one to hold the node inside rollback, one to park a getMore.
    setFailPoint(rollbackNode, "rollbackHangAfterTransitionToRollback");
    setFailPoint(rollbackNode, "getMoreHangAfterPinCursor");

    // Runs in a parallel shell connected to 'rollbackNode.port'; it names the collection with a
    // literal instead of going through 'collName'.
    const awaitGetMoreShell = startParallelShell(() => {
        db.getMongo().setSecondaryOk();
        const findRes = assert.commandWorked(db.runCommand({find: "coll", batchSize: 0}));
        const cursorID = findRes.cursor.id;

        // This getMore starts before rollback and must still be killed by it. Rollback kills
        // outstanding reads and closes their connections shortly afterwards, so there are two
        // acceptable outcomes: the InterruptedDueToReplStateChange reply arrives before the
        // connection is closed, or the shell throws a network error exception instead.
        try {
            const getMoreRes = db.runCommand({getMore: cursorID, collection: "coll"});
            assert.commandFailedWithCode(getMoreRes, ErrorCodes.InterruptedDueToReplStateChange);
        } catch (e) {
            assert.includes(e.toString(), "network error while attempting to run command");
        }
    }, rollbackNode.port);

    // A second cursor, opened now with an empty first batch, to be read while rollback is hung.
    const pendingFindRes =
        assert.commandWorked(testDB().runCommand({find: collName, batchSize: 0}));
    const cursorIdToReadDuringRollback = pendingFindRes.cursor.id;

    // Block until currentOp reports exactly one getMore on the test collection, i.e. the parallel
    // shell's getMore is hanging.
    assert.soonNoExcept(() => {
        const hangingGetMore = {"command.getMore": {$exists: true}, "command.collection": collName};
        const currentOps = testDB().adminCommand("currentOp", hangingGetMore);
        return currentOps.inprog.length === 1;
    });

    // Kick off the rollback.
    rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
    rollbackTest.transitionToSyncSourceOperationsDuringRollback();

    jsTestLog(`Reconnecting to ${rollbackNode.host} after rollback`);
    reconnect(testDB());

    // Rollback is considered hung once the failpoint's log line shows up.
    checkLog.contains(rollbackNode, "rollbackHangAfterTransitionToRollback fail point enabled.");

    // Release the parked getMore and let the parallel shell finish its assertions.
    clearFailPoint(rollbackNode, "getMoreHangAfterPinCursor");
    jsTestLog("Wait for 'getMore' thread to join.");
    awaitGetMoreShell();

    // Asserts that a fresh 'find' and a 'getMore' on the second cursor are both rejected with
    // NotPrimaryOrSecondary.
    const assertReadsRejected = () => {
        assert.commandFailedWithCode(testDB().runCommand({find: collName}),
                                     ErrorCodes.NotPrimaryOrSecondary);
        assert.commandFailedWithCode(
            testDB().runCommand({getMore: cursorIdToReadDuringRollback, collection: collName}),
            ErrorCodes.NotPrimaryOrSecondary);
    };

    jsTestLog("Reading during rollback.");
    assertReadsRejected();

    // Turn off the best-effort primary-ness check in the service entry point so that the second
    // pass exercises the real check inside the 'find' and 'getMore' commands themselves.
    setFailPoint(rollbackNode, "skipCheckingForNotPrimaryInCommandDispatch");
    jsTestLog("Reading during rollback (again with command dispatch checks disabled).");
    assertReadsRejected();

    // Let rollback run to completion and return the set to steady state.
    clearFailPoint(rollbackNode, "rollbackHangAfterTransitionToRollback");
    rollbackTest.transitionToSteadyStateOperations();

    // serverStatus must name rollback as the last state transition and carry truthy counters for
    // user operations running and killed.
    const replStats =
        assert.commandWorked(rollbackNode.adminCommand({serverStatus: 1})).metrics.repl;
    const transition = replStats.stateTransition;
    assert.eq(transition.lastStateTransition, "rollback");
    assert(
        transition.userOperationsRunning,
        () => `Response should have a 'stateTransition.userOperationsRunning' field: ${
            tojson(replStats)}`);
    assert(
        transition.userOperationsKilled,
        () => `Response should have a 'stateTransition.userOperationsKilled' field: ${
            tojson(replStats)}`);

    // The idle cursor stayed open through the whole rollback, but the rollback invalidated it, so
    // reading from it now has to fail.
    assert.commandFailedWithCode(
        testDB().runCommand({getMore: idleCursorId, collection: collName}),
        ErrorCodes.QueryPlanKilled);

    // Stop the rollback fixture; this is also where the replica set gets checked.
    rollbackTest.stop();
}());