239 lines
8.9 KiB
JavaScript
239 lines
8.9 KiB
JavaScript
/**
|
|
* Loading this file extends the prototype for ReplSetTest to spawn a thread, which continuously
|
|
* step down the primary.
|
|
*/
|
|
|
|
// Contains the declaration for ScopedThread and CountDownLatch
|
|
load('jstests/libs/parallelTester.js');
|
|
load("jstests/replsets/rslib.js");
|
|
|
|
/**
|
|
* Executes the specified function and if it fails due to exception, which is related to network
|
|
* error retries the call once. If the second attempt also fails, simply throws the last
|
|
* exception.
|
|
*
|
|
* Returns the return value of the input call.
|
|
*/
|
|
function retryOnNetworkError(func) {
|
|
var networkErrorRetriesLeft = 1;
|
|
|
|
while (true) {
|
|
try {
|
|
return func();
|
|
} catch (e) {
|
|
if (e.toString().indexOf("network error") > -1 && networkErrorRetriesLeft > 0) {
|
|
print("Network error occurred and the call will be retried: " +
|
|
tojson({error: e.toString(), stack: e.stack}));
|
|
networkErrorRetriesLeft--;
|
|
} else {
|
|
throw e;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
(function() {
|
|
'use strict';
|
|
|
|
// Preserve the original ReplSetTest and ShardingTest constructors, because we are overriding
|
|
// them
|
|
var originalReplSetTest = ReplSetTest;
|
|
var originalShardingTest = ShardingTest;
|
|
|
|
/**
|
|
* Overrides the ReplSetTest constructor to start the continuous config server stepdown thread.
|
|
*/
|
|
ReplSetTest = function ReplSetTestWithContinuousPrimaryStepdown() {
|
|
// Construct the original object
|
|
originalReplSetTest.apply(this, arguments);
|
|
|
|
/**
|
|
* This function is intended to be called in a separate thread and it continuously steps
|
|
* down the current primary for a number of attempts.
|
|
*
|
|
* @param {string} seedNode The connection string of a node from which to discover the
|
|
* primary of the replica set.
|
|
* @param {CountDownLatch} stopCounter Object, which can be used to stop the thread.
|
|
*
|
|
* @return Object with the following fields:
|
|
* ok {integer}: 0 if it failed, 1 if it succeeded.
|
|
* error {string}: Only present if ok == 0. Contains the cause for the error.
|
|
* stack {string}: Only present if ok == 0. Contains the stack at the time of the
|
|
* error.
|
|
*/
|
|
function _continuousPrimaryStepdownFn(seedNode, stopCounter) {
|
|
'use strict';
|
|
|
|
load('jstests/libs/override_methods/sharding_continuous_config_stepdown.js');
|
|
|
|
var stepdownDelaySeconds = 10;
|
|
|
|
print('*** Continuous stepdown thread running with seed node ' + seedNode);
|
|
|
|
try {
|
|
// The config primary may unexpectedly step down during startup if under heavy load
|
|
// and too slowly processing heartbeats. When it steps down, it closes all of its
|
|
// connections. This can happen during the call to new ReplSetTest, so in order to
|
|
// account for this and make the tests stable, retry discovery of the replica set's
|
|
// configuration once (SERVER-22794).
|
|
var replSet = retryOnNetworkError(function() {
|
|
return new ReplSetTest(seedNode);
|
|
});
|
|
|
|
var primary = replSet.getPrimary();
|
|
|
|
while (stopCounter.getCount() > 0) {
|
|
print('*** Stepping down ' + primary);
|
|
|
|
assert.throws(function() {
|
|
var result = primary.adminCommand(
|
|
{replSetStepDown: stepdownDelaySeconds, force: true});
|
|
print('replSetStepDown command did not throw and returned: ' +
|
|
tojson(result));
|
|
|
|
// The call to replSetStepDown should never succeed
|
|
assert.commandWorked(result);
|
|
});
|
|
|
|
// Wait for primary to get elected and allow the test to make some progress
|
|
// before
|
|
// attempting another stepdown.
|
|
if (stopCounter.getCount() > 0)
|
|
primary = replSet.getPrimary();
|
|
|
|
if (stopCounter.getCount() > 0)
|
|
sleep(8000);
|
|
}
|
|
|
|
print('*** Continuous stepdown thread completed successfully');
|
|
return {ok: 1};
|
|
} catch (e) {
|
|
print('*** Continuous stepdown thread caught exception: ' + tojson(e));
|
|
return {ok: 0, error: e.toString(), stack: e.stack};
|
|
}
|
|
}
|
|
|
|
// Preserve the original stopSet method, because we are overriding it to stop the continuous
|
|
// stepdown thread.
|
|
var _originalStartSetFn = this.startSet;
|
|
var _originalStopSetFn = this.stopSet;
|
|
|
|
// We override these methods to retry on network errors
|
|
var _originalAwaitLastOpCommitted = this.awaitLastOpCommitted;
|
|
|
|
// These two manage the scoped failover thread
|
|
var _scopedPrimaryStepdownThread;
|
|
var _scopedPrimaryStepdownThreadStopCounter;
|
|
|
|
/**
|
|
* Overrides the startSet call so we can increase the logging verbosity
|
|
*/
|
|
this.startSet = function(options) {
|
|
if (!options) {
|
|
options = {};
|
|
}
|
|
options.verbose = 2;
|
|
return _originalStartSetFn.call(this, options);
|
|
};
|
|
|
|
/**
|
|
* Overrides the stopSet call so it terminates the failover thread.
|
|
*/
|
|
this.stopSet = function() {
|
|
this.stopContinuousFailover();
|
|
_originalStopSetFn.apply(this, arguments);
|
|
};
|
|
|
|
/**
|
|
* Overrides the awaitLastOpCommitted to retry on network errors.
|
|
*/
|
|
this.awaitLastOpCommitted = function() {
|
|
return retryOnNetworkError(_originalAwaitLastOpCommitted.bind(this));
|
|
};
|
|
|
|
/**
|
|
* Spawns a thread to invoke continuousPrimaryStepdownFn. See its comments for more
|
|
* information.
|
|
*/
|
|
this.startContinuousFailover = function() {
|
|
if (_scopedPrimaryStepdownThread) {
|
|
throw new Error('Continuous failover thread is already active');
|
|
}
|
|
|
|
_scopedPrimaryStepdownThreadStopCounter = new CountDownLatch(1);
|
|
_scopedPrimaryStepdownThread =
|
|
new ScopedThread(_continuousPrimaryStepdownFn,
|
|
this.nodes[0].host,
|
|
_scopedPrimaryStepdownThreadStopCounter);
|
|
_scopedPrimaryStepdownThread.start();
|
|
};
|
|
|
|
/**
|
|
* Blocking method, which tells the thread running continuousPrimaryStepdownFn to stop and
|
|
* waits
|
|
* for it to terminate.
|
|
*/
|
|
this.stopContinuousFailover = function() {
|
|
if (!_scopedPrimaryStepdownThread) {
|
|
return;
|
|
}
|
|
|
|
_scopedPrimaryStepdownThreadStopCounter.countDown();
|
|
_scopedPrimaryStepdownThreadStopCounter = null;
|
|
|
|
_scopedPrimaryStepdownThread.join();
|
|
|
|
var retVal = _scopedPrimaryStepdownThread.returnData();
|
|
_scopedPrimaryStepdownThread = null;
|
|
|
|
return assert.commandWorked(retVal);
|
|
};
|
|
};
|
|
|
|
Object.extend(ReplSetTest, originalReplSetTest);
|
|
|
|
/**
|
|
* Overrides the ShardingTest constructor to start the continuous config server stepdown thread.
|
|
*/
|
|
ShardingTest = function ShardingTestWithContinuousConfigPrimaryStepdown() {
|
|
if (!arguments[0].other) {
|
|
arguments[0].other = {};
|
|
}
|
|
arguments[0].verbose = 2;
|
|
|
|
// Set electionTimeoutMillis to 5 seconds, from 10, so that chunk migrations don't
|
|
// time out because of the CSRS primary being down so often for so long.
|
|
arguments[0].configReplSetTestOptions =
|
|
Object.merge(arguments[0].configReplSetTestOptions, {
|
|
settings: {
|
|
electionTimeoutMillis: 5000,
|
|
},
|
|
});
|
|
|
|
// Construct the original object
|
|
originalShardingTest.apply(this, arguments);
|
|
|
|
if (!this.configRS) {
|
|
throw new Error('Continuous config server step down only available with CSRS');
|
|
}
|
|
|
|
/**
|
|
* This method is disabled because it runs aggregation, which doesn't handle config server
|
|
* stepdown correctly.
|
|
*/
|
|
this.printShardingStatus = function() {
|
|
|
|
};
|
|
|
|
assert.eq(this.configRS.getReplSetConfigFromNode().settings.electionTimeoutMillis,
|
|
5000,
|
|
"Failed to set the electionTimeoutMillis to 5000 milliseconds");
|
|
|
|
// Start the continuous config server stepdown thread
|
|
this.configRS.startContinuousFailover();
|
|
};
|
|
|
|
Object.extend(ShardingTest, originalShardingTest);
|
|
|
|
})();
|