Files
mongo/jstests/serverless/shard_split_performance_test.js

132 lines
5.3 KiB
JavaScript

/**
*
* Runs a number of shard split and moveChunk to compare the time it takes for both operations.
* @tags: [requires_fcv_62, serverless]
*/
import {assertMigrationState, ShardSplitTest} from "jstests/serverless/libs/shard_split_test.js";
load("jstests/replsets/rslib.js");
function runOneMoveChunk() {
'use strict';
load("jstests/sharding/libs/find_chunks_util.js");
var st = new ShardingTest({mongos: 1, shards: 2});
var kDbName = 'db';
var mongos = st.s0;
var shard0 = st.shard0.shardName;
var shard1 = st.shard1.shardName;
assert.commandWorked(mongos.adminCommand({enableSharding: kDbName}));
st.ensurePrimaryShard(kDbName, shard0);
const keyDoc = {a: 1};
var ns = kDbName + '.foo';
// Fail if find is not a valid shard key.
assert.commandWorked(mongos.adminCommand({shardCollection: ns, key: keyDoc}));
var chunkId = findChunksUtil.findOneChunkByNs(mongos.getDB('config'), ns, {shard: shard0})._id;
assert.commandWorked(mongos.adminCommand({moveChunk: ns, find: keyDoc, to: shard1}));
assert.eq(shard1, mongos.getDB('config').chunks.findOne({_id: chunkId}).shard);
const kMoveChunkLog = "ctx\":\"MoveChunk\",\"msg\":\"Exiting commit critical section";
const moveMsg = checkLog.getLogMessage(st.shard0, kMoveChunkLog);
assert(moveMsg);
const moveJson = JSON.parse(moveMsg);
const duration = parseInt(moveJson.attr.durationMillis, 10);
mongos.getDB(kDbName).foo.drop();
st.stop();
jsTestLog(`moveChunk critical section lasted ${duration} ms`);
return duration;
}
function extractTs(message) {
assert(message);
const msgJson = JSON.parse(message);
return Date.parse(msgJson.t["$date"]);
}
function runOneSplit() {
"use strict";
const test = new ShardSplitTest({quickGarbageCollection: true});
test.addAndAwaitRecipientNodes();
const primary = test.donor.getPrimary();
const tenantIds = [ObjectId(), ObjectId()];
const operation = test.createSplitOperation(tenantIds);
assert.commandWorked(operation.commit());
test.removeRecipientNodesFromDonor();
assertMigrationState(test.donor.getPrimary(), operation.migrationId, "committed");
const kEnterBlockingState = "Entering 'blocking' state.";
const kWaitingForCatchup = "Waiting for recipient nodes to reach block timestamp.";
const kApplyingSplitConfig = "Applying the split config.";
const kTriggeringRecipientElection =
"Triggering an election after recipient has accepted the split.";
const kWaitForMajorityWrite = "Waiting for majority commit on recipient primary";
const kCommitted = "Entering 'committed' state.";
const kDecisionReached = "Shard split decision reached";
const report = {
// shard split critical section:
// - wait to abort index builds
waitToAbortIndexBuilds: extractTs(checkLog.getLogMessage(primary, kWaitingForCatchup)) -
extractTs(checkLog.getLogMessage(primary, kEnterBlockingState)),
// - wait for recipient nodes to catch up with blockTimestamp
waitForRecipientCatchup: extractTs(checkLog.getLogMessage(primary, kApplyingSplitConfig)) -
extractTs(checkLog.getLogMessage(primary, kWaitingForCatchup)),
// - wait for split acceptance
waitForSplitAcceptance:
extractTs(checkLog.getLogMessage(primary, kTriggeringRecipientElection)) -
extractTs(checkLog.getLogMessage(primary, kApplyingSplitConfig)),
// - wait for recipient primary to step up
waitForRecipientStepUp: extractTs(checkLog.getLogMessage(primary, kWaitForMajorityWrite)) -
extractTs(checkLog.getLogMessage(primary, kTriggeringRecipientElection)),
// - wait for recipient to majority commit write
waitForRecipientMajorityWrite: extractTs(checkLog.getLogMessage(primary, kCommitted)) -
extractTs(checkLog.getLogMessage(primary, kWaitForMajorityWrite)),
// - total duration
totalDuration: extractTs(checkLog.getLogMessage(primary, kDecisionReached)) -
extractTs(checkLog.getLogMessage(primary, kEnterBlockingState)),
totalDurationWithoutCatchup: extractTs(checkLog.getLogMessage(primary, kDecisionReached)) -
extractTs(checkLog.getLogMessage(primary, kApplyingSplitConfig)),
};
jsTestLog(`Shard split performance report: ${tojson(report)}`);
// Validate using metrics from log output
const maxCriticalSectionDurationWithoutCatchup = 500;
assert.lt(report.totalDurationWithoutCatchup,
maxCriticalSectionDurationWithoutCatchup,
`The total duration without recipient catchup must be less than ${
maxCriticalSectionDurationWithoutCatchup}ms`);
// Validate using reported values in serverStatus
const donorPrimary = test.donor.getPrimary();
const serverStatus = assert.commandWorked(donorPrimary.adminCommand({serverStatus: 1}));
const splitStats = serverStatus.shardSplits;
assert.eq(splitStats.totalCommitted, 1);
assert.lt(splitStats.totalCommittedDurationWithoutCatchupMillis,
maxCriticalSectionDurationWithoutCatchup,
`The total duration without recipient catchup must be less than ${
maxCriticalSectionDurationWithoutCatchup}ms`);
test.stop();
}
runOneMoveChunk();
runOneSplit();