Files
mongo/jstests/serverless/shard_split_performance_test.js
2023-02-09 19:51:19 +00:00

162 lines
5.5 KiB
JavaScript

/**
*
* Runs a number of shard split and moveChunk to compare the time it takes for both operations.
* @tags: [requires_fcv_63, serverless]
*/
import {assertMigrationState, ShardSplitTest} from "jstests/serverless/libs/shard_split_test.js";
load("jstests/replsets/rslib.js");
function runOneMoveChunk() {
'use strict';
load("jstests/sharding/libs/find_chunks_util.js");
var st = new ShardingTest({mongos: 1, shards: 2});
var kDbName = 'db';
var mongos = st.s0;
var shard0 = st.shard0.shardName;
var shard1 = st.shard1.shardName;
assert.commandWorked(mongos.adminCommand({enableSharding: kDbName}));
st.ensurePrimaryShard(kDbName, shard0);
const keyDoc = {a: 1};
var ns = kDbName + '.foo';
// Fail if find is not a valid shard key.
assert.commandWorked(mongos.adminCommand({shardCollection: ns, key: keyDoc}));
var chunkId = findChunksUtil.findOneChunkByNs(mongos.getDB('config'), ns, {shard: shard0})._id;
assert.commandWorked(mongos.adminCommand({moveChunk: ns, find: keyDoc, to: shard1}));
assert.eq(shard1, mongos.getDB('config').chunks.findOne({_id: chunkId}).shard);
const kMoveChunkLog = "ctx\":\"MoveChunk\",\"msg\":\"Exiting commit critical section";
const moveMsg = checkLog.getLogMessage(st.shard0, kMoveChunkLog);
assert(moveMsg);
const moveJson = JSON.parse(moveMsg);
const duration = parseInt(moveJson.attr.durationMillis, 10);
mongos.getDB(kDbName).foo.drop();
st.stop();
jsTestLog(`moveChunk critical section lasted ${duration} ms`);
return duration;
}
function extractTs(message) {
assert(message);
const msgJson = JSON.parse(message);
return Date.parse(msgJson.t["$date"]);
}
function printLog(connection) {
const log = cat(connection.fullOptions.logFile);
jsTestLog(`Printing log for ${connection}`);
for (let line of log.split("\n")) {
print(`d${connection.port} | ${line}`);
}
return log;
}
function extractTimingsForSplitSteps(connection) {
const log = printLog(connection);
const logLines = {
enterBlockingState: "Entering 'blocking' state.",
waitingForCatchup: "Waiting for recipient nodes to reach block timestamp.",
applyingSplitConfig: "Applying the split config.",
triggeringRecipientElection:
"Triggering an election after recipient has accepted the split.",
waitForMajorityWrite: "Waiting for majority commit on recipient primary",
committed: "Entering 'committed' state.",
decisionReached: "Shard split decision reached",
};
const result = {};
for (let line of log.split("\n")) {
for (let key in logLines) {
if (line.includes(logLines[key])) {
result[key] = extractTs(line);
break;
}
}
}
return result;
}
function runOneSplit() {
"use strict";
const test = new ShardSplitTest({
quickGarbageCollection: true,
nodeOptions: {
useLogFiles: true,
setParameter: {logComponentVerbosity: tojson({replication: 4, command: 4})}
}
});
test.addAndAwaitRecipientNodes();
const tenantIds = [ObjectId(), ObjectId()];
const operation = test.createSplitOperation(tenantIds);
assert.commandWorked(operation.commit());
test.removeRecipientNodesFromDonor();
const donorPrimary = test.donor.getPrimary();
assertMigrationState(donorPrimary, operation.migrationId, "committed");
const result = extractTimingsForSplitSteps(donorPrimary);
test.donor.getSecondaries().forEach(node => {
printLog(node);
});
const report = {
// shard split critical section:
// - wait to abort index builds
waitToAbortIndexBuilds: result.waitingForCatchup - result.enterBlockingState,
// - wait for recipient nodes to catch up with blockTimestamp
waitForRecipientCatchup: result.applyingSplitConfig - result.waitingForCatchup,
// - wait for split acceptance
waitForSplitAcceptance: result.triggeringRecipientElection - result.applyingSplitConfig,
// - wait for recipient primary to step up
waitForRecipientStepUp: result.waitForMajorityWrite - result.triggeringRecipientElection,
// - wait for recipient to majority commit write
waitForRecipientMajorityWrite: result.committed - result.waitForMajorityWrite,
// - total duration
totalDuration: result.decisionReached - result.enterBlockingState,
totalDurationWithoutCatchup: result.decisionReached - result.applyingSplitConfig,
};
jsTestLog(`Shard split performance report: ${tojson(report)}`);
// Validate using metrics from log output
const maxCriticalSectionDurationWithoutCatchup = 500;
assert.lt(report.totalDurationWithoutCatchup,
maxCriticalSectionDurationWithoutCatchup,
`The total duration without recipient catchup must be less than ${
maxCriticalSectionDurationWithoutCatchup}ms`);
// Validate using reported values in serverStatus
const serverStatus = assert.commandWorked(donorPrimary.adminCommand({serverStatus: 1}));
const splitStats = serverStatus.shardSplits;
assert.eq(splitStats.totalCommitted, 1);
assert.lt(splitStats.totalCommittedDurationWithoutCatchupMillis,
maxCriticalSectionDurationWithoutCatchup,
`The total duration without recipient catchup must be less than ${
maxCriticalSectionDurationWithoutCatchup}ms`);
test.stop();
}
runOneMoveChunk();
runOneSplit();