Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
08077e4d70 | ||
|
|
4f9e10bb30 | ||
|
|
894acad323 | ||
|
|
a86bb832e1 | ||
|
|
3276296336 | ||
|
|
de31372c6f | ||
|
|
814159344a | ||
|
|
58d6f304e9 | ||
|
|
64ae4a253e |
@@ -59,7 +59,7 @@ class EvgExpansions(BaseModel):
|
||||
build_id: str
|
||||
build_variant: str
|
||||
exec_timeout_secs: Optional[int] = None
|
||||
is_patch: Optional[bool]
|
||||
is_patch: Optional[str]
|
||||
project: str
|
||||
max_tests_per_suite: Optional[int] = 100
|
||||
max_sub_suites: Optional[int] = 5
|
||||
@@ -83,10 +83,21 @@ class EvgExpansions(BaseModel):
|
||||
|
||||
def get_max_sub_suites(self) -> int:
|
||||
"""Get the max_sub_suites to use."""
|
||||
if not self.is_patch:
|
||||
if not self.determine_is_patch():
|
||||
return self.mainline_max_sub_suites
|
||||
return self.max_sub_suites
|
||||
|
||||
def determine_is_patch(self) -> bool:
|
||||
"""
|
||||
Determine if expansions indicate whether the script is being run in a patch build.
|
||||
|
||||
In a patch build, the `is_patch` expansion will be the string value of "true". In a
|
||||
non-patch setting, it will not exist, or be an empty string.
|
||||
|
||||
:return: True if task is being run in a patch build.
|
||||
"""
|
||||
return self.is_patch is not None and self.is_patch.lower() == "true"
|
||||
|
||||
def build_suite_split_config(self, start_date: datetime,
|
||||
end_date: datetime) -> SuiteSplitConfig:
|
||||
"""
|
||||
@@ -113,7 +124,7 @@ class EvgExpansions(BaseModel):
|
||||
"""
|
||||
return GenTaskOptions(
|
||||
create_misc_suite=True,
|
||||
is_patch=self.is_patch,
|
||||
is_patch=self.determine_is_patch(),
|
||||
generated_config_dir=GENERATED_CONFIG_DIR,
|
||||
use_default_timeouts=False,
|
||||
timeout_secs=self.timeout_secs,
|
||||
|
||||
@@ -2,15 +2,69 @@
|
||||
|
||||
cd $HOME # workaround EVG-12829
|
||||
|
||||
# Communicate to users that logged in before the script started that nothing is ready.
|
||||
wall "The setup_spawnhost_coredump script has just started setting up the debugging environment."
|
||||
unameOut=$(uname -s)
|
||||
case "${unameOut}" in
|
||||
Linux*) machine=Linux;;
|
||||
Darwin*) machine=Mac;;
|
||||
CYGWIN*) machine=Cygwin;;
|
||||
*) machine="UNKNOWN:${unameOut}"
|
||||
esac
|
||||
|
||||
# Write this file that gets cat'ed on login to communicate to users logging in if this setup script is still running.
|
||||
echo '+-----------------------------------------------------------------+' > ~/.setup_spawnhost_coredump_progress
|
||||
echo '| The setup script is still setting up data files for inspection. |' >> ~/.setup_spawnhost_coredump_progress
|
||||
echo '+-----------------------------------------------------------------+' >> ~/.setup_spawnhost_coredump_progress
|
||||
if [[ "${machine}" = "Cygwin" ]]; then
|
||||
out_dir="/cygdrive/c/setup_script_output.txt"
|
||||
desktop_dir="/cygdrive/c/Users/Administrator/Desktop"
|
||||
|
||||
cat >> ~/.profile <<EOF
|
||||
{
|
||||
date
|
||||
env
|
||||
|
||||
echo "----------------------"
|
||||
echo -e "\n=> Setting _NT_SOURCE_PATH environment variable for debuggers to pick up source files."
|
||||
src_dir_hash=$(readlink -f /cygdrive/z/data/mci/source-*)
|
||||
full_src_dir="${src_dir_hash}/src"
|
||||
echo "Source Path: [${full_src_dir}]"
|
||||
set -x;
|
||||
setx _NT_SOURCE_PATH "${full_src_dir}"
|
||||
{ set +x; } 2>/dev/null
|
||||
|
||||
echo -e "\n=> Setting _NT_SYMBOL_PATH environment variable for debuggers to pick up the symbols."
|
||||
sym_parent_dir=$(readlink -f /cygdrive/z/data/mci/artifacts-*dist_test_debug)
|
||||
sym_dir=$(readlink -f ${sym_parent_dir}/debugsymbols-mongodb*zip)
|
||||
sym_extracted_dir="${sym_parent_dir}/extracted_symbols"
|
||||
full_sym_dir="${sym_extracted_dir}/dist-test/bin"
|
||||
echo "Symbols Dir: [${full_sym_dir}]"
|
||||
|
||||
echo -e "\n=> Extracting Symbol files."
|
||||
set -x;
|
||||
mkdir ${sym_extracted_dir}
|
||||
unzip -n ${sym_dir} -d ${sym_extracted_dir}
|
||||
setx _NT_SYMBOL_PATH "${full_sym_dir};srv*;"
|
||||
{ set +x; } 2>/dev/null
|
||||
|
||||
echo -e "\n=> Extracting Core Dump to Desktop."
|
||||
full_dump_dir=$(readlink -f /cygdrive/z/data/mci/artifacts-* | grep -v dist_test)
|
||||
full_dump_parent_dir=$(readlink -f ${full_dump_dir}/mongo-coredumps*tgz)
|
||||
extracted_dump_dir="${full_dump_dir}/extracted_dump"
|
||||
set -x;
|
||||
mkdir ${extracted_dump_dir}
|
||||
tar -xzvf ${full_dump_parent_dir} -C ${extracted_dump_dir}
|
||||
cp ${extracted_dump_dir}/* ${desktop_dir}
|
||||
{ set +x; } 2>/dev/null
|
||||
echo "Copied to Desktop."
|
||||
|
||||
} &> ${out_dir}
|
||||
|
||||
cp ${out_dir} ${desktop_dir}
|
||||
else
|
||||
# Communicate to users that logged in before the script started that nothing is ready.
|
||||
wall "The setup_spawnhost_coredump script has just started setting up the debugging environment."
|
||||
|
||||
# Write this file that gets cat'ed on login to communicate to users logging in if this setup script is still running.
|
||||
echo '+-----------------------------------------------------------------------------------+' > ~/.setup_spawnhost_coredump_progress
|
||||
echo '| The setup script is still setting up data files for inspection on a [${machine}] host. |' >> ~/.setup_spawnhost_coredump_progress
|
||||
echo '+-----------------------------------------------------------------------------------+' >> ~/.setup_spawnhost_coredump_progress
|
||||
|
||||
cat >> ~/.profile <<EOF
|
||||
cat ~/.setup_spawnhost_coredump_progress
|
||||
# Coredumps generated by a toolchain built mongodb can be problematic when examined with the system
|
||||
# gdb.
|
||||
@@ -33,54 +87,54 @@ echo "To examine a core dump, type 'gdb ./<binary> ./<core file>'"
|
||||
cat ~/.setup_spawnhost_coredump_progress
|
||||
EOF
|
||||
|
||||
export PATH=/opt/mongodbtoolchain/gdb/bin:$PATH
|
||||
echo 'if [ -f ~/.profile ]; then
|
||||
export PATH=/opt/mongodbtoolchain/gdb/bin:$PATH
|
||||
echo 'if [ -f ~/.profile ]; then
|
||||
. ~/.profile
|
||||
fi' >> .bash_profile
|
||||
|
||||
# Make a directory on the larger EBS volume. Soft-link it under the home directory. The smaller home
|
||||
# volume can have trouble particularly with coredumps from sharded timeouts.
|
||||
mkdir /data/debug
|
||||
ln -s /data/debug .
|
||||
cd debug
|
||||
# Make a directory on the larger EBS volume. Soft-link it under the home directory. The smaller home
|
||||
# volume can have trouble particularly with coredumps from sharded timeouts.
|
||||
mkdir /data/debug
|
||||
ln -s /data/debug .
|
||||
cd debug
|
||||
|
||||
# As the name suggests, pretty printers. Primarily for boost::optional<T>
|
||||
git clone git@github.com:ruediger/Boost-Pretty-Printer.git &
|
||||
# As the name suggests, pretty printers. Primarily for boost::optional<T>
|
||||
git clone git@github.com:ruediger/Boost-Pretty-Printer.git &
|
||||
|
||||
# Discover and unarchive necessary files and source code. This will put mongo binaries and their
|
||||
# partner .debug files in the same `debug/bin` directory. The `bin` directory will later be symbolic
|
||||
# linked into the top-level (`debug`) directory. Shared library files and their debug symbols will
|
||||
# be dumped into a `debug/lib` directory for tidiness. The mongo `<reporoot>/src/` directory is soft
|
||||
# linked as `debug/src`. The .gdbinit file assumes gdb is being run from the `debug` directory.
|
||||
BIN_ARCHIVE=`ls /data/mci/artifacts-*archive_dist_test*/mongo-*.tgz`
|
||||
tar --wildcards --strip-components=1 -xzf $BIN_ARCHIVE '*/bin/mongod' '*/bin/mongos' '*/bin/mongo' '*/bin/mongobridge' &
|
||||
tar --wildcards --strip-components=1 -xzf $BIN_ARCHIVE '*/lib/*' &
|
||||
DBG_ARCHIVE=`ls /data/mci/artifacts-*archive_dist_test_debug/debugsymbols-*.tgz`
|
||||
tar --wildcards --strip-components=1 -xzf $DBG_ARCHIVE '*/bin/mongod.debug' '*/bin/mongos.debug' '*/bin/mongo.debug' '*/bin/mongobridge.debug' &
|
||||
tar --wildcards --strip-components=1 -xzf $DBG_ARCHIVE '*/lib/*' &
|
||||
UNITTEST_ARCHIVE=`ls /data/mci/artifacts-*run_unittests/mongo-unittests-*.tgz`
|
||||
tar --wildcards --strip-components=0 -xzf $UNITTEST_ARCHIVE 'bin/*' &
|
||||
tar --wildcards -xzf $UNITTEST_ARCHIVE 'lib/*' &
|
||||
# Discover and unarchive necessary files and source code. This will put mongo binaries and their
|
||||
# partner .debug files in the same `debug/bin` directory. The `bin` directory will later be symbolic
|
||||
# linked into the top-level (`debug`) directory. Shared library files and their debug symbols will
|
||||
# be dumped into a `debug/lib` directory for tidiness. The mongo `<reporoot>/src/` directory is soft
|
||||
# linked as `debug/src`. The .gdbinit file assumes gdb is being run from the `debug` directory.
|
||||
BIN_ARCHIVE=`ls /data/mci/artifacts-*archive_dist_test*/mongo-*.tgz`
|
||||
tar --wildcards --strip-components=1 -xzf $BIN_ARCHIVE '*/bin/mongod' '*/bin/mongos' '*/bin/mongo' '*/bin/mongobridge' &
|
||||
tar --wildcards --strip-components=1 -xzf $BIN_ARCHIVE '*/lib/*' &
|
||||
DBG_ARCHIVE=`ls /data/mci/artifacts-*archive_dist_test_debug/debugsymbols-*.tgz`
|
||||
tar --wildcards --strip-components=1 -xzf $DBG_ARCHIVE '*/bin/mongod.debug' '*/bin/mongos.debug' '*/bin/mongo.debug' '*/bin/mongobridge.debug' &
|
||||
tar --wildcards --strip-components=1 -xzf $DBG_ARCHIVE '*/lib/*' &
|
||||
UNITTEST_ARCHIVE=`ls /data/mci/artifacts-*run_unittests/mongo-unittests-*.tgz`
|
||||
tar --wildcards --strip-components=0 -xzf $UNITTEST_ARCHIVE 'bin/*' &
|
||||
tar --wildcards -xzf $UNITTEST_ARCHIVE 'lib/*' &
|
||||
|
||||
SRC_DIR=`find /data/mci/ -maxdepth 1 | grep source`
|
||||
ln -s $SRC_DIR/.gdbinit .
|
||||
ln -s $SRC_DIR/src src
|
||||
ln -s $SRC_DIR/buildscripts buildscripts
|
||||
SRC_DIR=`find /data/mci/ -maxdepth 1 | grep source`
|
||||
ln -s $SRC_DIR/.gdbinit .
|
||||
ln -s $SRC_DIR/src src
|
||||
ln -s $SRC_DIR/buildscripts buildscripts
|
||||
|
||||
# Install pymongo to get the bson library for pretty-printers.
|
||||
/opt/mongodbtoolchain/v3/bin/pip3 install -r $SRC_DIR/etc/pip/dev-requirements.txt &
|
||||
# Install pymongo to get the bson library for pretty-printers.
|
||||
/opt/mongodbtoolchain/v3/bin/pip3 install -r $SRC_DIR/etc/pip/dev-requirements.txt &
|
||||
|
||||
COREDUMP_ARCHIVE=`ls /data/mci/artifacts-*/mongo-coredumps-*.tgz`
|
||||
tar -xzf $COREDUMP_ARCHIVE &
|
||||
echo "Waiting for background processes to complete."
|
||||
wait
|
||||
COREDUMP_ARCHIVE=`ls /data/mci/artifacts-*/mongo-coredumps-*.tgz`
|
||||
tar -xzf $COREDUMP_ARCHIVE &
|
||||
echo "Waiting for background processes to complete."
|
||||
wait
|
||||
|
||||
# Symbolic linking all of the executable files is sufficient for `gdb ./mongod ./dump_mongod.core`
|
||||
# to succeed. This inadvertantly also links in the ".debug" files which is unnecessary, but
|
||||
# harmless. gdb expects the .debug files to live adjacent to the physical binary.
|
||||
ln -s bin/* ./
|
||||
# Symbolic linking all of the executable files is sufficient for `gdb ./mongod ./dump_mongod.core`
|
||||
# to succeed. This inadvertantly also links in the ".debug" files which is unnecessary, but
|
||||
# harmless. gdb expects the .debug files to live adjacent to the physical binary.
|
||||
ln -s bin/* ./
|
||||
|
||||
cat >> ~/.gdbinit <<EOF
|
||||
cat >> ~/.gdbinit <<EOF
|
||||
set auto-load safe-path /
|
||||
set solib-search-path ./lib/
|
||||
set pagination off
|
||||
@@ -96,10 +150,11 @@ boost.register_printers()
|
||||
end
|
||||
EOF
|
||||
|
||||
echo "dir $HOME/debug" >> ~/.gdbinit
|
||||
echo "dir $HOME/debug" >> ~/.gdbinit
|
||||
|
||||
# Empty out the progress script that warns users about the set script still running when users log in.
|
||||
echo "" > ~/.setup_spawnhost_coredump_progress
|
||||
# Alert currently logged in users that this setup script has completed. Logging back in will ensure any
|
||||
# paths/environment variables will be set as intended.
|
||||
wall "The setup_spawnhost_coredump script has completed, please relogin to ensure the right environment variables are set."
|
||||
# Empty out the progress script that warns users about the set script still running when users log in.
|
||||
echo "" > ~/.setup_spawnhost_coredump_progress
|
||||
# Alert currently logged in users that this setup script has completed. Logging back in will ensure any
|
||||
# paths/environment variables will be set as intended.
|
||||
wall "The setup_spawnhost_coredump script has completed, please relogin to ensure the right environment variables are set."
|
||||
fi
|
||||
|
||||
@@ -87,7 +87,7 @@ def build_mock_orchestrator(build_expansions=None, task_def_list=None, build_tas
|
||||
class TestEvgExpansions(unittest.TestCase):
|
||||
def test_get_max_sub_suites_should_use_patch_value_in_patches(self):
|
||||
evg_expansions = under_test.EvgExpansions(
|
||||
is_patch=True,
|
||||
is_patch="true",
|
||||
max_sub_suites=5,
|
||||
mainline_max_sub_suites=1,
|
||||
build_id="build_id",
|
||||
@@ -102,7 +102,7 @@ class TestEvgExpansions(unittest.TestCase):
|
||||
|
||||
def test_get_max_sub_suites_should_use_mainline_value_in_non_patches(self):
|
||||
evg_expansions = under_test.EvgExpansions(
|
||||
is_patch=False,
|
||||
is_patch="false",
|
||||
max_sub_suites=5,
|
||||
mainline_max_sub_suites=1,
|
||||
build_id="build_id",
|
||||
@@ -133,6 +133,60 @@ class TestEvgExpansions(unittest.TestCase):
|
||||
evg_expansions.mainline_max_sub_suites)
|
||||
|
||||
|
||||
class TestDetermineIsPatch(unittest.TestCase):
|
||||
def test_is_patch_is_none_should_return_false(self):
|
||||
evg_expansions = under_test.EvgExpansions(
|
||||
is_patch=None,
|
||||
build_id="build_id",
|
||||
build_variant="build variant",
|
||||
project="project",
|
||||
revision="abc123",
|
||||
task_name="task name",
|
||||
task_id="task_314",
|
||||
)
|
||||
|
||||
self.assertFalse(evg_expansions.determine_is_patch())
|
||||
|
||||
def test_is_patch_is_false_should_return_false(self):
|
||||
evg_expansions = under_test.EvgExpansions(
|
||||
is_patch="false",
|
||||
build_id="build_id",
|
||||
build_variant="build variant",
|
||||
project="project",
|
||||
revision="abc123",
|
||||
task_name="task name",
|
||||
task_id="task_314",
|
||||
)
|
||||
|
||||
self.assertFalse(evg_expansions.determine_is_patch())
|
||||
|
||||
def test_is_patch_is_empty_string_should_return_false(self):
|
||||
evg_expansions = under_test.EvgExpansions(
|
||||
is_patch="",
|
||||
build_id="build_id",
|
||||
build_variant="build variant",
|
||||
project="project",
|
||||
revision="abc123",
|
||||
task_name="task name",
|
||||
task_id="task_314",
|
||||
)
|
||||
|
||||
self.assertFalse(evg_expansions.determine_is_patch())
|
||||
|
||||
def test_is_patch_is_true_should_return_true(self):
|
||||
evg_expansions = under_test.EvgExpansions(
|
||||
is_patch="true",
|
||||
build_id="build_id",
|
||||
build_variant="build variant",
|
||||
project="project",
|
||||
revision="abc123",
|
||||
task_name="task name",
|
||||
task_id="task_314",
|
||||
)
|
||||
|
||||
self.assertTrue(evg_expansions.determine_is_patch())
|
||||
|
||||
|
||||
class TestTranslateRunVar(unittest.TestCase):
|
||||
def test_normal_value_should_be_returned(self):
|
||||
run_var = "some value"
|
||||
|
||||
@@ -3184,18 +3184,6 @@ tasks:
|
||||
- func: "send benchmark results"
|
||||
- func: "analyze benchmark results"
|
||||
|
||||
- <<: *benchmark_template
|
||||
name: benchmarks_bsoncolumn
|
||||
tags: ["benchmarks"]
|
||||
commands:
|
||||
- func: "do benchmark setup"
|
||||
- func: "run tests"
|
||||
vars:
|
||||
suite: benchmarks_bsoncolumn
|
||||
resmoke_jobs_max: 1
|
||||
- func: "send benchmark results"
|
||||
- func: "analyze benchmark results"
|
||||
|
||||
- <<: *run_jepsen_template
|
||||
name: jepsen_register_findAndModify
|
||||
tags: ["jepsen"]
|
||||
|
||||
@@ -15,7 +15,7 @@ variables:
|
||||
- variant: linux-wt-standalone
|
||||
name: compile
|
||||
_real_expansions: &_expansion_updates
|
||||
[]
|
||||
[]
|
||||
###
|
||||
|
||||
###
|
||||
@@ -166,10 +166,6 @@ functions:
|
||||
params:
|
||||
script: ./src/dsi/run-dsi determine_failure -m TEST
|
||||
f_dsi_post_run:
|
||||
- command: json.send
|
||||
params:
|
||||
name: perf
|
||||
file: ./build/LegacyPerfJson/perf.json
|
||||
- command: shell.exec
|
||||
params:
|
||||
script: ./src/dsi/run-dsi post_run
|
||||
|
||||
@@ -22,9 +22,7 @@ variables:
|
||||
- name: schedule_global_auto_tasks
|
||||
variant: task_generation
|
||||
_real_expansions: &_expansion_updates
|
||||
# TODO: Disable in SERVER-57226.
|
||||
- key: enable_detect_changes
|
||||
value: "true"
|
||||
[]
|
||||
###
|
||||
|
||||
###
|
||||
@@ -190,12 +188,6 @@ functions:
|
||||
params:
|
||||
script: ./src/dsi/run-dsi determine_failure -m TEST
|
||||
f_dsi_post_run:
|
||||
# TODO: SERVER-57226 will let us move this json.send to after dsi's post_run.
|
||||
# This is preferred to keep DSI execution contiguous.
|
||||
- command: json.send
|
||||
params:
|
||||
name: perf
|
||||
file: ./build/LegacyPerfJson/perf.json
|
||||
- command: shell.exec
|
||||
params:
|
||||
script: ./src/dsi/run-dsi post_run
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
*/
|
||||
'use strict';
|
||||
|
||||
const dbPrefix = 'fsmDB_';
|
||||
const dbPrefix = jsTestName() + '_DB_';
|
||||
const dbCount = 2;
|
||||
const collPrefix = 'sharded_coll_';
|
||||
const collCount = 2;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*/
|
||||
'use strict';
|
||||
|
||||
const dbPrefix = 'fsmDB_';
|
||||
const dbPrefix = jsTestName() + '_DB_';
|
||||
const dbCount = 2;
|
||||
const collPrefix = 'sharded_coll_';
|
||||
const collCount = 2;
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
* ]
|
||||
*/
|
||||
|
||||
const dbPrefix = 'fsmDB_';
|
||||
const dbPrefix = jsTestName() + '_DB_';
|
||||
const dbCount = 2;
|
||||
const collPrefix = 'sharded_coll_';
|
||||
const collCount = 2;
|
||||
|
||||
@@ -48,9 +48,19 @@ reshardingTest.withReshardingInBackground(
|
||||
// Wait until participants are aware of the resharding operation.
|
||||
reshardingTest.awaitCloneTimestampChosen();
|
||||
|
||||
const ns = sourceCollection.getFullName();
|
||||
awaitAbort = startParallelShell(funWithArgs(function(ns) {
|
||||
db.adminCommand({abortReshardCollection: ns});
|
||||
}, sourceCollection.getFullName()), mongos.port);
|
||||
}, ns), mongos.port);
|
||||
|
||||
// Wait for the coordinator to have persisted its decision to abort the resharding operation
|
||||
// as a result of the abortReshardCollection command being processed.
|
||||
assert.soon(() => {
|
||||
const coordinatorDoc =
|
||||
mongos.getCollection("config.reshardingOperations").findOne({ns: ns});
|
||||
|
||||
return coordinatorDoc !== null && coordinatorDoc.state === "aborting";
|
||||
});
|
||||
},
|
||||
{
|
||||
expectedErrorCode: ErrorCodes.ReshardCollectionAborted,
|
||||
|
||||
@@ -43,8 +43,9 @@ namespace {
|
||||
// Start capacity for memory blocks allocated by ElementStorage
|
||||
constexpr int kStartCapacity = 128;
|
||||
|
||||
// Max capacity for memory blocks allocated by ElementStorage
|
||||
constexpr int kMaxCapacity = 1024 * 32;
|
||||
// Max capacity for memory blocks allocated by ElementStorage. We need to allow blocks to grow to at
|
||||
// least BSONObjMaxUserSize so we can construct user objects efficiently.
|
||||
constexpr int kMaxCapacity = BSONObjMaxUserSize;
|
||||
|
||||
// Memory offset to get to BSONElement value when field name is an empty string.
|
||||
constexpr int kElementValueOffset = 2;
|
||||
|
||||
@@ -55,10 +55,14 @@ public:
|
||||
}
|
||||
|
||||
~DeadlineFuture() {
|
||||
auto lk = stdx::lock_guard(_mutex);
|
||||
_executor->cancel(_timeoutCbHandle.get());
|
||||
// The _executor holds the shared ptr on this, the callback will set the promise.
|
||||
invariant(get().isReady());
|
||||
{
|
||||
auto lk = stdx::lock_guard(_mutex);
|
||||
if (_timeoutCbHandle) {
|
||||
_executor->cancel(_timeoutCbHandle.get());
|
||||
}
|
||||
// The _executor holds the shared ptr on this, the callback will set the promise.
|
||||
invariant(get().isReady());
|
||||
}
|
||||
}
|
||||
|
||||
SharedSemiFuture<ResultStatus> get() const {
|
||||
@@ -104,6 +108,7 @@ private:
|
||||
std::move(inputFuture).onCompletion([this, self](StatusWith<ResultStatus> status) {
|
||||
auto lk = stdx::lock_guard(_mutex);
|
||||
_executor->cancel(_timeoutCbHandle.get());
|
||||
_timeoutCbHandle = boost::none;
|
||||
if (!get().isReady()) {
|
||||
_outputFuturePromise->setFrom(status);
|
||||
}
|
||||
|
||||
@@ -195,15 +195,6 @@ std::vector<AsyncRequestsSender::Response> sendAuthenticatedCommandToShards(
|
||||
const BSONObj& command,
|
||||
const std::vector<ShardId>& shardIds,
|
||||
const std::shared_ptr<executor::TaskExecutor>& executor) {
|
||||
// TODO SERVER-57519: remove the following scope
|
||||
{
|
||||
// Ensure ShardRegistry is initialized before using the AsyncRequestsSender that relies on
|
||||
// unsafe functions (SERVER-57280)
|
||||
auto shardRegistry = Grid::get(opCtx)->shardRegistry();
|
||||
if (!shardRegistry->isUp()) {
|
||||
shardRegistry->reload(opCtx);
|
||||
}
|
||||
}
|
||||
|
||||
// The AsyncRequestsSender ignore impersonation metadata so we need to manually attach them to
|
||||
// the command
|
||||
|
||||
@@ -151,9 +151,10 @@ AsyncRequestsSender::RemoteData::RemoteData(AsyncRequestsSender* ars,
|
||||
BSONObj cmdObj)
|
||||
: _ars(ars), _shardId(std::move(shardId)), _cmdObj(std::move(cmdObj)) {}
|
||||
|
||||
std::shared_ptr<Shard> AsyncRequestsSender::RemoteData::getShard() {
|
||||
// TODO: Pass down an OperationContext* to use here.
|
||||
return Grid::get(getGlobalServiceContext())->shardRegistry()->getShardNoReload(_shardId);
|
||||
SemiFuture<std::shared_ptr<Shard>> AsyncRequestsSender::RemoteData::getShard() noexcept {
|
||||
return Grid::get(getGlobalServiceContext())
|
||||
->shardRegistry()
|
||||
->getShard(*_ars->_subBaton, _shardId);
|
||||
}
|
||||
|
||||
void AsyncRequestsSender::RemoteData::executeRequest() {
|
||||
@@ -173,7 +174,12 @@ void AsyncRequestsSender::RemoteData::executeRequest() {
|
||||
|
||||
auto AsyncRequestsSender::RemoteData::scheduleRequest()
|
||||
-> SemiFuture<RemoteCommandOnAnyCallbackArgs> {
|
||||
return resolveShardIdToHostAndPorts(_ars->_readPreference)
|
||||
return getShard()
|
||||
.thenRunOn(*_ars->_subBaton)
|
||||
.then([this](auto&& shard) {
|
||||
return shard->getTargeter()->findHosts(_ars->_readPreference,
|
||||
CancellationToken::uncancelable());
|
||||
})
|
||||
.thenRunOn(*_ars->_subBaton)
|
||||
.then([this](auto&& hostAndPorts) {
|
||||
_shardHostAndPort.emplace(hostAndPorts.front());
|
||||
@@ -183,17 +189,6 @@ auto AsyncRequestsSender::RemoteData::scheduleRequest()
|
||||
.semi();
|
||||
}
|
||||
|
||||
SemiFuture<std::vector<HostAndPort>> AsyncRequestsSender::RemoteData::resolveShardIdToHostAndPorts(
|
||||
const ReadPreferenceSetting& readPref) {
|
||||
const auto shard = getShard();
|
||||
if (!shard) {
|
||||
return Status(ErrorCodes::ShardNotFound,
|
||||
str::stream() << "Could not find shard " << _shardId);
|
||||
}
|
||||
|
||||
return shard->getTargeter()->findHosts(readPref, CancellationToken::uncancelable());
|
||||
}
|
||||
|
||||
auto AsyncRequestsSender::RemoteData::scheduleRemoteCommand(std::vector<HostAndPort>&& hostAndPorts)
|
||||
-> SemiFuture<RemoteCommandOnAnyCallbackArgs> {
|
||||
hangBeforeSchedulingRemoteCommand.executeIf(
|
||||
@@ -259,43 +254,47 @@ auto AsyncRequestsSender::RemoteData::handleResponse(RemoteCommandOnAnyCallbackA
|
||||
}
|
||||
|
||||
// There was an error with either the response or the command.
|
||||
auto shard = getShard();
|
||||
if (!shard) {
|
||||
uasserted(ErrorCodes::ShardNotFound, str::stream() << "Could not find shard " << _shardId);
|
||||
} else {
|
||||
std::vector<HostAndPort> failedTargets;
|
||||
return getShard()
|
||||
.thenRunOn(*_ars->_subBaton)
|
||||
.then([this, status = std::move(status), rcr = std::move(rcr)](
|
||||
std::shared_ptr<mongo::Shard>&& shard) {
|
||||
std::vector<HostAndPort> failedTargets;
|
||||
|
||||
if (rcr.response.target) {
|
||||
failedTargets = {*rcr.response.target};
|
||||
} else {
|
||||
failedTargets = rcr.request.target;
|
||||
}
|
||||
if (rcr.response.target) {
|
||||
failedTargets = {*rcr.response.target};
|
||||
} else {
|
||||
failedTargets = rcr.request.target;
|
||||
}
|
||||
|
||||
shard->updateReplSetMonitor(failedTargets.front(), status);
|
||||
bool isStartingTransaction = _cmdObj.getField("startTransaction").booleanSafe();
|
||||
if (!_ars->_stopRetrying && shard->isRetriableError(status.code(), _ars->_retryPolicy) &&
|
||||
_retryCount < kMaxNumFailedHostRetryAttempts && !isStartingTransaction) {
|
||||
shard->updateReplSetMonitor(failedTargets.front(), status);
|
||||
bool isStartingTransaction = _cmdObj.getField("startTransaction").booleanSafe();
|
||||
if (!_ars->_stopRetrying &&
|
||||
shard->isRetriableError(status.code(), _ars->_retryPolicy) &&
|
||||
_retryCount < kMaxNumFailedHostRetryAttempts && !isStartingTransaction) {
|
||||
|
||||
LOGV2_DEBUG(4615637,
|
||||
1,
|
||||
"Command to remote {shardId} for hosts {hosts} failed with retryable error "
|
||||
"{error} and will be retried",
|
||||
"Command to remote shard failed with retryable error and will be retried",
|
||||
"shardId"_attr = _shardId,
|
||||
"hosts"_attr = failedTargets,
|
||||
"error"_attr = redact(status));
|
||||
++_retryCount;
|
||||
_shardHostAndPort.reset();
|
||||
// retry through recursion
|
||||
return scheduleRequest();
|
||||
}
|
||||
}
|
||||
LOGV2_DEBUG(
|
||||
4615637,
|
||||
1,
|
||||
"Command to remote {shardId} for hosts {hosts} failed with retryable error "
|
||||
"{error} and will be retried",
|
||||
"Command to remote shard failed with retryable error and will be retried",
|
||||
"shardId"_attr = _shardId,
|
||||
"hosts"_attr = failedTargets,
|
||||
"error"_attr = redact(status));
|
||||
++_retryCount;
|
||||
_shardHostAndPort.reset();
|
||||
// retry through recursion
|
||||
return scheduleRequest();
|
||||
}
|
||||
|
||||
// Status' in the response.status field that aren't retried get converted to top level errors
|
||||
uassertStatusOK(rcr.response.status);
|
||||
// Status' in the response.status field that aren't retried get converted to top level
|
||||
// errors
|
||||
uassertStatusOK(rcr.response.status);
|
||||
|
||||
// We're not okay (on the remote), but still not going to retry
|
||||
return std::move(rcr);
|
||||
// We're not okay (on the remote), but still not going to retry
|
||||
return Future<RemoteCommandOnAnyCallbackArgs>::makeReady(std::move(rcr)).semi();
|
||||
})
|
||||
.semi();
|
||||
};
|
||||
|
||||
} // namespace mongo
|
||||
|
||||
@@ -177,9 +177,15 @@ private:
|
||||
RemoteData(AsyncRequestsSender* ars, ShardId shardId, BSONObj cmdObj);
|
||||
|
||||
/**
|
||||
* Returns the Shard object associated with this remote.
|
||||
* Returns a SemiFuture containing a shard object associated with this remote.
|
||||
*
|
||||
* This will return a SemiFuture with a ShardNotFound error status in case the shard is not
|
||||
* found.
|
||||
*
|
||||
* Additionally this call can trigger a refresh of the ShardRegistry so it could possibly
|
||||
* return other network error status related to the refresh.
|
||||
*/
|
||||
std::shared_ptr<Shard> getShard();
|
||||
SemiFuture<std::shared_ptr<Shard>> getShard() noexcept;
|
||||
|
||||
/**
|
||||
* Returns true if we've already queued a response from the remote.
|
||||
|
||||
@@ -218,9 +218,9 @@ void ShardRegistry::startupPeriodicReloader(OperationContext* opCtx) {
|
||||
|
||||
AsyncTry([this] {
|
||||
LOGV2_DEBUG(22726, 1, "Reloading shardRegistry");
|
||||
return _reloadInternal();
|
||||
return _reloadAsyncNoRetry();
|
||||
})
|
||||
.until([](auto sw) {
|
||||
.until([](auto&& sw) {
|
||||
if (!sw.isOK()) {
|
||||
LOGV2(22727,
|
||||
"Error running periodic reload of shard registry",
|
||||
@@ -232,7 +232,7 @@ void ShardRegistry::startupPeriodicReloader(OperationContext* opCtx) {
|
||||
})
|
||||
.withDelayBetweenIterations(kRefreshPeriod) // This call is optional.
|
||||
.on(_executor, CancellationToken::uncancelable())
|
||||
.getAsync([](auto sw) {
|
||||
.getAsync([](auto&& sw) {
|
||||
LOGV2_DEBUG(22725,
|
||||
1,
|
||||
"Exiting periodic shard registry reloader",
|
||||
@@ -295,6 +295,49 @@ StatusWith<std::shared_ptr<Shard>> ShardRegistry::getShard(OperationContext* opC
|
||||
return {ErrorCodes::ShardNotFound, str::stream() << "Shard " << shardId << " not found"};
|
||||
}
|
||||
|
||||
SemiFuture<std::shared_ptr<Shard>> ShardRegistry::getShard(ExecutorPtr executor,
|
||||
const ShardId& shardId) noexcept {
|
||||
|
||||
// Fetch the shard registry data associated to the latest known topology time
|
||||
return _getDataAsync()
|
||||
.thenRunOn(executor)
|
||||
.then([this, executor, shardId](auto&& cachedData) {
|
||||
// First check if this is a non config shard lookup
|
||||
if (auto shard = cachedData->findShard(shardId)) {
|
||||
return SemiFuture<std::shared_ptr<Shard>>::makeReady(std::move(shard));
|
||||
}
|
||||
|
||||
// then check if this is a config shard (this call is blocking in any case)
|
||||
{
|
||||
stdx::lock_guard<Latch> lk(_mutex);
|
||||
if (auto shard = _configShardData.findShard(shardId)) {
|
||||
return SemiFuture<std::shared_ptr<Shard>>::makeReady(std::move(shard));
|
||||
}
|
||||
}
|
||||
|
||||
// If the shard was not found, force reload the shard regitry data and try again.
|
||||
//
|
||||
// This is to cover the following scenario:
|
||||
// 1. Primary of the replicaset fetch the list of shards and store it on disk
|
||||
// 2. Primary crash before the latest VectorClock topology time is majority written to
|
||||
// disk
|
||||
// 3. A new primary with a stale ShardRegistry is elected and read the set of shards
|
||||
// from disk and calls ShardRegistry::getShard
|
||||
|
||||
return _reloadAsync()
|
||||
.thenRunOn(executor)
|
||||
.then([this, executor, shardId](auto&& cachedData) -> std::shared_ptr<Shard> {
|
||||
auto shard = cachedData->findShard(shardId);
|
||||
uassert(ErrorCodes::ShardNotFound,
|
||||
str::stream() << "Shard " << shardId << " not found",
|
||||
shard);
|
||||
return shard;
|
||||
})
|
||||
.semi();
|
||||
})
|
||||
.semi();
|
||||
}
|
||||
|
||||
std::vector<ShardId> ShardRegistry::getAllShardIds(OperationContext* opCtx) {
|
||||
auto shardIds = _getData(opCtx)->getAllShardIds();
|
||||
if (shardIds.empty()) {
|
||||
@@ -400,24 +443,27 @@ void ShardRegistry::toBSON(BSONObjBuilder* result) const {
|
||||
}
|
||||
|
||||
void ShardRegistry::reload(OperationContext* opCtx) {
|
||||
_reloadAsync().get(opCtx);
|
||||
}
|
||||
|
||||
SharedSemiFuture<ShardRegistry::Cache::ValueHandle> ShardRegistry::_reloadAsync() {
|
||||
if (MONGO_unlikely(TestingProctor::instance().isEnabled())) {
|
||||
// TODO SERVER-62152 investigate hang on reload in unit tests
|
||||
// Some unit tests don't support running the reload's AsyncTry on the fixed executor.
|
||||
_reloadInternal().get(opCtx);
|
||||
return _reloadAsyncNoRetry();
|
||||
} else {
|
||||
AsyncTry([=]() mutable { return _reloadInternal(); })
|
||||
return AsyncTry([=]() mutable { return _reloadAsyncNoRetry(); })
|
||||
.until([](auto sw) mutable {
|
||||
return sw.getStatus() != ErrorCodes::ReadConcernMajorityNotAvailableYet;
|
||||
})
|
||||
.withBackoffBetweenIterations(kExponentialBackoff)
|
||||
.on(Grid::get(opCtx)->getExecutorPool()->getFixedExecutor(),
|
||||
.on(Grid::get(getGlobalServiceContext())->getExecutorPool()->getFixedExecutor(),
|
||||
CancellationToken::uncancelable())
|
||||
.semi()
|
||||
.get(opCtx);
|
||||
.share();
|
||||
}
|
||||
}
|
||||
|
||||
SharedSemiFuture<ShardRegistry::Cache::ValueHandle> ShardRegistry::_reloadInternal() {
|
||||
SharedSemiFuture<ShardRegistry::Cache::ValueHandle> ShardRegistry::_reloadAsyncNoRetry() {
|
||||
// Make the next acquire do a lookup.
|
||||
auto value = _forceReloadIncrement.addAndFetch(1);
|
||||
LOGV2_DEBUG(4620253, 2, "Forcing ShardRegistry reload", "newForceReloadIncrement"_attr = value);
|
||||
|
||||
@@ -239,6 +239,9 @@ public:
|
||||
*/
|
||||
StatusWith<std::shared_ptr<Shard>> getShard(OperationContext* opCtx, const ShardId& shardId);
|
||||
|
||||
SemiFuture<std::shared_ptr<Shard>> getShard(ExecutorPtr executor,
|
||||
const ShardId& shardId) noexcept;
|
||||
|
||||
/**
|
||||
* Returns a vector containing all known shard IDs.
|
||||
* The order of the elements is not guaranteed.
|
||||
@@ -438,7 +441,8 @@ private:
|
||||
|
||||
void _initializeCacheIfNecessary() const;
|
||||
|
||||
SharedSemiFuture<Cache::ValueHandle> _reloadInternal();
|
||||
SharedSemiFuture<Cache::ValueHandle> _reloadAsync();
|
||||
SharedSemiFuture<Cache::ValueHandle> _reloadAsyncNoRetry();
|
||||
|
||||
/**
|
||||
* Factory to create shards. Never changed after startup so safe to access outside of _mutex.
|
||||
|
||||
Reference in New Issue
Block a user