# 2022-04-15 21:58:44 +00:00
""" Test hook that runs shard splits continuously. """
import copy
import random
import threading
import time
import uuid
import bson
import pymongo . errors
from buildscripts . resmokelib import errors
from buildscripts . resmokelib . testing . fixtures import interface as fixture_interface
from buildscripts . resmokelib . testing . fixtures import shard_split
from buildscripts . resmokelib . testing . fixtures . replicaset import ReplicaSetFixture
from buildscripts . resmokelib . testing . hooks import interface
class ContinuousShardSplit(interface.Hook):  # pylint: disable=too-many-instance-attributes
    """Hook that drives a background thread running shard splits while tests execute.

    The thread is created and started once per suite, resumed before each test, paused after
    each test, and stopped when the suite ends.
    """

    DESCRIPTION = " Continuous shard split operations "

    IS_BACKGROUND = True
    # Same value as the replica set fixture's replication timeout (in minutes).
    AWAIT_REPL_TIMEOUT_MINS = ReplicaSetFixture.AWAIT_REPL_TIMEOUT_MINS

    def __init__(self, hook_logger, fixture, shell_options):
        """Set up the hook without starting any thread.

        Args:
            hook_logger: the logger instance for this hook.
            fixture: the ShardSplitFixture that owns the donor replica set.
            shell_options: shell options whose global_vars hold the TestData (tenantIds,
                authOptions) read by the split thread; a deep copy is kept.

        Raises:
            ValueError: if 'fixture' is not a ShardSplitFixture.
        """
        interface.Hook.__init__(self, hook_logger, fixture, ContinuousShardSplit.DESCRIPTION)

        is_split_fixture = isinstance(fixture, shard_split.ShardSplitFixture)
        if not is_split_fixture:
            raise ValueError(" The ContinuousShardSplit hook requires a ShardSplitFixture ")

        # The thread only exists between before_suite() and after_suite().
        self._shard_split_thread = None
        self._shard_split_fixture = fixture
        self._shell_options = copy.deepcopy(shell_options)

    def before_suite(self, test_report):
        """Create and start the shard split thread.

        Raises:
            ValueError: if the stored fixture is falsy.
        """
        target_fixture = self._shard_split_fixture
        if not target_fixture:
            raise ValueError(" No ShardSplitFixture to run shard splits on ")

        self.logger.info(" Starting the shard split thread. ")
        split_thread = _ShardSplitThread(self.logger, target_fixture, self._shell_options,
                                         test_report)
        self._shard_split_thread = split_thread
        split_thread.start()

    def after_suite(self, test_report, teardown_flag=None):
        """Stop the shard split thread and wait for it to exit."""
        log = self.logger
        log.info(" Stopping the shard split thread. ")
        self._shard_split_thread.stop()
        log.info(" Stopped the shard split thread. ")

    def before_test(self, test, test_report):
        """Let the shard split thread run splits for the duration of 'test'."""
        self.logger.info(" Resuming the shard split thread. ")
        split_thread = self._shard_split_thread
        split_thread.resume(test)

    def after_test(self, test, test_report):
        """Ask the shard split thread to go idle and wait until it has."""
        log = self.logger
        log.info(" Pausing the shard split thread. ")
        self._shard_split_thread.pause()
        log.info(" Paused the shard split thread. ")
class ShardSplitLifeCycle:
    """Class for managing the various states of the shard split thread.

    The job thread alternates between calling mark_test_started() and mark_test_finished(). The
    shard split thread is allowed to perform splits at any point between these two calls.
    Note that the job thread synchronizes with the shard split thread outside the context of
    this object to know it isn't in the process of running a split.
    """

    _TEST_STARTED_STATE = " start "
    _TEST_FINISHED_STATE = " finished "

    def __init__(self):
        """Initialize the ShardSplitLifeCycle instance in the 'finished' state."""
        self.__lock = threading.Lock()
        # Shares the lock above; notified on every state change and on stop().
        self.__cond = threading.Condition(self.__lock)

        # Number of times mark_test_started() has been called.
        self.test_num = 0
        self.__test_state = self._TEST_FINISHED_STATE
        self.__should_stop = False

    def mark_test_started(self):
        """Signal to the shard split thread that a new test has started.

        This function should be called during before_test(). Calling it causes the
        wait_for_shard_split_permitted() function to no longer block and to instead return
        true.
        """
        with self.__lock:
            self.test_num += 1
            self.__test_state = self._TEST_STARTED_STATE
            self.__cond.notify_all()

    def mark_test_finished(self):
        """Signal to the shard split thread that the current test has finished.

        This function should be called during after_test(). Calling it causes the
        wait_for_shard_split_permitted() function to block until mark_test_started() is called
        again.
        """
        with self.__lock:
            self.__test_state = self._TEST_FINISHED_STATE
            self.__cond.notify_all()

    def is_test_finished(self):
        """Return true if the current test has finished."""
        with self.__lock:
            return self.__test_state == self._TEST_FINISHED_STATE

    def stop(self):
        """Signal to the shard split thread that it should exit.

        This function should be called during after_suite(). Calling it causes the
        wait_for_shard_split_permitted() function to no longer block and to instead return
        false.
        """
        with self.__lock:
            self.__should_stop = True
            self.__cond.notify_all()

    def wait_for_shard_split_permitted(self):
        """Block until splits are permitted, or until stop() is called.

        Returns:
            True once a test is in progress, False if stop() has been called.
        """
        with self.__lock:
            while not self.__should_stop:
                if self.__test_state == self._TEST_STARTED_STATE:
                    return True
                self.__cond.wait()
        return False

    def wait_for_shard_split_interval(self, timeout):
        """Block for up to 'timeout' seconds.

        Returns early when the condition is notified, which mark_test_started(),
        mark_test_finished() and stop() all do. Returns immediately if stop() has already been
        called, because a notification sent before the wait begins would otherwise be missed
        and the caller would block for the whole timeout.
        """
        with self.__lock:
            if self.__should_stop:
                return
            self.__cond.wait(timeout)

    def poll_for_idle_request(self):  # noqa: D205,D400
        """Return true if the current test has finished, meaning the shard split thread has
        been asked to stop running splits for now, or false if a test is still in progress.
        """
        with self.__lock:
            return self.__test_state == self._TEST_FINISHED_STATE
class _ShardSplitOptions:
    """Parameters of a single shard split, plus accessors that read through to the fixture.

    A fresh random migration id is generated for every instance.

    NOTE(review): the dictionary keys used by __str__ are reproduced exactly as they appear in
    the source, including the spaces just inside the quotes -- confirm that is intended.
    """

    def __init__(  # pylint: disable=too-many-arguments
            self, logger, shard_split_fixture, tenant_ids, recipient_tag_name,
            recipient_set_name):
        """Store the split parameters and generate the migration id.

        Args:
            logger: logger to associate with this split.
            shard_split_fixture: fixture providing the donor replica set and recipient nodes.
            tenant_ids: tenant ids passed to the split command.
            recipient_tag_name: replica set tag name identifying the recipient nodes.
            recipient_set_name: replica set name for the recipient.
        """
        self.logger = logger
        self.migration_id = uuid.uuid4()
        self.shard_split_fixture = shard_split_fixture
        self.tenant_ids = tenant_ids
        self.recipient_tag_name = recipient_tag_name
        self.recipient_set_name = recipient_set_name

    def get_migration_id_as_binary(self):
        """Return the migration id as BSON Binary (subtype 4)."""
        return bson.Binary(self.migration_id.bytes, 4)

    def get_donor_rs(self):
        """Return the current donor for the split fixture."""
        return self.shard_split_fixture.get_donor_rs()

    def get_donor_name(self):
        """Return the replica set name for the donor."""
        return self.get_donor_rs().replset_name

    def get_donor_primary(self):
        """Return a connection to the donor primary."""
        # This class has no AWAIT_REPL_TIMEOUT_MINS attribute of its own, so the constant is
        # read from ReplicaSetFixture.
        # NOTE(review): the constant is named in minutes while get_primary() takes
        # 'timeout_secs', so it is converted to seconds here -- confirm the units.
        timeout_secs = ReplicaSetFixture.AWAIT_REPL_TIMEOUT_MINS * 60
        return self.get_donor_rs().get_primary(timeout_secs=timeout_secs)

    def get_donor_nodes(self):
        """Return the nodes for the current shard split fixture donor."""
        return self.get_donor_rs().nodes

    def get_recipient_nodes(self):
        """Return the recipient nodes for the shard split fixture."""
        return self.shard_split_fixture.get_recipient_nodes()

    def __str__(self):
        """Return the split parameters rendered as the string form of a dict."""
        opts = {
            " migration_id ": self.migration_id,
            " tenant_ids ": self.tenant_ids,
            " donor ": self.get_donor_name(),
            " recipientSetName ": self.recipient_set_name,
            " recipientTagName ": self.recipient_tag_name,
        }
        return str(opts)
class _ShardSplitThread(threading.Thread):  # pylint: disable=too-many-instance-attributes
    """Background thread that repeatedly runs shard splits against a ShardSplitFixture.

    The hook controls the thread through resume(), pause() and stop(). The thread only starts
    a split while its lifecycle object reports that a test is in progress.

    NOTE(review): every string literal (command names, field names, log text) is reproduced
    exactly as it appears in the source, including the spaces just inside the quotes --
    confirm these match what the server and TestData actually use.
    """

    THREAD_NAME = " ShardSplitThread "

    # [low, high] bounds in seconds for the random delay before a split; the pair is selected
    # by the split count modulo the number of pairs.
    WAIT_SECS_RANGES = [[0.05, 0.1], [0.1, 0.5], [1, 5], [5, 15]]
    POLL_INTERVAL_SECS = 0.1

    NO_SUCH_MIGRATION_ERR_CODE = 327
    INTERNAL_ERR_CODE = 1

    def __init__(self, logger, shard_split_fixture, shell_options, test_report):
        """Initialize _ShardSplitThread.

        Args:
            logger: logger used for all thread output.
            shard_split_fixture: fixture the splits are run against.
            shell_options: shell options; tenant ids and auth options are read from the
                TestData entry of its global vars.
            test_report: the suite's test report.
        """
        threading.Thread.__init__(self, name=self.THREAD_NAME)
        self.daemon = True
        self.logger = logger
        self._shard_split_fixture = shard_split_fixture
        self._tenant_ids = shell_options[" global_vars "][" TestData "][" tenantIds "]
        self._auth_options = shell_options[" global_vars "][" TestData "][" authOptions "]
        self._test = None
        self._test_report = test_report
        self._shell_options = shell_options

        self.__lifecycle = ShardSplitLifeCycle()
        # Event set when the thread has been stopped using the 'stop()' method.
        self._is_stopped_evt = threading.Event()
        # Event set when the thread is not performing shard splits.
        self._is_idle_evt = threading.Event()
        self._is_idle_evt.set()

    def run(self):
        """Execute the thread.

        Loops until the lifecycle reports a stop. Any exception ends the loop; it is logged
        rather than propagated, and pause() later notices that the thread is dead.
        """
        if not self._shard_split_fixture:
            self.logger.warning(" No ShardSplitFixture to run shard splits on. ")
            return

        split_count = 0

        try:
            while True:
                self._is_idle_evt.set()

                permitted = self.__lifecycle.wait_for_shard_split_permitted()
                if not permitted:
                    break

                if split_count >= 3:  # TODO(SERVER-66045): Remove this check and run unbounded splits
                    time.sleep(self.POLL_INTERVAL_SECS)
                    continue

                self._is_idle_evt.clear()

                split_opts = self._create_split_opts(split_count)

                # Set up the donor for a split
                self._shard_split_fixture.add_recipient_nodes(split_opts.recipient_set_name)

                # Briefly wait to let the test run before starting the split operation, so that
                # the first split is more likely to have data to migrate.
                wait_bounds = self.WAIT_SECS_RANGES[split_count % len(self.WAIT_SECS_RANGES)]
                wait_secs = random.uniform(*wait_bounds)
                self.logger.info(f" Waiting for {wait_secs} seconds before starting split. ")
                self.__lifecycle.wait_for_shard_split_interval(wait_secs)

                self.logger.info(f" Starting shard split: {str(split_opts)} . ")
                start_time = time.time()
                is_committed = self._run_shard_split(split_opts)
                end_time = time.time()

                split_count += 1
                self.logger.info(
                    f" Completed shard split {str(split_opts)} in "
                    f"{(end_time - start_time) * 1000} ms. ")

                # set up the fixture for the next split operation
                if is_committed:
                    self._shard_split_fixture.replace_donor_with_recipient(
                        split_opts.recipient_set_name)
                else:
                    self._shard_split_fixture.remove_recipient_nodes()
                # The top of the loop re-checks the lifecycle state, so no explicit idle poll
                # is needed here.
        except Exception:  # pylint: disable=W0703
            # Proactively log the exception when it happens so it will be flushed immediately.
            self.logger.exception(" Shard split thread threw exception ")
            # The event should be signaled whenever the thread is not performing shard splits.
            self._is_idle_evt.set()

    def stop(self):
        """Stop the thread when the suite finishes."""
        self.__lifecycle.stop()
        self._is_stopped_evt.set()
        # Unpause to allow the thread to finish.
        self.resume(self._test)
        self.join()

    def pause(self):
        """Pause the thread after test.

        Raises:
            errors.ServerFailure: if the thread has died or the fixture is no longer running.
        """
        self.__lifecycle.mark_test_finished()

        # Wait until we are no longer executing splits.
        self._is_idle_evt.wait()
        # Check if the thread is alive in case it has thrown an exception while running.
        self._check_thread()

        # Check that the fixture is still running.
        if not self._shard_split_fixture.is_running():
            raise errors.ServerFailure(
                f" ShardSplitFixture with pids {self._shard_split_fixture.pids()} expected to be running in "
                " ContinuousShardSplit, but wasn ' t. ")

    def resume(self, test):
        """Resume the thread before test."""
        self._test = test
        self.__lifecycle.mark_test_started()

    def _wait(self, timeout):
        """Wait until stop or timeout."""
        self._is_stopped_evt.wait(timeout)

    def short_name(self):
        """Return the name of the thread."""
        return self.THREAD_NAME

    def _check_thread(self):
        """Throw an error if the thread is not running."""
        if not self.is_alive():
            msg = " Shard split thread is not running. "
            self.logger.error(msg)
            raise errors.ServerFailure(msg)

    def _is_fail_point_abort_reason(self, abort_reason):
        """Return true if 'abort_reason' carries the simulated split error code and message."""
        return (abort_reason[" code "] == self.INTERNAL_ERR_CODE
                and abort_reason[" errmsg "] == " simulate a shard split error ")

    def _create_split_opts(self, split_count):
        """Build the options for the next split; the recipient set name is numbered from 1."""
        recipient_set_name = f" rs {split_count + 1} "
        recipient_tag_name = " recipientNode "
        return _ShardSplitOptions(self.logger, self._shard_split_fixture, self._tenant_ids,
                                  recipient_tag_name, recipient_set_name)

    def _create_client(self, node):
        """Return an authenticated client for 'node'."""
        return fixture_interface.authenticate(node.mongo_client(), self._auth_options)

    def _run_shard_split(self, split_opts):  # noqa: D205,D400
        """Run one split end to end: commit, wait for a reroute if it committed, forget, and
        wait for garbage collection.

        Returns:
            True if the split committed, False if it aborted because of the fail point.

        Raises:
            errors.ServerFailure: if the command response is not ok, or the split aborted for
                any reason other than the fail point.
            pymongo.errors.PyMongoError: driver errors are logged and re-raised.
        """
        try:
            donor_client = self._create_client(split_opts.get_donor_rs())
            res = self._commit_shard_split(donor_client, split_opts)
            is_committed = res[" state "] == " committed "

            # Garbage collect the split prior to throwing error to avoid conflicting operations
            # in the next test.
            if is_committed:
                # Wait for the donor/proxy to reroute at least one command before doing garbage
                # collection. Stop waiting when the test finishes.
                self._wait_for_reroute_or_test_completion(donor_client, split_opts)

            self._forget_shard_split(donor_client, split_opts)
            self._wait_for_garbage_collection(split_opts)

            if not res[" ok "]:
                raise errors.ServerFailure(
                    f" Shard split ' {split_opts.migration_id} ' on replica set "
                    f" ' {split_opts.get_donor_name()} ' failed: {str(res)} ")

            if is_committed:
                return True

            abort_reason = res[" abortReason "]
            if self._is_fail_point_abort_reason(abort_reason):
                self.logger.info(
                    f" Shard split ' {split_opts.migration_id} ' on replica set "
                    f" ' {split_opts.get_donor_name()} ' aborted due to failpoint: {str(res)} . ")
                return False
            raise errors.ServerFailure(
                f" Shard split ' {str(split_opts.migration_id)} ' with donor replica set "
                f" ' {split_opts.get_donor_name()} ' aborted due to an error: {str(res)} ")
        except pymongo.errors.PyMongoError:
            self.logger.exception(
                f" Error running shard split ' {split_opts.migration_id} ' with donor primary on "
                f" replica set ' {split_opts.get_donor_name()} ' . ")
            raise

    def _commit_shard_split(self, donor_client, split_opts):  # noqa: D205,D400
        """Issue the commit command until the response reports a committed or aborted state,
        or is not ok, and return that response. Connection failures are retried.
        """
        self.logger.info(f" Starting shard split ' {split_opts.migration_id} ' on replica set "
                         f" ' {split_opts.get_donor_name()} ' . ")

        # The command document and codec options do not change between retries.
        cmd_obj = {
            " commitShardSplit ": 1,
            " migrationId ": split_opts.get_migration_id_as_binary(),
            " tenantIds ": split_opts.tenant_ids,
            " recipientTagName ": split_opts.recipient_tag_name,
            " recipientSetName ": split_opts.recipient_set_name,
        }
        codec_options = bson.codec_options.CodecOptions(
            uuid_representation=bson.binary.UUID_SUBTYPE)

        while True:
            try:
                # The second positional parameter of command() is 'value', which is ignored
                # for document commands, so the codec options must be passed by keyword.
                res = donor_client.admin.command(cmd_obj, codec_options=codec_options)

                if res[" state "] == " committed ":
                    self.logger.info(
                        f" Shard split ' {split_opts.migration_id} ' on replica set "
                        f" ' {split_opts.get_donor_name()} ' has committed. ")
                    return res
                if res[" state "] == " aborted ":
                    self.logger.info(
                        f" Shard split ' {split_opts.migration_id} ' on replica set "
                        f" ' {split_opts.get_donor_name()} ' has aborted: {str(res)} . ")
                    return res
                if not res[" ok "]:
                    self.logger.info(
                        f" Shard split ' {split_opts.migration_id} ' on replica set "
                        f" ' {split_opts.get_donor_name()} ' has failed: {str(res)} . ")
                    return res
            except pymongo.errors.ConnectionFailure:
                self.logger.info(
                    f" Retrying shard split ' {split_opts.migration_id} ' against replica set "
                    f" ' {split_opts.get_donor_name()} ' . ")

    def _forget_shard_split(self, donor_client, split_opts):
        """Issue the forget command for the split, retrying on connection failures.

        An OperationFailure with the "no such migration" code is logged and treated as
        success; any other driver error is re-raised.
        """
        self.logger.info(f" Forgetting shard split ' {split_opts.migration_id} ' on replica set "
                         f" ' {split_opts.get_donor_name()} ' . ")

        cmd_obj = {
            " forgetShardSplit ": 1,
            " migrationId ": split_opts.get_migration_id_as_binary(),
        }
        codec_options = bson.codec_options.CodecOptions(
            uuid_representation=bson.binary.UUID_SUBTYPE)

        while True:
            try:
                # Codec options are passed by keyword; positionally they would bind to 'value'.
                donor_client.admin.command(cmd_obj, codec_options=codec_options)
                return
            except pymongo.errors.ConnectionFailure:
                self.logger.info(
                    f" Retrying forget shard split ' {split_opts.migration_id} ' against replica "
                    f" set ' {split_opts.get_donor_name()} ' . ")
                continue
            except pymongo.errors.OperationFailure as err:
                if err.code != self.NO_SUCH_MIGRATION_ERR_CODE:
                    raise

                # The fixture was restarted.
                self.logger.info(
                    f" Could not find shard split ' {split_opts.migration_id} ' on donor primary on "
                    f" replica set ' {split_opts.get_donor_name()} ' : {str(err)} . ")
                return
            except pymongo.errors.PyMongoError:
                self.logger.exception(
                    f" Error forgetting shard split ' {split_opts.migration_id} ' on donor primary on "
                    f" replica set ' {split_opts.get_donor_name()} ' . ")
                raise

    def _wait_for_node_garbage_collection(self, node, role, set_name, split_opts):
        """Poll one node until it holds no split donor document for the split's tenants.

        'role' (the word used in the log text) and 'set_name' only affect logging.
        """
        self.logger.info(
            f" Waiting for shard split ' {split_opts.migration_id} ' to be garbage collected on {role} node on port {node.port} of replica set ' {set_name} ' . "
        )

        node_client = self._create_client(node)
        count_cmd = {
            " count ": " tenantSplitDonors ",
            " query ": {" tenantIds ": split_opts.tenant_ids},
        }

        while True:
            try:
                res = node_client.config.command(count_cmd)
                if res[" n "] == 0:
                    return
            except pymongo.errors.ConnectionFailure:
                self.logger.info(
                    f" Retrying waiting for shard split ' {split_opts.migration_id} ' to be garbage collected on {role} node on port {node.port} of replica set ' {set_name} ' . "
                )
            # Always pause between polls, including after a connection failure.
            time.sleep(self.POLL_INTERVAL_SECS)

    def _wait_for_garbage_collection(self, split_opts):  # noqa: D205,D400
        """Wait until every donor node, then every recipient node, no longer has a split
        donor document for the split's tenants. Driver errors are logged and re-raised.
        """
        try:
            for donor_node in split_opts.get_donor_nodes():
                self._wait_for_node_garbage_collection(donor_node, "donor",
                                                       split_opts.get_donor_name(), split_opts)

            for recipient_node in split_opts.get_recipient_nodes():
                self._wait_for_node_garbage_collection(recipient_node, "recipient",
                                                       split_opts.recipient_set_name, split_opts)
        except pymongo.errors.PyMongoError:
            self.logger.exception(
                f" Error waiting for shard split ' {split_opts.migration_id} ' from donor replica set ' {split_opts.get_donor_name()} ' to recipient replica set ' {split_opts.recipient_set_name} ' to be garbage collected. "
            )
            raise

    def _wait_for_reroute_or_test_completion(self, donor_client, split_opts):
        """Poll the donor until a reroute document for this split exists or the test ends.

        Connection failures are logged and retried; other driver errors are logged and
        re-raised.
        """
        start_time = time.time()

        self.logger.info(
            f" Waiting for donor primary of replica set ' {split_opts.get_donor_name()} ' for shard split ' {split_opts.migration_id} ' to reroute at least one conflicting command. Stop waiting when the test finishes. "
        )

        while not self.__lifecycle.is_test_finished():
            try:
                # We are reusing the infrastructure originally developed for tenant migrations,
                # and aren't concerned about conflicts because we don't expect the tenant migration
                # and shard split hooks to run concurrently.
                doc = donor_client[" testTenantMigration "][" rerouted "].find_one(
                    {" _id ": split_opts.get_migration_id_as_binary()})
                if doc is not None:
                    return
            except pymongo.errors.ConnectionFailure:
                self.logger.info(
                    f" Retrying waiting for donor primary of replica set ' {split_opts.get_donor_name()} ' for shard split ' {split_opts.migration_id} ' to reroute at least one conflicting command. "
                )
            except pymongo.errors.PyMongoError:
                end_time = time.time()
                self.logger.exception(
                    f" Error running find command on donor primary replica set ' {split_opts.get_donor_name()} ' after waiting for reroute for {(end_time - start_time) * 1000} ms "
                )
                raise

            # Always pause between polls, including after a connection failure.
            time.sleep(self.POLL_INTERVAL_SECS)