diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 11c9dc1b738..bd1529418ae 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger", "branch": "mongodb-master", - "commit": "cc6d98c24d81e06a0e7def863c11c3783f81f0f9" + "commit": "3950a0b9fd1bcb73565df8a4b7b165b44c64c5f9" } diff --git a/src/third_party/wiredtiger/src/conn/conn_layered.c b/src/third_party/wiredtiger/src/conn/conn_layered.c index 2ff78b7b896..631e4db2801 100644 --- a/src/third_party/wiredtiger/src/conn/conn_layered.c +++ b/src/third_party/wiredtiger/src/conn/conn_layered.c @@ -2136,7 +2136,7 @@ __layered_iterate_ingest_tables_for_gc_pruning( for (i = 0; i < manager->open_layered_table_count; i++) { if ((entry = manager->entries[i]) == NULL) continue; - WT_ERR(__wt_buf_setstr(session, layered_table_uri_buf, entry->layered_uri)); + ret = __wt_buf_setstr(session, layered_table_uri_buf, entry->layered_uri); /* * Unlock the mutex while handling a table since while updating the prune timestamp we get a @@ -2147,8 +2147,13 @@ __layered_iterate_ingest_tables_for_gc_pruning( * probably do nothing), or miss an element to prune (it will be visited next time). */ __wt_spin_unlock(session, &manager->layered_table_lock); + + /* Check the buffer-copy result here to avoid returning with the mutex held. 
*/ + WT_ERR(ret); + WT_ERR(__layered_update_ingest_table_prune_timestamp( session, layered_table_uri_buf->data, checkpoint_timestamp, uri_at_checkpoint_buf)); + __wt_spin_lock(session, &manager->layered_table_lock); } __wt_spin_unlock(session, &manager->layered_table_lock); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 5d2f1b5564e..038ed8e110b 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -2896,6 +2896,14 @@ __rec_split_discard(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_PAGE *page) btree = S2BT(session); mod = page->modify; + /* + * Free the disaggregated block only if it is not a block replacement or if it is the root page. + * If the page has undergone a split in the past but has not been split during the current + * reconciliation, ensure that the previous blocks are freed, as this situation does not qualify + * as a block replacement. + */ + bool free_blocks = page->disagg_info == NULL || mod->mod_multi_entries != 1 || + r->multi_next != 1 || __wt_ref_is_root(r->ref); /* * A page that split is being reconciled for the second, or subsequent time; discard underlying * block space used in the last reconciliation that is not being reused for this reconciliation. @@ -2908,22 +2916,16 @@ __rec_split_discard(WT_SESSION_IMPL *session, WTI_RECONCILE *r, WT_PAGE *page) __wt_free(session, multi->supd); /* - * If the page was re-written free the backing disk blocks used in the previous write. The + * If the page was re-written, free the backing disk blocks used in the previous write. The * page may instead have been a disk image with associated saved updates: ownership of the * disk image is transferred when rewriting the page in-memory and there may not have been * saved updates. 
We've gotten this wrong a few times, so use the existence of an address to * confirm backing blocks we care about, and free any disk image/saved updates. */ if (multi->addr.block_cookie != NULL) { - if (multi->block_meta == NULL) + if (free_blocks) WT_RET(__wt_btree_block_free( session, multi->addr.block_cookie, multi->addr.block_cookie_size)); - /* Free disagg block only if it is not a block replacement or it is the root page. */ - else if (r->multi_next != 1 || __wt_ref_is_root(r->ref)) { - WT_RET(__wt_btree_block_free( - session, multi->addr.block_cookie, multi->addr.block_cookie_size)); - multi->block_meta->page_id = WT_BLOCK_INVALID_PAGE_ID; - } __wt_free(session, multi->addr.block_cookie); } __wt_free(session, multi->block_meta); diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml index 6c98d01d62f..f93a3836ca4 100644 --- a/src/third_party/wiredtiger/test/evergreen.yml +++ b/src/third_party/wiredtiger/test/evergreen.yml @@ -378,7 +378,8 @@ functions: mkdir -p cmake_build cd cmake_build - echo "Call CMake" + echo "Calling CMake with command:" + echo $CMAKE $DEFINED_EVERGREEN_CONFIG_FLAGS ${posix_configure_flags|} -G "${cmake_generator|Ninja}" ./.. $CMAKE $DEFINED_EVERGREEN_CONFIG_FLAGS ${posix_configure_flags|} -G "${cmake_generator|Ninja}" ./.. 
echo "Completed CMake" fi diff --git a/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-16017 b/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.disagg.WT-16017 similarity index 100% rename from src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-16017 rename to src/third_party/wiredtiger/test/format/failure_configs/CONFIG.disagg.WT-16017 diff --git a/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.disagg.WT-16038 b/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.disagg.WT-16038 new file mode 100644 index 00000000000..c422f182bc5 --- /dev/null +++ b/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.disagg.WT-16038 @@ -0,0 +1,213 @@ +############################################ +# RUN PARAMETERS: V3 +############################################ +assert.read_timestamp=0 +background_compact=0 +background_compact.free_space_target=97 +backup=0 +backup.incremental=off +backup.incr_granularity=12597 +backup.live_restore=0 +backup.live_restore_read_size=2 +backup.live_restore_threads=9 +block_cache=0 +block_cache.cache_on_checkpoint=0 +block_cache.cache_on_writes=0 +block_cache.size=63 +cache=2048 +cache.evict_max=0 +cache.eviction_dirty_target=0 +cache.eviction_dirty_trigger=0 +cache.eviction_updates_target=0 +cache.eviction_updates_trigger=0 +cache.minimum=0 +cache.maximum=0 +checkpoint=on +checkpoint.log_size=59 +checkpoint.wait=39 +chunk_cache=0 +chunk_cache.capacity=2517 +chunk_cache.chunk_size=2 +chunk_cache.storage_path=off +chunk_cache.type=off +compact.free_space_target=44 +debug.background_compact=0 +debug.checkpoint_retention=8 +debug.cursor_reposition=0 +debug.eviction=0 +debug.log_retention=6 +debug.realloc_exact=0 +debug.realloc_malloc=0 +debug.slow_checkpoint=0 +debug.table_logging=0 +debug.update_restore_evict=0 +disagg.internal_page_delta=0 +disagg.leaf_page_delta=0 +disagg.multi=0 +disagg.enabled=1 +disagg.layered=1 +disagg.mode=leader +disagg.page_log=palite 
+disagg.page_log.verbose=0 +disk.data_extend=0 +disk.encryption=none +disk.mmap=1 +disk.mmap_all=0 +eviction.evict_use_softptr=0 +file_manager.close_handle_minimum=28 +file_manager.close_idle_time=57 +file_manager.close_scan_interval=10 +format.abort=0 +format.independent_thread_rng=1 +format.major_timeout=0 +import=0 +logging=0 +logging.compression=none +logging.file_max=117482 +logging.prealloc=1 +logging.remove=0 +obsolete_cleanup.method=off +obsolete_cleanup.wait=360 +ops.alter=0 +ops.compaction=0 +ops.hs_cursor=0 +ops.pct.modify=0 +ops.bound_cursor=0 +ops.prepare=0 +ops.random_cursor=0 +ops.salvage=0 +ops.throttle=0 +ops.throttle.sleep_us=314201 +ops.truncate=0 +ops.verify=1 +prefetch=1 +precise_checkpoint=1 +preserve_prepared=0 +quiet=0 +random.data_seed=14704445 +random.extra_seed=11689594 +runs.in_memory=0 +runs.ops=500000 +runs.predictable_replay=0 +runs.rows=180000 +runs.source=layered +runs.tables=1 +runs.threads=30 +runs.timer=360 +runs.type=row-store +runs.verify_failure_dump=0 +statistics.mode=all +statistics_log.sources=off +stress.aggressive_stash_free=0 +stress.aggressive_sweep=0 +stress.checkpoint=0 +stress.checkpoint_evict_page=0 +stress.checkpoint_prepare=0 +stress.compact_slow=0 +stress.evict_reposition=0 +stress.failpoint_eviction_split=0 +stress.failpoint_hs_delete_key_from_ts=1 +stress.failpoint_rec_before_wrapup=0 +stress.hs_checkpoint_delay=1 +stress.hs_search=0 +stress.hs_sweep=0 +stress.prefetch_delay=0 +stress.prepare_resolution_1=0 +stress.sleep_before_read_overflow_onpage=0 +stress.split_1=0 +stress.split_2=0 +stress.split_3=0 +stress.split_4=0 +stress.split_5=0 +stress.split_6=0 +stress.split_7=0 +stress.split_8=0 +tiered_storage.flush_frequency=0 +tiered_storage.storage_source=off +transaction.implicit=0 +transaction.operation_timeout_ms=2000 +transaction.timestamps=1 +wiredtiger.config=off +wiredtiger.rwlock=1 +wiredtiger.leak_memory=0 +############################################ +# TABLE PARAMETERS: table 1 
+############################################ +table1.btree.compression=none +table1.btree.dictionary=0 +table1.btree.internal_key_truncation=1 +table1.btree.internal_page_max=13 +table1.btree.key_max=105 +table1.btree.key_min=24 +table1.btree.leaf_page_max=13 +table1.btree.memory_page_max=1 +table1.btree.prefix_len=0 +table1.btree.prefix_compression=1 +table1.btree.prefix_compression_min=2 +table1.btree.reverse=0 +table1.btree.split_pct=74 +table1.btree.value_max=2535 +table1.btree.value_min=3 +table1.disk.checksum=off +table1.disk.firstfit=0 +table1.ops.pareto=0 +table1.ops.pareto.skew=90 +table1.ops.pct.delete=24 +table1.ops.pct.insert=29 +table1.ops.pct.read=41 +table1.ops.pct.write=6 +table1.runs.mirror=0 +############################################ +# TABLE PARAMETERS: table 2 +############################################ +table2.btree.compression=none +table2.btree.dictionary=1 +table2.btree.internal_key_truncation=1 +table2.btree.internal_page_max=15 +table2.btree.key_max=62 +table2.btree.key_min=29 +table2.btree.leaf_page_max=17 +table2.btree.memory_page_max=5 +table2.btree.prefix_len=0 +table2.btree.prefix_compression=1 +table2.btree.prefix_compression_min=5 +table2.btree.reverse=1 +table2.btree.split_pct=50 +table2.btree.value_max=3649 +table2.btree.value_min=4 +table2.disk.checksum=on +table2.disk.firstfit=0 +table2.ops.pareto=0 +table2.ops.pareto.skew=91 +table2.ops.pct.delete=3 +table2.ops.pct.insert=14 +table2.ops.pct.read=3 +table2.ops.pct.write=80 +table2.runs.mirror=0 +############################################ +# TABLE PARAMETERS: table 3 +############################################ +table3.btree.compression=none +table3.btree.dictionary=1 +table3.btree.internal_key_truncation=1 +table3.btree.internal_page_max=17 +table3.btree.key_max=65 +table3.btree.key_min=17 +table3.btree.leaf_page_max=12 +table3.btree.memory_page_max=7 +table3.btree.prefix_len=0 +table3.btree.prefix_compression=0 +table3.btree.prefix_compression_min=8 
+table3.btree.reverse=0
+table3.btree.split_pct=54
+table3.btree.value_max=3313
+table3.btree.value_min=2
+table3.disk.checksum=on
+table3.disk.firstfit=0
+table3.ops.pareto=0
+table3.ops.pareto.skew=22
+table3.ops.pct.delete=0
+table3.ops.pct.insert=96
+table3.ops.pct.read=1
+table3.ops.pct.write=3
+table3.runs.mirror=0
diff --git a/src/third_party/wiredtiger/test/suite/helper_disagg.py b/src/third_party/wiredtiger/test/suite/helper_disagg.py
index 5bab2a2d050..7ad308e43a9 100644
--- a/src/third_party/wiredtiger/test/suite/helper_disagg.py
+++ b/src/third_party/wiredtiger/test/suite/helper_disagg.py
@@ -27,7 +27,7 @@
 # OTHER DEALINGS IN THE SOFTWARE.
 #
 
-import functools, os, wttest
+import functools, os, shutil, wttest
 
 # These routines help run the various page log sources used by disaggregated storage.
 # They are required to manage the generation of disaggregated storage specific configurations.
@@ -108,12 +108,17 @@ def disagg_test_class(cls):
     return disagg_test_case_class
 
 
-# This mixin class provides disaggregated storage configuration methods.
+# This mixin class provides disaggregated storage configuration methods and a few utility functions.
 class DisaggConfigMixin:
+
+    # Configuration parameters, can be overridden in test class
     disagg_verbose = 0 # (0 <= level <=3) can be overridden in test class
     disagg_config = None # a string, can be overridden in test class
     palm_cache_size_mb = -1 # this uses the default, can be overridden
 
+    # Internal state tracking
+    num_restarts = 0
+
     # Returns True if the current scenario is disaggregated.
     def is_disagg_scenario(self):
         return hasattr(self, 'is_disagg') and self.is_disagg
@@ -245,3 +250,47 @@ class DisaggConfigMixin:
         self.conn.reconfigure('disaggregated=(role="follower")')
         self.close_conn()
         self.open_conn(directory, config)
+
+    def restart_without_local_files(self, pickup_checkpoint=True, step_up=False):
+        """
+        Restart the node without local files.
+
+        Closes the connection, moves the local WiredTiger files in the current directory into
+        a new SAVE.N directory (together with a copy of 'kv_home'), and reopens the connection.
+
+        pickup_checkpoint -- when true, step down to follower and fetch the checkpoint metadata
+            before closing, then reconfigure the reopened connection with that metadata.
+        step_up -- when true, reconfigure the reopened connection as the leader.
+        """
+
+        if pickup_checkpoint:
+            # Step down to avoid shutdown checkpoint
+            self.conn.reconfigure('disaggregated=(role="follower")')
+            checkpoint_meta = self.disagg_get_complete_checkpoint_meta()
+
+        # Close the current connection
+        self.close_conn()
+
+        # Move the local files to another directory
+        self.num_restarts += 1
+        save_dir = f'SAVE.{self.num_restarts}'
+        os.mkdir(save_dir)
+        for f in os.listdir():
+            if os.path.isdir(f):
+                continue
+            if f.startswith('WiredTiger') or f.endswith(('.wt', '.wt_ingest')):
+                os.rename(f, os.path.join(save_dir, f))
+
+        # Also save the PALI database (to aid debugging)
+        shutil.copytree('kv_home', os.path.join(save_dir, 'kv_home'))
+
+        # Reopen the connection
+        self.open_conn()
+
+        # Pick up the last checkpoint
+        if pickup_checkpoint:
+            self.conn.reconfigure(f'disaggregated=(checkpoint_meta="{checkpoint_meta}")')
+
+        # Step up as the leader
+        if step_up:
+            self.conn.reconfigure('disaggregated=(role="leader")')
diff --git a/src/third_party/wiredtiger/test/suite/test_layered15.py b/src/third_party/wiredtiger/test/suite/test_layered15.py
index a7120ddf209..0828cacde02 100644
--- a/src/third_party/wiredtiger/test/suite/test_layered15.py
+++ b/src/third_party/wiredtiger/test/suite/test_layered15.py
@@ -26,7 +26,7 @@
 # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 # OTHER DEALINGS IN THE SOFTWARE.
 
-import os, os.path, shutil, wttest +import wttest from helper_disagg import disagg_test_class, gen_disagg_storages from wtscenario import make_scenarios @@ -54,32 +54,6 @@ class test_layered15(wttest.WiredTigerTestCase): disagg_storages = gen_disagg_storages('test_layered15', disagg_only = True) scenarios = make_scenarios(disagg_storages) - num_restarts = 0 - - # Restart the node without local files - def restart_without_local_files(self): - # Step down to ensure that there are no more checkpoints - self.conn.reconfigure(f'disaggregated=(role=follower)') - - # Close the current connection - self.close_conn() - - # Move the local files to another directory - self.num_restarts += 1 - dir = f'SAVE.{self.num_restarts}' - os.mkdir(dir) - for f in os.listdir(): - if os.path.isdir(f): - continue - if f.startswith('WiredTiger') or f.startswith('test_'): - os.rename(f, os.path.join(dir, f)) - - # Also save the PALM database (to aid debugging) - shutil.copytree('kv_home', os.path.join(dir, 'kv_home')) - - # Reopen the connection - self.open_conn() - # Ensure that the metadata cursor has all the expected URIs. 
def check_metadata_cursor(self, expect_contains, expect_missing = []): cursor = self.session.open_cursor('metadata:', None, None) @@ -139,8 +113,11 @@ class test_layered15(wttest.WiredTigerTestCase): # ------------------------------ Restart 1 ------------------------------ # + # Step down to ensure that there are no more checkpoints + self.conn.reconfigure(f'disaggregated=(role=follower)') + # Reopen the connection - self.restart_without_local_files() + self.restart_without_local_files(pickup_checkpoint=False) # There should be no shared URIs in the metadata table at this point self.check_metadata_cursor([], self.with_ingest_uris) @@ -229,8 +206,11 @@ class test_layered15(wttest.WiredTigerTestCase): # ------------------------------ Restart 2 ------------------------------ # + # Step down to ensure that there are no more checkpoints + self.conn.reconfigure(f'disaggregated=(role=follower)') + # Reopen the connection - self.restart_without_local_files() + self.restart_without_local_files(pickup_checkpoint=False) # Pick up the checkpoint self.conn.reconfigure(f'disaggregated=(checkpoint_meta="{checkpoint_meta}")') diff --git a/src/third_party/wiredtiger/test/suite/test_layered25.py b/src/third_party/wiredtiger/test/suite/test_layered25.py index bfc61cee89b..2396252568b 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered25.py +++ b/src/third_party/wiredtiger/test/suite/test_layered25.py @@ -26,7 +26,7 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. 
-import os, os.path, shutil, wiredtiger, wttest +import wiredtiger, wttest from helper_disagg import disagg_test_class, gen_disagg_storages from wtscenario import make_scenarios @@ -53,29 +53,6 @@ class test_layered25(wttest.WiredTigerTestCase): ('shared', dict(prefix='table:', table_config='block_manager=disagg,log=(enabled=false)')), ]) - num_restarts = 0 - - # Restart the node without local files - def restart_without_local_files(self): - # Close the current connection - self.close_conn() - - # Move the local files to another directory - self.num_restarts += 1 - dir = f'SAVE.{self.num_restarts}' - os.mkdir(dir) - for f in os.listdir(): - if os.path.isdir(f): - continue - if f.startswith('WiredTiger') or f.startswith('test_'): - os.rename(f, os.path.join(dir, f)) - - # Also save the PALM database (to aid debugging) - shutil.copytree('kv_home', os.path.join(dir, 'kv_home')) - - # Reopen the connection - self.open_conn() - # Start without local files and test historical reads. def test_layered25(self): # The node started as a follower, so step it up as the leader @@ -161,10 +138,7 @@ class test_layered25(wttest.WiredTigerTestCase): # Part 2: Restart without local files # - checkpoint_meta = self.disagg_get_complete_checkpoint_meta() - self.restart_without_local_files() - self.conn.reconfigure(f'disaggregated=(checkpoint_meta="{checkpoint_meta}")') - self.conn.reconfigure(f'disaggregated=(role="leader")') + self.restart_without_local_files(step_up=True) # Avoid checkpoint error with precise checkpoint self.conn.set_timestamp(f'stable_timestamp={self.timestamp_str(timestamp2)}') diff --git a/src/third_party/wiredtiger/test/suite/test_layered30.py b/src/third_party/wiredtiger/test/suite/test_layered30.py index ec528b907d6..cb1f41a5193 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered30.py +++ b/src/third_party/wiredtiger/test/suite/test_layered30.py @@ -26,7 +26,7 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS 
IN THE SOFTWARE. -import os, os.path, shutil, wiredtiger, wttest +import wiredtiger, wttest from helper_disagg import disagg_test_class, gen_disagg_storages from wtscenario import make_scenarios @@ -56,29 +56,6 @@ class test_layered30(wttest.WiredTigerTestCase): ('two-tables', dict(another_table=True)), ]) - num_restarts = 0 - - # Restart the node without local files - def restart_without_local_files(self): - # Close the current connection - self.close_conn() - - # Move the local files to another directory - self.num_restarts += 1 - dir = f'SAVE.{self.num_restarts}' - os.mkdir(dir) - for f in os.listdir(): - if os.path.isdir(f): - continue - if f.startswith('WiredTiger') or f.startswith('test_'): - os.rename(f, os.path.join(dir, f)) - - # Also save the PALM database (to aid debugging) - shutil.copytree('kv_home', os.path.join(dir, 'kv_home')) - - # Reopen the connection - self.open_conn() - # Test creating an empty table. def test_layered30(self): # The node started as a follower, so step it up as the leader @@ -126,10 +103,7 @@ class test_layered30(wttest.WiredTigerTestCase): # Part 2: Check the new table after restart # - checkpoint_meta = self.disagg_get_complete_checkpoint_meta() - self.restart_without_local_files() - self.conn.reconfigure(f'disaggregated=(checkpoint_meta="{checkpoint_meta}")') - self.conn.reconfigure(f'disaggregated=(role="leader")') + self.restart_without_local_files(step_up=True) # Avoid checkpoint error with precise checkpoint self.conn.set_timestamp('stable_timestamp=1') diff --git a/src/third_party/wiredtiger/test/suite/test_layered36.py b/src/third_party/wiredtiger/test/suite/test_layered36.py index 45f3daebaf8..30e064c556b 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered36.py +++ b/src/third_party/wiredtiger/test/suite/test_layered36.py @@ -26,7 +26,7 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. 
-import os, os.path, shutil, wiredtiger, wttest +import wiredtiger, wttest from helper_disagg import disagg_test_class, gen_disagg_storages from wtscenario import make_scenarios @@ -53,29 +53,6 @@ class test_layered36(wttest.WiredTigerTestCase): ('layered-type', dict(prefix='table:', table_config='block_manager=disagg,type=layered')), ]) - num_restarts = 0 - - # Restart the node without local files - def restart_without_local_files(self): - # Close the current connection - self.close_conn() - - # Move the local files to another directory - self.num_restarts += 1 - dir = f'SAVE.{self.num_restarts}' - os.mkdir(dir) - for f in os.listdir(): - if os.path.isdir(f): - continue - if f.startswith('WiredTiger') or f.startswith('test_'): - os.rename(f, os.path.join(dir, f)) - - # Also save the PALM database (to aid debugging) - shutil.copytree('kv_home', os.path.join(dir, 'kv_home')) - - # Reopen the connection - self.open_conn() - # A simple test with a single node. def test_layered36(self): @@ -101,9 +78,7 @@ class test_layered36(wttest.WiredTigerTestCase): self.session.checkpoint() # Restart without local files to check that the tables are created and have correct data. - checkpoint_meta = self.disagg_get_complete_checkpoint_meta() self.restart_without_local_files() - self.conn.reconfigure(f'disaggregated=(checkpoint_meta="{checkpoint_meta}")') # Check the tables cursor = self.session.open_cursor(uri_empty, None, None) diff --git a/src/third_party/wiredtiger/test/suite/test_layered60.py b/src/third_party/wiredtiger/test/suite/test_layered60.py index a55dfc38f07..f91e6701226 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered60.py +++ b/src/third_party/wiredtiger/test/suite/test_layered60.py @@ -26,7 +26,7 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. 
-import os, os.path, shutil, threading, time, wiredtiger, wttest +import threading, time, wiredtiger, wttest from helper_disagg import disagg_test_class, gen_disagg_storages from wtscenario import make_scenarios @@ -48,29 +48,6 @@ class test_layered60(wttest.WiredTigerTestCase): disagg_storages = gen_disagg_storages('test_layered60', disagg_only = True) scenarios = make_scenarios(disagg_storages) - num_restarts = 0 - - # Restart the node without local files - def restart_without_local_files(self): - # Close the current connection - self.close_conn() - - # Move the local files to another directory - self.num_restarts += 1 - dir = f'SAVE.{self.num_restarts}' - os.mkdir(dir) - for f in os.listdir(): - if os.path.isdir(f): - continue - if f.startswith('WiredTiger') or f.startswith('test_'): - os.rename(f, os.path.join(dir, f)) - - # Also save the PALI database (to aid debugging) - shutil.copytree('kv_home', os.path.join(dir, 'kv_home')) - - # Reopen the connection - self.open_conn() - # Wait for a checkpoint to start running def wait_for_checkpoint_start(self): while True: @@ -147,13 +124,7 @@ class test_layered60(wttest.WiredTigerTestCase): # Part 2: Check the new table after restart # - checkpoint_meta = self.disagg_get_complete_checkpoint_meta() - self.restart_without_local_files() - - # Pick up the latest checkpoint and then step up as the leader. Do this in two steps, to - # mimic the behavior of the server. 
- self.conn.reconfigure(f'disaggregated=(checkpoint_meta="{checkpoint_meta}")') - self.conn.reconfigure(f'disaggregated=(role="leader")') + self.restart_without_local_files(step_up=True) # Avoid checkpoint error with precise checkpoint self.conn.set_timestamp('stable_timestamp=1') diff --git a/src/third_party/wiredtiger/test/suite/test_layered62.py b/src/third_party/wiredtiger/test/suite/test_layered62.py index ebcf7022c7e..a1bebabca66 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered62.py +++ b/src/third_party/wiredtiger/test/suite/test_layered62.py @@ -26,7 +26,7 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. -import os, os.path, shutil, threading, time, wiredtiger, wttest +import threading, time, wiredtiger, wttest from helper_disagg import disagg_test_class, gen_disagg_storages from wtscenario import make_scenarios @@ -58,29 +58,6 @@ class test_layered62(wttest.WiredTigerTestCase): disagg_storages = gen_disagg_storages('test_layered62', disagg_only = True) scenarios = make_scenarios(disagg_storages) - num_restarts = 0 - - # Restart the node without local files - def restart_without_local_files(self): - # Close the current connection - self.close_conn() - - # Move the local files to another directory - self.num_restarts += 1 - dir = f'SAVE.{self.num_restarts}' - os.mkdir(dir) - for f in os.listdir(): - if os.path.isdir(f): - continue - if f.startswith('WiredTiger') or f.startswith('test_'): - os.rename(f, os.path.join(dir, f)) - - # Also save the PALI database (to aid debugging) - shutil.copytree('kv_home', os.path.join(dir, 'kv_home')) - - # Reopen the connection - self.open_conn() - # Wait for a checkpoint to start running def wait_for_checkpoint_start(self): while True: @@ -107,13 +84,8 @@ class test_layered62(wttest.WiredTigerTestCase): self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(1)) self.session.checkpoint() - # Prevent the shutdown checkpoint. 
- self.conn.reconfigure('disaggregated=(role="follower")') - # Reopen the connection as a follower. - checkpoint_meta = self.disagg_get_complete_checkpoint_meta() self.restart_without_local_files() - self.conn.reconfigure(f'disaggregated=(checkpoint_meta="{checkpoint_meta}")') # # Part 1: Step up while a checkpoint is running. @@ -207,9 +179,7 @@ class test_layered62(wttest.WiredTigerTestCase): self.assertEqual(checkpoint_timestamp, 3) # Reopen the connection. - checkpoint_meta = self.disagg_get_complete_checkpoint_meta() self.restart_without_local_files() - self.conn.reconfigure(f'disaggregated=(checkpoint_meta="{checkpoint_meta}")') # Check that all the data is present. cursor = self.session.open_cursor(self.uri, None, None) diff --git a/src/third_party/wiredtiger/test/suite/test_layered64.py b/src/third_party/wiredtiger/test/suite/test_layered64.py index e5525b89377..7a891d327ef 100644 --- a/src/third_party/wiredtiger/test/suite/test_layered64.py +++ b/src/third_party/wiredtiger/test/suite/test_layered64.py @@ -26,7 +26,7 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. 
-import os, os.path, shutil, re, wiredtiger, wttest +import re, wiredtiger, wttest from helper_disagg import disagg_test_class, gen_disagg_storages from wtscenario import make_scenarios @@ -47,29 +47,6 @@ class test_layered64(wttest.WiredTigerTestCase): disagg_storages = gen_disagg_storages('test_layered64', disagg_only = True) scenarios = make_scenarios(disagg_storages) - num_restarts = 0 - - # Restart the node without local files - def restart_without_local_files(self): - # Close the current connection - self.close_conn() - - # Move the local files to another directory - self.num_restarts += 1 - dir = f'SAVE.{self.num_restarts}' - os.mkdir(dir) - for f in os.listdir(): - if os.path.isdir(f): - continue - if f.startswith('WiredTiger') or f.startswith('test_'): - os.rename(f, os.path.join(dir, f)) - - # Also save the PALI database (to aid debugging) - shutil.copytree('kv_home', os.path.join(dir, 'kv_home')) - - # Reopen the connection - self.open_conn() - # Test checkpoint metadata checksums. def test_layered64(self): self.conn.reconfigure('disaggregated=(role="leader")') @@ -100,7 +77,7 @@ class test_layered64(wttest.WiredTigerTestCase): # Prevent the shutdown checkpoint, and restart as follower. self.conn.reconfigure('disaggregated=(role="follower")') - self.restart_without_local_files() + self.restart_without_local_files(pickup_checkpoint=False) # Ensure that we can pick up the checkpoint without a checksum. checkpoint_meta_no_checksum = re.sub(r',metadata_checksum=[0-9a-fA-F]+', '', checkpoint_meta) @@ -115,7 +92,7 @@ class test_layered64(wttest.WiredTigerTestCase): cursor.close() # Restart again. - self.restart_without_local_files() + self.restart_without_local_files(pickup_checkpoint=False) # Corrupt the checksum. Ensure that the follower cannot pick up the checkpoint. corrupted_checksum_int = checksum_int ^ 0xFF