diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c index ac4da42ab08..9cc56c56452 100644 --- a/src/btree/bt_compact.c +++ b/src/btree/bt_compact.c @@ -122,17 +122,9 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[]) * We need to ensure we don't race with page reconciliation as it's * writing the page modify information. * - * There are three ways we call reconciliation: checkpoints, threads - * writing leaf pages (usually in preparation for a checkpoint or if - * closing a file), and eviction. - * - * We're holding the schema lock which serializes with checkpoints. - */ - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)); - - /* - * Get the tree handle's flush lock which blocks threads writing leaf - * pages. + * There are two ways we call reconciliation: checkpoints and eviction. + * Get the tree's flush lock which blocks threads writing pages for + * checkpoints. */ __wt_spin_lock(session, &btree->flush_lock); diff --git a/src/include/extern.h b/src/include/extern.h index b4018eb1191..1002c2c0c26 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -627,7 +627,6 @@ extern int __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const ch extern int __wt_session_range_truncate(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop); extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp); extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp); -extern int __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp); extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config); extern int __wt_session_compact_readonly( WT_SESSION *wt_session, const char *uri, const char *config); extern int __wt_session_lock_dhandle( WT_SESSION_IMPL *session, uint32_t flags, bool *is_deadp); diff --git a/src/include/session.h b/src/include/session.h index b3c475805a4..5c442bf0b81 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -126,11 +126,24 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { void *block_manager; /* Block-manager support */ int (*block_manager_cleanup)(WT_SESSION_IMPL *); - /* Checkpoint support */ + /* Checkpoint handles */ WT_DATA_HANDLE **ckpt_handle; /* Handle list */ u_int ckpt_handle_next; /* Next empty slot */ size_t ckpt_handle_allocated; /* Bytes allocated */ + /* + * Operations acting on handles. + * + * The preferred pattern is to gather all of the required handles at + * the beginning of an operation, then drop any other locks, perform + * the operation, then release the handles. This cannot be easily + * merged with the list of checkpoint handles because some operation + * (such as compact) do checkpoints internally. + */ + WT_DATA_HANDLE **op_handle; /* Handle list */ + u_int op_handle_next; /* Next empty slot */ + size_t op_handle_allocated; /* Bytes allocated */ + void *reconcile; /* Reconciliation support */ int (*reconcile_cleanup)(WT_SESSION_IMPL *); diff --git a/src/session/session_compact.c b/src/session/session_compact.c index 564f48094df..d2fd995b554 100644 --- a/src/session/session_compact.c +++ b/src/session/session_compact.c @@ -97,13 +97,13 @@ */ /* - * __wt_compact_uri_analyze -- + * __compact_uri_analyze -- * Extract information relevant to deciding what work compact needs to * do from a URI that is part of a table schema. * Called via the schema_worker function. */ -int -__wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp) +static int +__compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp) { /* * Add references to schema URI objects to the list of objects to be @@ -119,6 +119,61 @@ __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp) return (0); } +/* + * __compact_start -- + * Start object compaction. + */ +static int +__compact_start(WT_SESSION_IMPL *session) +{ + WT_BM *bm; + + bm = S2BT(session)->bm; + return (bm->compact_start(bm, session)); +} + +/* + * __compact_end -- + * End object compaction. + */ +static int +__compact_end(WT_SESSION_IMPL *session) +{ + WT_BM *bm; + + bm = S2BT(session)->bm; + return (bm->compact_end(bm, session)); +} + +/* + * __compact_handle_append -- + * Gather a file handle to be compacted. + * Called via the schema_worker function. + */ +static int +__compact_handle_append(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_DECL_RET; + + WT_UNUSED(cfg); + + /* Make sure there is space for the next entry. */ + WT_RET(__wt_realloc_def(session, &session->op_handle_allocated, + session->op_handle_next + 1, &session->op_handle)); + + WT_RET(__wt_session_get_btree( + session, session->dhandle->name, NULL, NULL, 0)); + + /* Set compact active on the handle. */ + if ((ret = __compact_start(session)) != 0) { + WT_TRET(__wt_session_release_btree(session)); + return (ret); + } + + session->op_handle[session->op_handle_next++] = session->dhandle; + return (0); +} + /* * __session_compact_check_timeout -- * Check if the timeout has been exceeded. @@ -138,74 +193,30 @@ __session_compact_check_timeout( return (0); } -/* - * __compact_start -- - * Start objection compaction. - */ -static int -__compact_start(WT_SESSION_IMPL *session, const char *cfg[]) -{ - WT_BM *bm; - - WT_UNUSED(cfg); - - bm = S2BT(session)->bm; - return (bm->compact_start(bm, session)); -} - -/* - * __compact_end -- - * End objection compaction. - */ -static int -__compact_end(WT_SESSION_IMPL *session, const char *cfg[]) -{ - WT_BM *bm; - - WT_UNUSED(cfg); - - bm = S2BT(session)->bm; - return (bm->compact_end(bm, session)); -} - /* * __compact_file -- * Function to alternate between checkpoints and compaction calls. */ static int -__compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) +__compact_file(WT_SESSION_IMPL *session, const char *cfg[]) { struct timespec start_time; + WT_DATA_HANDLE *dhandle; WT_DECL_ITEM(t); WT_DECL_RET; - int i, tret; + int i; const char *checkpoint_cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_checkpoint), NULL, NULL }; - /* - * Start object compaction. - * - * XXX - * There's a bug here. We're configuring compaction on a block manager's - * object, and if we fail in the middle of that process, we would either - * leave a set of objects configured for compaction or try to undo that - * configuration on objects we never configured. There isn't any simple - * solution: we could track objects we've successfully configured for - * later cleanup, but we aren't holding locks to prevent racing with - * another compaction doing the same configuration, or the object being - * dropped in the middle of compaction. - */ - WT_WITH_SCHEMA_LOCK(session, ret, - ret = __wt_schema_worker( - session, uri, __compact_start, NULL, cfg, 0)); - WT_RET(ret); + dhandle = session->dhandle; /* * Force the checkpoint: we don't want to skip it because the work we * need to have done is done in the underlying block manager. */ WT_ERR(__wt_scr_alloc(session, 128, &t)); - WT_ERR(__wt_buf_fmt(session, t, "target=(\"%s\"),force=1", uri)); + WT_ERR(__wt_buf_fmt( + session, t, "target=(\"%s\"),force=1", dhandle->name)); checkpoint_cfg[1] = t->data; WT_ERR(__wt_epoch(session, &start_time)); @@ -221,9 +232,8 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_ERR(__wt_txn_checkpoint(session, checkpoint_cfg)); session->compact_state = WT_COMPACT_RUNNING; - WT_WITH_SCHEMA_LOCK(session, ret, - ret = __wt_schema_worker( - session, uri, __wt_compact, NULL, cfg, 0)); + WT_WITH_DHANDLE(session, dhandle, + ret = __wt_compact(session, cfg)); WT_ERR(ret); if (session->compact_state != WT_COMPACT_SUCCESS) break; @@ -235,11 +245,6 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) err: session->compact_state = WT_COMPACT_NONE; - WT_WITH_SCHEMA_LOCK(session, tret, - tret = __wt_schema_worker( - session, uri, __compact_end, NULL, cfg, 0)); - WT_TRET(tret); - __wt_scr_free(session, &t); return (ret); } @@ -257,6 +262,7 @@ __wt_session_compact( WT_DECL_RET; WT_SESSION_IMPL *session; WT_TXN *txn; + u_int i; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, compact, config, cfg); @@ -283,8 +289,8 @@ __wt_session_compact( /* Find the types of data sources are being compacted. */ WT_WITH_SCHEMA_LOCK(session, ret, - ret = __wt_schema_worker( - session, uri, NULL, __wt_compact_uri_analyze, cfg, 0)); + ret = __wt_schema_worker(session, uri, + __compact_handle_append, __compact_uri_analyze, cfg, 0)); WT_ERR(ret); if (session->compact->lsm_count != 0) @@ -301,11 +307,25 @@ __wt_session_compact( WT_ERR_MSG(session, EINVAL, " File compaction not permitted in a transaction"); - WT_ERR(__compact_file(session, uri, cfg)); + for (i = 0; i < session->op_handle_next; ++i) { + WT_WITH_DHANDLE(session, session->op_handle[i], + ret = __compact_file(session, cfg)); + WT_ERR(ret); + } } err: session->compact = NULL; + for (i = 0; i < session->op_handle_next; ++i) { + WT_WITH_DHANDLE(session, session->op_handle[i], + WT_TRET(__compact_end(session))); + WT_WITH_DHANDLE(session, session->op_handle[i], + WT_TRET(__wt_session_release_btree(session))); + } + + __wt_free(session, session->op_handle); + session->op_handle_allocated = session->op_handle_next = 0; + /* * Release common session resources (for example, checkpoint may acquire * significant reconciliation structures/memory).