diff --git a/dist/api_data.py b/dist/api_data.py index 48bbee6cdde..99be61891bd 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -611,7 +611,7 @@ common_wiredtiger_open = [ Config('method', 'fsync', r''' the method used to ensure log records are stable on disk, see @ref tune_durability for more information''', - choices=['dsync', 'fsync', 'none']), + choices=['background', 'dsync', 'fsync', 'none']), ]), ] @@ -754,6 +754,13 @@ methods = { type='boolean'), ]), 'WT_SESSION.strerror' : Method([]), +'WT_SESSION.transaction_sync' : Method([ + Config('timeout', '0', r''' + maximum amount of time to wait for background sync in seconds. + A value of zero disables the timeout''', + type='int'), +]), + 'WT_SESSION.truncate' : Method([]), 'WT_SESSION.upgrade' : Method([]), 'WT_SESSION.verify' : Method([ diff --git a/dist/flags.py b/dist/flags.py index 544e3b5d549..8b89c49a60f 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -29,6 +29,7 @@ flags = { 'LOGSCAN_RECOVER', ], 'log_write' : [ + 'LOG_BACKGROUND', 'LOG_DSYNC', 'LOG_FLUSH', 'LOG_FSYNC', diff --git a/dist/stat_data.py b/dist/stat_data.py index ba6e7ec2459..0a8eccb4cd8 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -286,6 +286,7 @@ connection_stats = [ 'no_clear,no_scale'), TxnStat('txn_pinned_range', 'transaction range of IDs currently pinned', 'no_clear,no_scale'), + TxnStat('txn_sync', 'transaction sync calls'), TxnStat('txn_commit', 'transactions committed'), TxnStat('txn_fail_cache', 'transaction failures due to cache overflow'), TxnStat('txn_rollback', 'transactions rolled back'), diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c index e562af73fc3..b54066c801e 100644 --- a/examples/c/ex_all.c +++ b/examples/c/ex_all.c @@ -664,6 +664,10 @@ session_ops(WT_SESSION *session) ret = session->truncate(session, "table:mytable", NULL, NULL, NULL); /*! [Truncate a table] */ + /*! [Transaction sync] */ + ret = session->transaction_sync(session, NULL); + /*! 
[Transaction sync] */ + { /* * Insert a pair of keys so we can truncate a range. diff --git a/lang/java/java_doc.i b/lang/java/java_doc.i index 53785a3bab4..fa76fdb37e4 100644 --- a/lang/java/java_doc.i +++ b/lang/java/java_doc.i @@ -44,6 +44,7 @@ COPYDOC(__wt_session, WT_SESSION, commit_transaction) COPYDOC(__wt_session, WT_SESSION, rollback_transaction) COPYDOC(__wt_session, WT_SESSION, checkpoint) COPYDOC(__wt_session, WT_SESSION, transaction_pinned_range) +COPYDOC(__wt_session, WT_SESSION, transaction_sync) COPYDOC(__wt_connection, WT_CONNECTION, async_flush) COPYDOC(__wt_connection, WT_CONNECTION, async_new_op) COPYDOC(__wt_connection, WT_CONNECTION, close) diff --git a/src/config/config_def.c b/src/config/config_def.c index 3b043d69c2e..b5c53b44550 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -285,6 +285,11 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_salvage[] = { { NULL, NULL, NULL, NULL, NULL, 0 } }; +static const WT_CONFIG_CHECK confchk_WT_SESSION_transaction_sync[] = { + { "timeout", "int", NULL, NULL, NULL, 0 }, + { NULL, NULL, NULL, NULL, NULL, 0 } +}; + static const WT_CONFIG_CHECK confchk_WT_SESSION_verify[] = { { "dump_address", "boolean", NULL, NULL, NULL, 0 }, { "dump_blocks", "boolean", NULL, NULL, NULL, 0 }, @@ -410,7 +415,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_transaction_sync_subconfigs[] = { { "enabled", "boolean", NULL, NULL, NULL, 0 }, { "method", "string", - NULL, "choices=[\"dsync\",\"fsync\",\"none\"]", + NULL, "choices=[\"background\",\"dsync\",\"fsync\",\"none\"]", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -807,6 +812,10 @@ static const WT_CONFIG_ENTRY config_entries[] = { "", NULL, 0 }, + { "WT_SESSION.transaction_sync", + "timeout=0", + confchk_WT_SESSION_transaction_sync, 1 + }, { "WT_SESSION.truncate", "", NULL, 0 diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index 75fdd7a9aa1..c4d70b9c1ea 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -30,7 +30,9 
@@ __logmgr_sync_cfg(WT_SESSION_IMPL *session, const char **cfg)
 	WT_RET(
 	    __wt_config_gets(session, cfg, "transaction_sync.method", &cval));
 	FLD_CLR(conn->txn_logsync, WT_LOG_DSYNC | WT_LOG_FSYNC);
-	if (WT_STRING_MATCH("dsync", cval.str, cval.len))
+	if (WT_STRING_MATCH("background", cval.str, cval.len))
+		FLD_SET(conn->txn_logsync, WT_LOG_BACKGROUND);
+	else if (WT_STRING_MATCH("dsync", cval.str, cval.len))
 		FLD_SET(conn->txn_logsync, WT_LOG_DSYNC);
 	else if (WT_STRING_MATCH("fsync", cval.str, cval.len))
 		FLD_SET(conn->txn_logsync, WT_LOG_FSYNC);
@@ -280,7 +282,7 @@ __log_close_server(void *arg)
 	WT_DECL_RET;
 	WT_FH *close_fh;
 	WT_LOG *log;
-	WT_LSN close_end_lsn, close_lsn;
+	WT_LSN close_end_lsn, close_lsn, min_lsn;
 	WT_SESSION_IMPL *session;
 	int locked;
 
@@ -321,10 +323,40 @@ __log_close_server(void *arg)
 			WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
 			locked = 0;
 			__wt_spin_unlock(session, &log->log_sync_lock);
-		} else
-			/* Wait until the next event. */
-			WT_ERR(__wt_cond_wait(session,
-			    conn->log_close_cond, WT_MILLION));
+		}
+		/*
+		 * Only sync when a background request is past the sync LSN.
+		 */
+		if (WT_LOG_CMP(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
+			/*
+			 * Save the latest write LSN which is the minimum
+			 * we will have written to disk.
+			 */
+			min_lsn = log->write_lsn;
+			/*
+			 * The sync LSN we asked for better be smaller than
+			 * the current written LSN.
+			 */
+			WT_ASSERT(session,
+			    WT_LOG_CMP(&log->bg_sync_lsn, &min_lsn) <= 0);
+			WT_ERR(__wt_fsync(session, log->log_fh));
+			__wt_spin_lock(session, &log->log_sync_lock);
+			locked = 1;
+			/*
+			 * The sync LSN could have advanced while we were
+			 * writing to disk.
+			 */
+			if (WT_LOG_CMP(&log->sync_lsn, &min_lsn) <= 0) {
+				log->sync_lsn = min_lsn;
+				WT_ERR(__wt_cond_signal(
+				    session, log->log_sync_cond));
+			}
+			locked = 0;
+			__wt_spin_unlock(session, &log->log_sync_lock);
+		}
+		/* Wait until the next event. */
+		WT_ERR(__wt_cond_wait(
+		    session, conn->log_close_cond, WT_MILLION));
 	}
 
 	if (0) {
diff --git a/src/include/config.h b/src/include/config.h
index baf81b823a6..2207087a1d8 100644
--- a/src/include/config.h
+++ b/src/include/config.h
@@ -74,17 +74,18 @@ struct __wt_config_parser_impl {
 #define	WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction	22
 #define	WT_CONFIG_ENTRY_WT_SESSION_salvage		23
 #define	WT_CONFIG_ENTRY_WT_SESSION_strerror		24
-#define	WT_CONFIG_ENTRY_WT_SESSION_truncate		25
-#define	WT_CONFIG_ENTRY_WT_SESSION_upgrade		26
-#define	WT_CONFIG_ENTRY_WT_SESSION_verify		27
-#define	WT_CONFIG_ENTRY_colgroup_meta			28
-#define	WT_CONFIG_ENTRY_file_meta			29
-#define	WT_CONFIG_ENTRY_index_meta			30
-#define	WT_CONFIG_ENTRY_table_meta			31
-#define	WT_CONFIG_ENTRY_wiredtiger_open			32
-#define	WT_CONFIG_ENTRY_wiredtiger_open_all		33
-#define	WT_CONFIG_ENTRY_wiredtiger_open_basecfg		34
-#define	WT_CONFIG_ENTRY_wiredtiger_open_usercfg		35
+#define	WT_CONFIG_ENTRY_WT_SESSION_transaction_sync	25
+#define	WT_CONFIG_ENTRY_WT_SESSION_truncate		26
+#define	WT_CONFIG_ENTRY_WT_SESSION_upgrade		27
+#define	WT_CONFIG_ENTRY_WT_SESSION_verify		28
+#define	WT_CONFIG_ENTRY_colgroup_meta			29
+#define	WT_CONFIG_ENTRY_file_meta			30
+#define	WT_CONFIG_ENTRY_index_meta			31
+#define	WT_CONFIG_ENTRY_table_meta			32
+#define	WT_CONFIG_ENTRY_wiredtiger_open			33
+#define	WT_CONFIG_ENTRY_wiredtiger_open_all		34
+#define	WT_CONFIG_ENTRY_wiredtiger_open_basecfg		35
+#define	WT_CONFIG_ENTRY_wiredtiger_open_usercfg		36
 /*
  * configuration section: END
  * DO NOT EDIT: automatically built by dist/flags.py.
diff --git a/src/include/flags.h b/src/include/flags.h index 95aa6f9809d..e5282ef8867 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -30,9 +30,10 @@ #define WT_LOGSCAN_FROM_CKP 0x00000002 #define WT_LOGSCAN_ONE 0x00000004 #define WT_LOGSCAN_RECOVER 0x00000008 -#define WT_LOG_DSYNC 0x00000001 -#define WT_LOG_FLUSH 0x00000002 -#define WT_LOG_FSYNC 0x00000004 +#define WT_LOG_BACKGROUND 0x00000001 +#define WT_LOG_DSYNC 0x00000002 +#define WT_LOG_FLUSH 0x00000004 +#define WT_LOG_FSYNC 0x00000008 #define WT_READ_CACHE 0x00000001 #define WT_READ_COMPACT 0x00000002 #define WT_READ_NO_EVICT 0x00000004 diff --git a/src/include/log.h b/src/include/log.h index f4f7361b53f..cd2beb4f16f 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -123,6 +123,7 @@ typedef struct { * System LSNs */ WT_LSN alloc_lsn; /* Next LSN for allocation */ + WT_LSN bg_sync_lsn; /* Latest background sync LSN */ WT_LSN ckpt_lsn; /* Last checkpoint LSN */ WT_LSN first_lsn; /* First LSN */ WT_LSN sync_dir_lsn; /* LSN of the last directory sync */ diff --git a/src/include/session.h b/src/include/session.h index daa47d6e776..8a8b229dbc0 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -112,6 +112,7 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { WT_TXN_ISOLATION isolation; WT_TXN txn; /* Transaction state */ + WT_LSN bg_sync_lsn; /* Background sync operation LSN. */ u_int ncursors; /* Count of active file cursors. 
*/ void *block_manager; /* Block-manager support */ diff --git a/src/include/stat.h b/src/include/stat.h index cbe8167907a..9aa32734dca 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -274,6 +274,7 @@ struct __wt_connection_stats { WT_STATS txn_pinned_checkpoint_range; WT_STATS txn_pinned_range; WT_STATS txn_rollback; + WT_STATS txn_sync; WT_STATS write_io; }; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 05e92d313f2..772be74005a 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1457,6 +1457,20 @@ struct __wt_session { */ int __F(transaction_pinned_range)(WT_SESSION* session, uint64_t *range); + /*! + * Wait for a transaction to become synchronized. + * + * @snippet ex_all.c Transaction sync + * + * @param session the session handle + * @configstart{WT_SESSION.transaction_sync, see dist/api_data.py} + * @config{timeout, maximum amount of time to wait for background sync + * in seconds. A value of zero disables the timeout., an integer; + * default \c 0.} + * @configend + * @errors + */ + int __F(transaction_sync)(WT_SESSION *session, const char *config); /*! @} */ }; @@ -2113,8 +2127,8 @@ struct __wt_connection { * WT_SESSION::begin_transaction., a boolean flag; default \c false.} * @config{    method, the method used to ensure log records * are stable on disk\, see @ref tune_durability for more information., a - * string\, chosen from the following options: \c "dsync"\, \c "fsync"\, \c - * "none"; default \c fsync.} + * string\, chosen from the following options: \c "background"\, \c "dsync"\, \c + * "fsync"\, \c "none"; default \c fsync.} * @config{ ),,} * @config{use_environment_priv, use the \c WIREDTIGER_CONFIG and \c * WIREDTIGER_HOME environment variables regardless of whether or not the @@ -3525,8 +3539,10 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_TXN_PINNED_RANGE 1140 /*! 
transaction: transactions rolled back */ #define WT_STAT_CONN_TXN_ROLLBACK 1141 +/*! transaction: transaction sync calls */ +#define WT_STAT_CONN_TXN_SYNC 1142 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1142 +#define WT_STAT_CONN_WRITE_IO 1143 /*! * @} diff --git a/src/log/log.c b/src/log/log.c index 6d64cd00c2a..a0b8611dc84 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -1769,6 +1769,22 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, (void)__wt_cond_wait( session, log->log_write_cond, 10000); } + /* + * Advance the background sync LSN if needed and it is later than + * another transaction. + */ + if (LF_ISSET(WT_LOG_BACKGROUND) && + WT_LOG_CMP(&session->bg_sync_lsn, &lsn) <= 0) { + session->bg_sync_lsn = lsn; + /* + * Advance the logging subsystem background sync LSN if + * needed. + */ + __wt_spin_lock(session, &log->log_sync_lock); + if (WT_LOG_CMP(&lsn, &log->bg_sync_lsn) > 0) + log->bg_sync_lsn = lsn; + __wt_spin_unlock(session, &log->log_sync_lock); + } err: if (locked) __wt_spin_unlock(session, &log->log_slot_lock); diff --git a/src/session/session_api.c b/src/session/session_api.c index 2aa8e924302..72c9f5d7527 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -74,6 +74,7 @@ __session_clear(WT_SESSION_IMPL *session) memset(session, 0, WT_SESSION_CLEAR_SIZE(session)); session->hazard_size = 0; session->nhazard = 0; + WT_INIT_LSN(&session->bg_sync_lsn); } /* @@ -849,6 +850,78 @@ __session_transaction_pinned_range(WT_SESSION *wt_session, uint64_t *prange) err: API_END_RET(session, ret); } +/* + * __session_transaction_sync -- + * WT_SESSION->transaction_sync method. 
+ */
+static int
+__session_transaction_sync(WT_SESSION *wt_session, const char *config)
+{
+	WT_CONFIG_ITEM cval;
+	WT_CONNECTION_IMPL *conn;
+	WT_DECL_RET;
+	WT_LOG *log;
+	WT_SESSION_IMPL *session;
+	struct timespec end, now;
+	uint64_t wait_secs;
+
+	session = (WT_SESSION_IMPL *)wt_session;
+	conn = S2C(session);
+	/*
+	 * If logging is not enabled there is nothing to do.
+	 */
+	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+		return (0);
+	SESSION_API_CALL(session, transaction_sync, config, cfg);
+	WT_STAT_FAST_CONN_INCR(session, txn_sync);
+
+	log = conn->log;
+	ret = 0;
+
+	/*
+	 * If there is no background sync LSN in this session, there
+	 * is nothing to do.
+	 */
+	if (WT_IS_INIT_LSN(&session->bg_sync_lsn))
+		goto err;
+
+	/*
+	 * If our LSN is smaller than the current sync LSN then our
+	 * transaction is stable.  We're done.
+	 */
+	if (WT_LOG_CMP(&session->bg_sync_lsn, &log->sync_lsn) <= 0)
+		goto err;
+
+	/*
+	 * Our LSN is not yet stable.  With a zero (or empty) timeout we do
+	 * not wait at all and report ETIMEDOUT to the caller right away.
+	 */
+	WT_ERR(__wt_config_gets_def(session, cfg, "timeout", 0, &cval));
+	/* Never read wait_secs uninitialized: an empty value means zero. */
+	wait_secs = cval.len != 0 ? (uint64_t)cval.val : 0;
+
+	if (wait_secs == 0)
+		WT_ERR(ETIMEDOUT);
+
+	WT_ERR(__wt_epoch(session, &end));
+	end.tv_sec += wait_secs;
+
+	/*
+	 * Keep checking the LSNs until we find it is stable or we reach
+	 * our timeout.
+	 */
+	while (WT_LOG_CMP(&session->bg_sync_lsn, &log->sync_lsn) > 0) {
+		WT_ERR(__wt_epoch(session, &now));
+		if (WT_TIMECMP(now, end) <= 0)
+			WT_ERR(__wt_cond_wait(
+			    session, log->log_sync_cond, WT_MILLION));
+		else
+			WT_ERR(ETIMEDOUT);
+	}
+
+err:	API_END_RET(session, ret);
+}
+
 /*
  * __session_checkpoint --
  *	WT_SESSION->checkpoint method.
@@ -997,7 +1070,8 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, __session_commit_transaction, __session_rollback_transaction, __session_checkpoint, - __session_transaction_pinned_range + __session_transaction_pinned_range, + __session_transaction_sync }; WT_DECL_RET; WT_SESSION_IMPL *session, *session_ret; diff --git a/src/support/stat.c b/src/support/stat.c index aa30126ca91..a5fbfd2c1b9 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -550,6 +550,7 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats) "transaction: transaction range of IDs currently pinned"; stats->txn_pinned_checkpoint_range.desc = "transaction: transaction range of IDs currently pinned by a checkpoint"; + stats->txn_sync.desc = "transaction: transaction sync calls"; stats->txn_commit.desc = "transaction: transactions committed"; stats->txn_rollback.desc = "transaction: transactions rolled back"; } @@ -671,6 +672,7 @@ __wt_stat_refresh_connection_stats(void *stats_arg) stats->txn_begin.v = 0; stats->txn_checkpoint.v = 0; stats->txn_fail_cache.v = 0; + stats->txn_sync.v = 0; stats->txn_commit.v = 0; stats->txn_rollback.v = 0; }