SERVER-51690 Futurize and refactor Mongos execCommandClient

SERVER-51690 Futurize Mongos runCommand for async command execution
SERVER-51690 Futurize clientCommand to support async command execution
2020-11-18 00:38:58 +00:00 · 2020-11-17 17:07:44 +00:00 · 2020-11-05 02:21:26 +00:00 · 2020-11-03 16:43:16 +00:00 · 2020-11-02 17:38:29 +00:00 · 2020-10-23 21:48:26 +00:00
51 changed files with 4867 additions and 2285 deletions
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -12600,6 +12600,38 @@ buildvariants:
    distros:
    - ubuntu1804-build

+- name: enterprise-ubuntu-fixed-service-executor-1604-64-bit
+  display_name: "~ Enterprise Ubuntu 16.04 (with FixedServiceExecutor)"
+  batchtime: 1440 # 1 day
+  run_on:
+    - ubuntu1604-test
+  modules:
+  - enterprise
+  expansions:
+    scons_cache_scope: shared
+    compile_flags: MONGO_DISTMOD=ubuntu1604 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars
+    multiversion_platform: ubuntu1604
+    multiversion_edition: enterprise
+    test_flags: >-
+      --mongosSetParameters="{initialServiceExecutorThreadingModel: borrowed}"
+      --mongodSetParameters="{initialServiceExecutorThreadingModel: borrowed}"
+
+  tasks:
+  - name: compile_all_run_unittests_TG
+    distros:
+    - ubuntu1604-build
+  - name: .aggregation !.no_async
+  - name: .sharding .auth
+  - name: .sharding .causally_consistent !.wo_snapshot
+  - name: .concurrency .common !.kill_terminate
+  - name: .integration !.audit
+  - name: .jscore .common
+  - name: noPassthrough_gen
+  - name: noPassthroughWithMongod_gen
+  - name: .logical_session_cache .one_sec
+  - name: .sharding .jscore !.wo_snapshot !.multi_stmt
+  - name: .sharding .common !.csrs
+
 - name: enterprise-ubuntu-scanning-replica-set-monitor-1604-64-bit
  display_name: "~ Enterprise Ubuntu 16.04 (with ScanningReplicaSetMonitor)"
  batchtime: 1440 # 1 day
--- a/jstests/noPassthrough/max_conns_override.js
+++ b/jstests/noPassthrough/max_conns_override.js
@@ -1,43 +1,164 @@
 (function() {
 'use strict';
+
+load("jstests/libs/host_ipaddr.js");
+
 const configuredMaxConns = 5;
 const configuredReadyAdminThreads = 3;
 let conn = MongoRunner.runMongod({
    config: "jstests/noPassthrough/libs/max_conns_override_config.yaml",
 });

-// Use up all the maxConns with junk connections, all of these should succeed
-let maxConns = [];
-for (let i = 0; i < 5; i++) {
-    maxConns.push(new Mongo(`127.0.0.1:${conn.port}`));
-    let tmpDb = maxConns[maxConns.length - 1].getDB("admin");
-    assert.commandWorked(tmpDb.runCommand({isMaster: 1}));
+// Get serverStatus to check that we have the right number of threads in the right places
+function getStats() {
+    return assert.commandWorked(conn.getDB("admin").runCommand({serverStatus: 1}));
 }

-// Get serverStatus to check that we have the right number of threads in the right places
-let status = conn.getDB("admin").runCommand({serverStatus: 1});
-const connectionsStatus = status["connections"];
-const reservedExecutorStatus = connectionsStatus["adminConnections"];
-const normalExecutorStatus = status["network"]["serviceExecutorTaskStats"];
+function verifyStats({exemptCount, normalCount}) {
+    const totalCount = exemptCount + normalCount;

-// Log these serverStatus sections so we can debug this easily
-print("connections status section: ", tojson(connectionsStatus));
-print("normal executor status section: ", tojson(normalExecutorStatus));
+    // Verify that we have updated serverStatus.
+    assert.soon(() => {
+        const serverStatus = getStats();
+        const executors = serverStatus.network.serviceExecutors;

-// The number of "available" connections should be less than zero, because we've used
-// all of maxConns. We're over the limit!
-assert.lt(connectionsStatus["available"], 0);
-// The number of "current" connections should be greater than maxConns
-assert.gt(connectionsStatus["current"], configuredMaxConns);
-// The number of ready threads should be the number of readyThreads we configured, since
-// every thread spawns a new thread on startup
-assert.eq(reservedExecutorStatus["readyThreads"] + reservedExecutorStatus["startingThreads"],
-          configuredReadyAdminThreads);
-// The number of running admin threads should be greater than the readyThreads, because
-// one is being used right now
-assert.gt(reservedExecutorStatus["threadsRunning"], reservedExecutorStatus["readyThreads"]);
-// The normal serviceExecutor should only be running maxConns number of threads
-assert.eq(normalExecutorStatus["threadsRunning"], configuredMaxConns);
+        const currentCount = serverStatus.connections.current;
+        if (currentCount != totalCount) {
+            print(`Not yet at the expected count of connections: ${currentCount} != ${totalCount}`);
+            return false;
+        }

-MongoRunner.stopMongod(conn);
+        const readyAdminThreads =
+            executors.reserved.threadsRunning - executors.reserved.clientsRunning;
+        if (readyAdminThreads < configuredReadyAdminThreads) {
+            print("Not enough admin threads yet: " +
+                  `${readyAdminThreads} < ${configuredReadyAdminThreads}`);
+            return false;
+        }
+
+        const threadedCount = serverStatus.connections.threaded;
+        const threadedExecutorCount =
+            executors.passthrough.clientsInTotal + executors.reserved.clientsInTotal;
+        if (threadedCount != threadedExecutorCount) {
+            print("Not enough running threaded clients yet: " +
+                  `${threadedCount} != ${threadedExecutorCount}`);
+            return false;
+        }
+
+        const totalExecutorCount = threadedExecutorCount + executors.fixed.clientsInTotal;
+        if (totalCount != totalExecutorCount) {
+            print(`Not enough running clients yet: ${totalCount} != ${totalExecutorCount}`);
+            return false;
+        }
+
+        return true;
+    }, "Failed to verify initial conditions", 10000);
+
+    const serverStatus = getStats();
+    const connectionsStatus = serverStatus.connections;
+    const reservedExecutorStatus = serverStatus.network.serviceExecutors.reserved;
+    const fixedExecutorStatus = serverStatus.network.serviceExecutors.fixed;
+    const executorStatus = serverStatus.network.serviceExecutors.passthrough;
+
+    // Log these serverStatus sections so we can debug this easily.
+    const filteredSections = {
+        connections: connectionsStatus,
+        network: {
+            serviceExecutors: {
+                passthrough: executorStatus,
+                fixed: fixedExecutorStatus,
+                reserved: reservedExecutorStatus
+            }
+        }
+    };
+    print(`serverStatus: ${tojson(filteredSections)}`);
+
+    if (totalCount > configuredMaxConns) {
+        // If we're over maxConns, there are no available connections.
+        assert.lte(connectionsStatus["available"], -1);
+    } else {
+        assert.eq(connectionsStatus["available"], configuredMaxConns - totalCount);
+    }
+
+    // All connections on an exempt CIDR should be marked as limitExempt.
+    assert.eq(connectionsStatus["limitExempt"], exemptCount);
+
+    // The normal serviceExecutor should only be running at most maxConns number of threads.
+    assert.lte(executorStatus["threadsRunning"], configuredMaxConns);
+
+    // Clients on the normal executor own their thread and cannot wait asynchronously.
+    assert.eq(executorStatus["clientsRunning"], executorStatus["clientsInTotal"]);
+    assert.eq(executorStatus["clientsRunning"], executorStatus["threadsRunning"]);
+    assert.eq(executorStatus["clientsWaitingForData"], 0);
+
+    // Clients on the reserved executor run on a thread and cannot wait asynchronously.
+    assert.eq(reservedExecutorStatus["clientsRunning"], reservedExecutorStatus["clientsInTotal"]);
+    assert.lte(reservedExecutorStatus["clientsRunning"], reservedExecutorStatus["threadsRunning"]);
+    assert.eq(reservedExecutorStatus["clientsWaitingForData"], 0);
+
+    // Clients on the fixed executor borrow one thread and can wait asynchronously.
+    assert.lte(fixedExecutorStatus["clientsRunning"], fixedExecutorStatus["clientsInTotal"]);
+    assert.lte(fixedExecutorStatus["clientsRunning"], fixedExecutorStatus["threadsRunning"]);
+    assert.lte(fixedExecutorStatus["clientsWaitingForData"], fixedExecutorStatus["clientsInTotal"]);
+}
+
+// Use the external ip to avoid our exempt CIDR.
+let ip = get_ipaddr();
+
+try {
+    let adminConns = [];
+    let normalConns = [];
+
+    // We start with one exempt control socket.
+    let exemptCount = 1;
+    let normalCount = 0;
+
+    // Do an initial verification.
+    verifyStats({exemptCount: exemptCount, normalCount: normalCount});
+
+    for (let i = 0; i < 2 * configuredMaxConns; i++) {
+        // Make some connections using the exempt CIDR and some using the normal CIDR.
+        let isExempt = (i % 2 == 0);
+        try {
+            if (isExempt) {
+                adminConns.push(new Mongo(`127.0.0.1:${conn.port}`));
+                ++exemptCount;
+            } else {
+                normalConns.push(new Mongo(`${ip}:${conn.port}`));
+                ++normalCount;
+            }
+        } catch (e) {
+            print(e);
+
+            // If we couldn't connect, that means we've exceeded maxConns and we're using the normal
+            // CIDR.
+            assert(!isExempt);
+            assert(i >= configuredMaxConns);
+        }
+
+        verifyStats({exemptCount: exemptCount, normalCount: normalCount});
+    }
+
+    // Some common sense assertions around what was admitted.
+    assert.eq(exemptCount, configuredMaxConns + 1);
+    assert.lte(normalCount, configuredMaxConns);
+
+    // Destroy all admin connections and verify assumptions.
+    while (adminConns.length) {
+        adminConns.pop().close();
+        --exemptCount;
+
+        verifyStats({exemptCount: exemptCount, normalCount: normalCount});
+    }
+
+    // Destroy all normal connections and verify assumptions.
+    while (normalConns.length) {
+        normalConns.pop().close();
+        --normalCount;
+
+        verifyStats({exemptCount: exemptCount, normalCount: normalCount});
+    }
+} finally {
+    MongoRunner.stopMongod(conn);
+}
 })();
--- a/src/mongo/base/error_codes.yml
+++ b/src/mongo/base/error_codes.yml
@@ -401,6 +401,13 @@ error_codes:

    - {code: 327, name: NoSuchTenantMigration}

+    - {code: 328, name: SkipCommandExecution}
+
+    - {code: 329, name: FailedToRunWithReplyBuilder}
+
+    # Internal error
+    - {code: 330, name: ServiceExecutorInShutdown, categories: [ShutdownError,CancelationError]}
+
    # Error codes 4000-8999 are reserved.

    # Non-sequential error codes for compatibility only)
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -450,6 +450,7 @@ env.Library(
    source=[
        'baton.cpp',
        'client.cpp',
+        'client_strand.cpp',
        'default_baton.cpp',
        'operation_context.cpp',
        'operation_context_group.cpp',
@@ -2212,6 +2213,7 @@ envWithAsio.CppUnitTest(
    target='db_unittests',
    source=[
        'catalog_raii_test.cpp',
+        'client_strand_test.cpp',
        'collection_index_usage_tracker_test.cpp',
        'commands_test.cpp',
        'curop_test.cpp',
--- a/src/mongo/db/client.cpp
+++ b/src/mongo/db/client.cpp
@@ -155,7 +155,7 @@ bool haveClient() {
 }

 ServiceContext::UniqueClient Client::releaseCurrent() {
-    invariant(haveClient());
+    invariant(haveClient(), "No client to release");
    return std::move(currentClient);
 }

--- a/src/mongo/db/client_strand.cpp
+++ b/src/mongo/db/client_strand.cpp
@@ -0,0 +1,95 @@
+/**
+ *    Copyright (C) 2020-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/client_strand.h"
+
+#include "mongo/logv2/log.h"
+#include "mongo/util/concurrency/thread_name.h"
+
+namespace mongo {
+namespace {
+struct ClientStrandData {
+    ClientStrand* strand = nullptr;
+};
+
+auto getClientStrandData = Client::declareDecoration<ClientStrandData>();
+}  // namespace
+
+boost::intrusive_ptr<ClientStrand> ClientStrand::make(ServiceContext::UniqueClient client) {
+    auto strand = make_intrusive<ClientStrand>(std::move(client));
+    getClientStrandData(strand->getClientPointer()).strand = strand.get();
+    return strand;
+}
+
+boost::intrusive_ptr<ClientStrand> ClientStrand::get(Client* client) {
+    return getClientStrandData(client).strand;
+}
+
+void ClientStrand::_setCurrent() noexcept {
+    invariant(_isBound.load());
+    invariant(_client);
+
+    LOGV2_DEBUG(
+        4910701, kDiagnosticLogLevel, "Setting the Client", "client"_attr = _client->desc());
+
+    // Set the Client for this thread so calls to Client::getCurrent() work as expected.
+    Client::setCurrent(std::move(_client));
+
+    // Set up the thread name.
+    auto oldThreadName = getThreadName();
+    StringData threadName = _clientPtr->desc();
+    if (oldThreadName != threadName) {
+        _oldThreadName = oldThreadName.toString();
+        setThreadName(threadName);
+        LOGV2_DEBUG(4910703, kDiagnosticLogLevel, "Set thread name", "name"_attr = threadName);
+    }
+}
+
+void ClientStrand::_releaseCurrent() noexcept {
+    invariant(_isBound.load());
+    invariant(!_client);
+
+    // Reclaim the client.
+    _client = Client::releaseCurrent();
+    invariant(_client.get() == _clientPtr, kUnableToRecoverClient);
+
+    if (!_oldThreadName.empty()) {
+        // Reset the old thread name.
+        setThreadName(_oldThreadName);
+    }
+
+    LOGV2_DEBUG(
+        4910702, kDiagnosticLogLevel, "Released the Client", "client"_attr = _client->desc());
+}
+
+}  // namespace mongo
--- a/src/mongo/db/client_strand.h
+++ b/src/mongo/db/client_strand.h
@@ -0,0 +1,214 @@
+/**
+ *    Copyright (C) 2020-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#include <string>
+
+#include "mongo/db/client.h"
+#include "mongo/db/service_context.h"
+#include "mongo/platform/atomic_word.h"
+#include "mongo/stdx/mutex.h"
+#include "mongo/util/intrusive_counter.h"
+#include "mongo/util/out_of_line_executor.h"
+
+namespace mongo {
+
+/**
+ * ClientStrand is a reference counted type for loaning Clients to threads.
+ *
+ * ClientStrand maintains the lifetime of its wrapped Client object and provides functionality to
+ * "bind" that Client to one and only one thread at a time. Its functions are synchronized.
+ */
+class ClientStrand final : public RefCountable {
+    static constexpr auto kDiagnosticLogLevel = 3;
+
+public:
+    static constexpr auto kUnableToRecoverClient = "Unable to recover Client for ClientStrand";
+
+    /**
+     * A simple RAII guard to set and release Clients.
+     */
+    class Guard {
+    public:
+        Guard() = default;
+        Guard(Guard&&) = default;
+        Guard& operator=(Guard&&) = default;
+
+        Guard(const Guard&) = delete;
+        Guard& operator=(const Guard&) = delete;
+
+        Guard(ClientStrand* strand) : _strand(strand) {
+            // Hold the lock for as long as the Guard is around. This forces other consumers to
+            // queue behind the Guard.
+            _strand->_mutex.lock();
+            _strand->_isBound.store(true);
+
+            _strand->_setCurrent();
+        }
+
+        ~Guard() {
+            dismiss();
+        }
+
+        void dismiss() noexcept {
+            auto strand = std::exchange(_strand, {});
+            if (!strand) {
+                return;
+            }
+
+            strand->_releaseCurrent();
+            strand->_isBound.store(false);
+            strand->_mutex.unlock();
+        }
+
+        Client* get() noexcept {
+            return _strand->getClientPointer();
+        }
+
+        Client* operator->() noexcept {
+            return get();
+        }
+
+        Client& operator*() noexcept {
+            return *get();
+        }
+
+    private:
+        boost::intrusive_ptr<ClientStrand> _strand;
+    };
+
+    /**
+     * A simple wrapping executor to run tasks while a Client is bound.
+     */
+    class Executor final : public OutOfLineExecutor {
+    public:
+        Executor(ClientStrand* strand, ExecutorPtr exec)
+            : _strand(strand), _exec(std::move(exec)) {}
+        void schedule(Task task) override;
+
+    private:
+        boost::intrusive_ptr<ClientStrand> _strand;
+        ExecutorPtr _exec;
+    };
+
+    /**
+     * Make a new ClientStrand from a UniqueClient.
+     */
+    static boost::intrusive_ptr<ClientStrand> make(ServiceContext::UniqueClient client);
+
+    /**
+     * Acquire an owning ClientStrand given a client.
+     *
+     * This will return nullptr if the Client does not belong to a ClientStrand.
+     */
+    static boost::intrusive_ptr<ClientStrand> get(Client* client);
+
+    ClientStrand(ServiceContext::UniqueClient client)
+        : _clientPtr(client.get()), _client(std::move(client)) {}
+
+    /**
+     * Get a pointer to the underlying Client.
+     */
+    Client* getClientPointer() noexcept {
+        return _clientPtr;
+    }
+
+    /**
+     * Set the current Client for this thread and return a RAII guard to release it eventually.
+     *
+     * If the Client is currently bound, this function will block until the Client is available.
+     */
+    auto bind() {
+        return Guard(this);
+    }
+
+    /**
+     * Run a Task with the Client bound to the current thread.
+     *
+     * This function runs the task inline and assumes that the Client is not already bound to the
+     * current thread. If the Client is currently bound, this function will block until it is
+     * released.
+     */
+    template <typename Task, typename... Args>
+    void run(Task task, Args&&... args) {
+        auto guard = bind();
+
+        return task(std::forward<Args>(args)...);
+    }
+
+    /**
+     * Make a wrapped executor around another.
+     */
+    ExecutorPtr makeExecutor(ExecutorPtr exec) {
+        return std::make_shared<Executor>(this, std::move(exec));
+    }
+
+    /**
+     * Return true if the Client is currently bound to a thread.
+     */
+    bool isBound() const noexcept {
+        return _isBound.load();
+    }
+
+private:
+    /**
+     * Bind the Client to the current thread.
+     *
+     * This is only valid to call if no other thread has the Client bound.
+     */
+    void _setCurrent() noexcept;
+
+    /**
+     * Release the Client from the current thread.
+     *
+     * This is only valid to call once after each _setCurrent(). It is not valid to mix this with
+     * Client::releaseCurrent().
+     */
+    void _releaseCurrent() noexcept;
+
+    Client* const _clientPtr;
+
+    stdx::mutex _mutex;  // NOLINT
+
+    // Once we have stdx::atomic::wait(), we can get rid of the mutex in favor of this variable.
+    AtomicWord<bool> _isBound{false};
+
+    ServiceContext::UniqueClient _client;
+
+    std::string _oldThreadName;
+};
+
+inline void ClientStrand::Executor::schedule(Task task) {
+    _exec->schedule([task = std::forward<Task>(task), strand = _strand](Status status) mutable {
+        strand->run(std::move(task), std::move(status));
+    });
+}
+
+using ClientStrandPtr = boost::intrusive_ptr<ClientStrand>;
+
+}  // namespace mongo
--- a/src/mongo/db/client_strand_test.cpp
+++ b/src/mongo/db/client_strand_test.cpp
@@ -0,0 +1,381 @@
+/**
+ *    Copyright (C) 2020-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include <memory>
+
+#include "mongo/db/client_strand.h"
+#include "mongo/db/service_context_test_fixture.h"
+#include "mongo/unittest/barrier.h"
+#include "mongo/unittest/death_test.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/assert_util.h"
+#include "mongo/util/concurrency/thread_name.h"
+#include "mongo/util/executor_test_util.h"
+
+namespace mongo {
+namespace {
+
+class ClientStrandTest : public unittest::Test, public ScopedGlobalServiceContextForTest {
+public:
+    constexpr static auto kClientName1 = "foo";
+    constexpr static auto kClientName2 = "bar";
+
+    void assertStrandNotBound(const ClientStrandPtr& strand) {
+        ASSERT_FALSE(haveClient());
+        ASSERT_FALSE(strand->isBound());
+    }
+
+    void assertStrandBound(const ClientStrandPtr& strand) {
+        // We have a Client.
+        ASSERT_TRUE(haveClient());
+        ASSERT_TRUE(strand->isBound());
+
+        // The current Client and Thread have the correct name.
+        auto client = strand->getClientPointer();
+        ASSERT_EQ(client, Client::getCurrent());
+        ASSERT_EQ(client->desc(), getThreadName());
+    }
+};
+
+TEST_F(ClientStrandTest, CreateOnly) {
+    auto strand = ClientStrand::make(getServiceContext()->makeClient(kClientName1));
+
+    // We have no bound Client.
+    assertStrandNotBound(strand);
+
+    // The Client should exist.
+    ASSERT_TRUE(strand->getClientPointer());
+
+    // The Client should reference its ClientStrand.
+    ASSERT_EQ(ClientStrand::get(strand->getClientPointer()), strand);
+}
+
+TEST_F(ClientStrandTest, BindOnce) {
+    auto strand = ClientStrand::make(getServiceContext()->makeClient(kClientName1));
+
+    // We have no bound Client.
+    assertStrandNotBound(strand);
+
+    {
+        // Bind a single client
+        auto guard = strand->bind();
+        assertStrandBound(strand);
+
+        // The guard allows us to get the Client.
+        ASSERT_EQ(guard.get(), strand->getClientPointer());
+    }
+
+    // We have no bound Client again.
+    assertStrandNotBound(strand);
+}
+
+TEST_F(ClientStrandTest, BindMultipleTimes) {
+    auto strand = ClientStrand::make(getServiceContext()->makeClient(kClientName1));
+
+    // We have no bound Client.
+    assertStrandNotBound(strand);
+
+    for (auto i = 0; i < 100; ++i) {
+        // Bind a bunch of times.
+
+        {
+            auto guard = strand->bind();
+            assertStrandBound(strand);
+        }
+
+        // We have no bound Client again.
+        assertStrandNotBound(strand);
+    }
+}
+
+TEST_F(ClientStrandTest, BindMultipleTimesAndDismiss) {
+    auto strand = ClientStrand::make(getServiceContext()->makeClient(kClientName1));
+
+    // We have no bound Client.
+    assertStrandNotBound(strand);
+
+    auto guard = strand->bind();
+    for (auto i = 0; i < 100; ++i) {
+        assertStrandBound(strand);
+
+        // Dismiss the current guard.
+        guard.dismiss();
+        assertStrandNotBound(strand);
+
+        // Assign a new guard.
+        guard = strand->bind();
+    }
+
+    // At the end we have a strand bound.
+    assertStrandBound(strand);
+}
+
+TEST_F(ClientStrandTest, BindLocalBeforeWorkerThread) {
+    auto strand = ClientStrand::make(getServiceContext()->makeClient(kClientName1));
+    auto barrier = std::make_shared<unittest::Barrier>(2);
+
+    // Set our state to an initial value. It is unsynchronized, but ClientStrand does synchronize,
+    // thus it should pass TSAN.
+    enum State {
+        kStarted,
+        kLocalThread,
+        kWorkerThread,
+    };
+    State state = kStarted;
+
+    assertStrandNotBound(strand);
+
+    auto thread = stdx::thread([&, barrier] {
+        // Wait for local thread to bind the strand.
+        barrier->countDownAndWait();
+
+        auto guard = strand->bind();
+        assertStrandBound(strand);
+
+        // We've acquired the strand after the local thread.
+        ASSERT_EQ(state, kLocalThread);
+        state = kWorkerThread;
+    });
+
+    {
+        auto guard = strand->bind();
+        assertStrandBound(strand);
+
+        // Wait for the worker thread.
+        barrier->countDownAndWait();
+
+        // We've acquired the strand first.
+        ASSERT_EQ(state, kStarted);
+        state = kLocalThread;
+    }
+
+    thread.join();
+
+    assertStrandNotBound(strand);
+
+    // Bind one last time to synchronize the state.
+    auto guard = strand->bind();
+
+    // The worker thread acquired the strand last.
+    ASSERT_EQ(state, kWorkerThread);
+}
+
+TEST_F(ClientStrandTest, BindLocalAfterWorkerThread) {
+    auto strand = ClientStrand::make(getServiceContext()->makeClient(kClientName1));
+    auto barrier = std::make_shared<unittest::Barrier>(2);
+
+    // Set our state to an initial value. It is unsynchronized, but ClientStrand does synchronize,
+    // thus it should pass TSAN.
+    enum State {
+        kStarted,
+        kLocalThread,
+        kWorkerThread,
+    };
+    State state = kStarted;
+
+    assertStrandNotBound(strand);
+
+    auto thread = stdx::thread([&, barrier] {
+        auto guard = strand->bind();
+        assertStrandBound(strand);
+
+        // Wait for local thread.
+        barrier->countDownAndWait();
+
+        // We've acquired the strand before the local thread.
+        ASSERT_EQ(state, kStarted);
+        state = kWorkerThread;
+    });
+
+    {
+        // Wait for the worker thread to bind the strand.
+        barrier->countDownAndWait();
+
+        auto guard = strand->bind();
+        assertStrandBound(strand);
+
+        // We've acquired the strand after the worker thread.
+        ASSERT_EQ(state, kWorkerThread);
+        state = kLocalThread;
+    }
+
+    thread.join();
+
+    assertStrandNotBound(strand);
+
+    // Bind one last time to synchronize the state.
+    auto guard = strand->bind();
+    assertStrandBound(strand);
+
+    // The local thread acquired the strand last.
+    ASSERT_EQ(state, kLocalThread);
+}
+
+TEST_F(ClientStrandTest, BindManyWorkerThreads) {
+    auto strand = ClientStrand::make(getServiceContext()->makeClient(kClientName1));
+
+    constexpr size_t kCount = 100;
+    auto barrier = std::make_shared<unittest::Barrier>(kCount);
+
+    size_t threadsBound = 0;
+
+    assertStrandNotBound(strand);
+
+    std::vector<stdx::thread> threads;
+    for (size_t i = 0; i < kCount; ++i) {
+        threads.emplace_back([&, barrier] {
+            // Wait for the herd.
+            barrier->countDownAndWait();
+
+            auto guard = strand->bind();
+            assertStrandBound(strand);
+
+            // This is technically atomic on x86 but TSAN should complain if it isn't synchronized.
+            ++threadsBound;
+        });
+    }
+
+    for (auto& thread : threads) {
+        thread.join();
+    }
+
+    assertStrandNotBound(strand);
+
+    // Bind one last time to access the count.
+    auto guard = strand->bind();
+    assertStrandBound(strand);
+
+    // The strand has been bound by as many threads as we expected.
+    ASSERT_EQ(threadsBound, kCount);
+}
+
+TEST_F(ClientStrandTest, SwapStrands) {
+    auto strand1 = ClientStrand::make(getServiceContext()->makeClient(kClientName1));
+    auto strand2 = ClientStrand::make(getServiceContext()->makeClient(kClientName2));
+
+    assertStrandNotBound(strand1);
+    assertStrandNotBound(strand2);
+
+    for (size_t i = 0; i < 100; ++i) {
+        // Alternate between binding strand1 and strand2.
+        auto& strand = (i % 2 == 0) ? strand1 : strand2;
+        auto guard = strand->bind();
+
+        assertStrandBound(strand);
+    }
+
+    assertStrandNotBound(strand1);
+    assertStrandNotBound(strand2);
+}
+
+TEST_F(ClientStrandTest, Executor) {
+    constexpr size_t kCount = 100;
+
+    auto strand = ClientStrand::make(getServiceContext()->makeClient(kClientName1));
+
+    assertStrandNotBound(strand);
+
+    auto exec = strand->makeExecutor(InlineQueuedCountingExecutor::make());
+
+    // Schedule a series of tasks onto the wrapped executor. Note that while this is running on the
+    // local thread, this is not true recursive execution which would deadlock.
+    size_t i = 0;
+    unique_function<void(void)> reschedule;
+    reschedule = [&] {
+        exec->schedule([&](Status status) {
+            invariant(status);
+            assertStrandBound(strand);
+
+            if (++i >= kCount) {
+                // We've rescheduled enough.
+                return;
+            }
+
+            reschedule();
+        });
+    };
+
+    reschedule();
+    assertStrandNotBound(strand);
+
+    // Confirm we scheduled as many times as we expected.
+    ASSERT_EQ(i, kCount);
+}
+
+DEATH_TEST_F(ClientStrandTest, ReplaceCurrentAfterBind, ClientStrand::kUnableToRecoverClient) {
+    auto strand = ClientStrand::make(getServiceContext()->makeClient(kClientName1));
+
+    assertStrandNotBound(strand);
+
+    auto guard = strand->bind();
+    assertStrandBound(strand);
+
+    // We need to capture the UniqueClient to avoid ABA pointer comparison issues with tcmalloc. In
+    // practice, this failure mode is most likely if someone is using an AlternativeClientRegion,
+    // which has its own issues.
+    auto stolenClient = Client::releaseCurrent();
+    Client::setCurrent(getServiceContext()->makeClient(kClientName2));
+
+    // Dismiss the guard for an explicit failure point.
+    guard.dismiss();
+}
+
+DEATH_TEST_F(ClientStrandTest, ReleaseCurrentAfterBind, "No client to release") {
+    auto strand = ClientStrand::make(getServiceContext()->makeClient(kClientName1));
+
+    assertStrandNotBound(strand);
+
+    auto guard = strand->bind();
+    assertStrandBound(strand);
+
+    Client::releaseCurrent();
+
+    // Dismiss the guard for an explicit failure point.
+    guard.dismiss();
+}
+
+DEATH_TEST_F(ClientStrandTest, BindAfterBind, "Already have client on this thread") {
+    auto strand1 = ClientStrand::make(getServiceContext()->makeClient(kClientName1));
+    auto strand2 = ClientStrand::make(getServiceContext()->makeClient(kClientName2));
+
+    assertStrandNotBound(strand1);
+    assertStrandNotBound(strand2);
+
+    // Bind our first strand.
+    auto guard1 = strand1->bind();
+    assertStrandBound(strand1);
+
+    // Bind our second strand...and fail hard.
+    auto guard2 = strand2->bind();
+}
+
+}  // namespace
+}  // namespace mongo
--- a/src/mongo/db/commands.cpp
+++ b/src/mongo/db/commands.cpp
@@ -184,6 +184,20 @@ void CommandHelpers::runCommandInvocation(OperationContext* opCtx,
    }
 }

+Future<void> CommandHelpers::runCommandInvocationAsync(
+    std::shared_ptr<RequestExecutionContext> rec,
+    std::shared_ptr<CommandInvocation> invocation) try {
+    auto hooks = getCommandInvocationHooksHandle(rec->getOpCtx()->getServiceContext());
+    if (hooks)
+        hooks->onBeforeAsyncRun(rec, invocation.get());
+    return invocation->runAsync(rec).then([rec, hooks, invocation] {
+        if (hooks)
+            hooks->onAfterAsyncRun(rec, invocation.get());
+    });
+} catch (const DBException& e) {
+    return e.toStatus();
+}
+
 void CommandHelpers::auditLogAuthEvent(OperationContext* opCtx,
                                       const CommandInvocation* invocation,
                                       const OpMsgRequest& request,
@@ -787,6 +801,16 @@ private:
        }
    }

+    Future<void> runAsync(std::shared_ptr<RequestExecutionContext> rec) override {
+        return _command->runAsync(rec, _dbName).onError([rec](Status status) {
+            if (status.code() != ErrorCodes::FailedToRunWithReplyBuilder)
+                return status;
+            BSONObjBuilder bob = rec->getReplyBuilder()->getBodyBuilder();
+            CommandHelpers::appendSimpleCommandStatus(bob, false);
+            return Status::OK();
+        });
+    }
+
    void explain(OperationContext* opCtx,
                 ExplainOptions::Verbosity verbosity,
                 rpc::ReplyBuilderInterface* result) override {
--- a/src/mongo/db/commands.h
+++ b/src/mongo/db/commands.h
@@ -30,6 +30,7 @@
 #pragma once

 #include <boost/optional.hpp>
+#include <fmt/format.h>
 #include <functional>
 #include <string>
 #include <vector>
@@ -47,10 +48,12 @@
 #include "mongo/db/query/explain.h"
 #include "mongo/db/read_concern_support_result.h"
 #include "mongo/db/repl/read_concern_args.h"
+#include "mongo/db/request_execution_context.h"
 #include "mongo/db/write_concern.h"
 #include "mongo/rpc/op_msg.h"
 #include "mongo/rpc/reply_builder_interface.h"
 #include "mongo/util/fail_point.h"
+#include "mongo/util/future.h"
 #include "mongo/util/string_map.h"

 namespace mongo {
@@ -91,12 +94,28 @@ public:
                             const OpMsgRequest& request,
                             CommandInvocation* invocation) = 0;

+    /**
+     * A behavior to perform before CommandInvocation::runAsync(). Defaults to `onBeforeRun(...)`.
+     */
+    virtual void onBeforeAsyncRun(std::shared_ptr<RequestExecutionContext> rec,
+                                  CommandInvocation* invocation) {
+        onBeforeRun(rec->getOpCtx(), rec->getRequest(), invocation);
+    }
+
    /**
     * A behavior to perform after CommandInvocation::run()
     */
    virtual void onAfterRun(OperationContext* opCtx,
                            const OpMsgRequest& request,
                            CommandInvocation* invocation) = 0;
+
+    /**
+     * A behavior to perform after CommandInvocation::runAsync(). Defaults to `onAfterRun(...)`.
+     */
+    virtual void onAfterAsyncRun(std::shared_ptr<RequestExecutionContext> rec,
+                                 CommandInvocation* invocation) {
+        onAfterRun(rec->getOpCtx(), rec->getRequest(), invocation);
+    }
 };

 // Various helpers unrelated to any single command or to the command registry.
@@ -235,6 +254,15 @@ struct CommandHelpers {
                                     CommandInvocation* invocation,
                                     rpc::ReplyBuilderInterface* response);

+    /**
+     * Runs a previously parsed command and propagates the result to the ReplyBuilderInterface. For
+     * commands that do not offer an implementation tailored for asynchronous execution, the future
+     * schedules the execution of the default implementation, historically designed for synchronous
+     * execution.
+     */
+    static Future<void> runCommandInvocationAsync(std::shared_ptr<RequestExecutionContext> rec,
+                                                  std::shared_ptr<CommandInvocation> invocation);
+
    /**
     * If '!invocation', we're logging about a Command pre-parse. It has to punt on the logged
     * namespace, giving only the request's $db. Since the Command hasn't parsed the request body,
@@ -561,6 +589,16 @@ public:
     */
    virtual void run(OperationContext* opCtx, rpc::ReplyBuilderInterface* result) = 0;

+    /**
+     * Returns a future that can schedule asynchronous execution of the command. By default, the
+     * future falls back to the execution of `run(...)`, thus the default semantics of
+     * `runAsync(...)` is identical to that of `run(...)`.
+     */
+    virtual Future<void> runAsync(std::shared_ptr<RequestExecutionContext> rec) {
+        run(rec->getOpCtx(), rec->getReplyBuilder());
+        return Status::OK();
+    }
+
    virtual void explain(OperationContext* opCtx,
                         ExplainOptions::Verbosity verbosity,
                         rpc::ReplyBuilderInterface* result) {
@@ -591,6 +629,34 @@ public:
                {kDefaultReadConcernNotPermitted}};
    }

+    /**
+     * Returns whether this invocation is safe to run on a borrowed threading model.
+     *
+     * In practice, this is attempting to predict if the operation will do network or storage reads
+     * and writes. It will allow auth commands for the most part, since while they do involve
+     * network or storage operations, they are not targeting the storage engine or remote
+     * mongo-server nodes.
+     */
+    virtual bool isSafeForBorrowedThreads() const {
+        if (definition()->maintenanceMode() || !definition()->maintenanceOk()) {
+            // If the command has maintenance implications, it has storage implications.
+            return false;
+        }
+
+        if (supportsWriteConcern()) {
+            // If the command supports write concern, it has storage and network implications.
+            return false;
+        }
+
+        if (auto result = supportsReadConcern(repl::ReadConcernLevel::kMajorityReadConcern);
+            result.readConcernSupport.isOK()) {
+            // If the command supports read concern, it has storage and network implications.
+            return false;
+        }
+
+        return true;
+    }
+
    /**
     * Return if this invocation can be mirrored to secondaries
     */
@@ -697,6 +763,18 @@ public:
                                     const BSONObj& cmdObj,
                                     rpc::ReplyBuilderInterface* replyBuilder) = 0;

+    /**
+     * Provides a future that may run the command asynchronously. By default, it falls back to
+     * runWithReplyBuilder.
+     */
+    virtual Future<void> runAsync(std::shared_ptr<RequestExecutionContext> rec, std::string db) {
+        if (!runWithReplyBuilder(
+                rec->getOpCtx(), db, rec->getRequest().body, rec->getReplyBuilder()))
+            return Status(ErrorCodes::FailedToRunWithReplyBuilder,
+                          fmt::format("Failed to run command: {}", rec->getCommand()->getName()));
+        return Status::OK();
+    }
+
    /**
     * Commands which can be explained override this method. Any operation which has a query
     * part and executes as a tree of execution stages can be explained. A command should
--- a/src/mongo/db/commands/SConscript
+++ b/src/mongo/db/commands/SConscript
@@ -324,6 +324,7 @@ env.Library(
        "$BUILD_DIR/mongo/db/storage/two_phase_index_build_knobs_idl",
        '$BUILD_DIR/mongo/db/transaction',
        '$BUILD_DIR/mongo/db/views/views_mongod',
+        '$BUILD_DIR/mongo/executor/async_request_executor',
        '$BUILD_DIR/mongo/util/log_and_backoff',
        '$BUILD_DIR/mongo/util/net/http_client',
        'core',
@@ -629,6 +630,23 @@ env.CppUnitTest(
    ],
 )

+env.CppUnitTest(
+    target="async_command_execution_test",
+    source=[
+        "async_command_execution_test.cpp",
+    ],
+    LIBDEPS=[
+        "$BUILD_DIR/mongo/base",
+        "$BUILD_DIR/mongo/db/auth/authmocks",
+        "$BUILD_DIR/mongo/db/auth/authorization_manager_global",
+        "$BUILD_DIR/mongo/db/commands",
+        "$BUILD_DIR/mongo/db/commands/standalone",
+        "$BUILD_DIR/mongo/db/service_context_test_fixture",
+        "$BUILD_DIR/mongo/unittest/unittest",
+        "$BUILD_DIR/mongo/util/version_impl",
+    ],
+)
+
 env.CppUnitTest(
    target="db_commands_test",
    source=[
--- a/src/mongo/db/commands/async_command_execution_test.cpp
+++ b/src/mongo/db/commands/async_command_execution_test.cpp
@@ -0,0 +1,152 @@
+/**
+ *    Copyright (C) 2020-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
+
+#include <fmt/format.h>
+
+#include "mongo/bson/bsonobj.h"
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/client.h"
+#include "mongo/db/client_strand.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/request_execution_context.h"
+#include "mongo/db/service_context_test_fixture.h"
+#include "mongo/logv2/log.h"
+#include "mongo/rpc/factory.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/fail_point.h"
+
+namespace mongo {
+namespace {
+
+using namespace fmt::literals;
+
+class AsyncCommandExecutionTest : public unittest::Test, public ScopedGlobalServiceContextForTest {
+public:
+    void runTestForCommand(StringData command) {
+        BSONObj syncResponse, asyncResponse;
+
+        auto client = getServiceContext()->makeClient("Client");
+        auto strand = ClientStrand::make(std::move(client));
+
+        {
+            auto ctx = makeExecutionContext(strand, command);
+            strand->run([&] { syncResponse = getSyncResponse(ctx); });
+        }
+
+        {
+            auto ctx = makeExecutionContext(strand, command);
+            asyncResponse = getAsyncResponse(strand, ctx);
+        }
+
+        {
+            auto ctx = makeExecutionContext(strand, command);
+            killAsyncCommand(strand, ctx);
+        }
+
+        ASSERT_BSONOBJ_EQ(syncResponse, asyncResponse);
+    }
+
+private:
+    struct ExecutionContext {
+        ServiceContext::UniqueOperationContext opCtx;
+        std::shared_ptr<RequestExecutionContext> rec;
+        std::shared_ptr<CommandInvocation> invocation;
+    };
+
+    ExecutionContext makeExecutionContext(ClientStrandPtr strand, StringData commandName) const {
+        auto guard = strand->bind();
+        ExecutionContext ctx;
+        ctx.opCtx = cc().makeOperationContext();
+
+        auto rec =
+            std::make_shared<RequestExecutionContext>(ctx.opCtx.get(), mockMessage(commandName));
+        rec->setReplyBuilder(makeReplyBuilder(rpc::protocolForMessage(rec->getMessage())));
+        rec->setRequest(rpc::opMsgRequestFromAnyProtocol(rec->getMessage()));
+        rec->setCommand(CommandHelpers::findCommand(rec->getRequest().getCommandName()));
+
+        auto cmd = rec->getCommand();
+        invariant(cmd);
+        ctx.invocation = cmd->parse(ctx.opCtx.get(), rec->getRequest());
+        ctx.rec = std::move(rec);
+        return ctx;
+    }
+
+    BSONObj getSyncResponse(ExecutionContext& ctx) const {
+        ctx.invocation->run(ctx.rec->getOpCtx(), ctx.rec->getReplyBuilder());
+        return ctx.rec->getReplyBuilder()->getBodyBuilder().done().getOwned();
+    }
+
+    BSONObj getAsyncResponse(ClientStrandPtr strand, ExecutionContext& ctx) const {
+        Future<void> future;
+        {
+            auto guard = strand->bind();
+            FailPointEnableBlock fp("hangBeforeRunningAsyncRequestExecutorTask");
+            future = ctx.invocation->runAsync(ctx.rec);
+            ASSERT(!future.isReady());
+        }
+
+        ASSERT(future.getNoThrow().isOK());
+
+        return [&] {
+            auto guard = strand->bind();
+            return ctx.rec->getReplyBuilder()->getBodyBuilder().done().getOwned();
+        }();
+    }
+
+    void killAsyncCommand(ClientStrandPtr strand, ExecutionContext& ctx) const {
+        Future<void> future;
+        {
+            auto guard = strand->bind();
+            FailPointEnableBlock fp("hangBeforeRunningAsyncRequestExecutorTask");
+            future = ctx.invocation->runAsync(ctx.rec);
+
+            auto opCtx = ctx.rec->getOpCtx();
+            stdx::lock_guard<Client> lk(*opCtx->getClient());
+            opCtx->getServiceContext()->killOperation(lk, opCtx, ErrorCodes::Interrupted);
+        }
+
+        ASSERT_EQ(future.getNoThrow().code(), ErrorCodes::Interrupted);
+    }
+
+    Message mockMessage(StringData commandName) const {
+        OpMsgBuilder builder;
+        builder.setBody(BSON(commandName << 1 << "$db"
+                                         << "test"));
+        return builder.finish();
+    }
+};
+
+TEST_F(AsyncCommandExecutionTest, BuildInfo) {
+    runTestForCommand("buildinfo");
+}
+
+}  // namespace
+}  // namespace mongo
--- a/src/mongo/db/commands/dbcommands.cpp
+++ b/src/mongo/db/commands/dbcommands.cpp
@@ -83,13 +83,16 @@
 #include "mongo/db/repl/repl_client_info.h"
 #include "mongo/db/repl/repl_settings.h"
 #include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/request_execution_context.h"
 #include "mongo/db/s/collection_sharding_state.h"
 #include "mongo/db/stats/storage_stats.h"
 #include "mongo/db/storage/storage_engine_init.h"
 #include "mongo/db/write_concern.h"
+#include "mongo/executor/async_request_executor.h"
 #include "mongo/logv2/log.h"
 #include "mongo/scripting/engine.h"
 #include "mongo/util/fail_point.h"
+#include "mongo/util/future.h"
 #include "mongo/util/md5.hpp"
 #include "mongo/util/scopeguard.h"
 #include "mongo/util/version.h"
@@ -781,6 +784,26 @@ public:

 } cmdDBStats;

+// Provides the means to asynchronously run `buildinfo` commands.
+class BuildInfoExecutor final : public AsyncRequestExecutor {
+public:
+    BuildInfoExecutor() : AsyncRequestExecutor("BuildInfoExecutor") {}
+
+    Status handleRequest(std::shared_ptr<RequestExecutionContext> rec) {
+        auto result = rec->getReplyBuilder()->getBodyBuilder();
+        VersionInfoInterface::instance().appendBuildInfo(&result);
+        appendStorageEngineList(rec->getOpCtx()->getServiceContext(), &result);
+        return Status::OK();
+    }
+
+    static BuildInfoExecutor* get(ServiceContext* svc);
+};
+
+const auto getBuildInfoExecutor = ServiceContext::declareDecoration<BuildInfoExecutor>();
+BuildInfoExecutor* BuildInfoExecutor::get(ServiceContext* svc) {
+    return const_cast<BuildInfoExecutor*>(&getBuildInfoExecutor(svc));
+}
+
 class CmdBuildInfo : public BasicCommand {
 public:
    CmdBuildInfo() : BasicCommand("buildInfo", "buildinfo") {}
@@ -816,6 +839,11 @@ public:
        return true;
    }

+    Future<void> runAsync(std::shared_ptr<RequestExecutionContext> rec, std::string) override {
+        auto opCtx = rec->getOpCtx();
+        return BuildInfoExecutor::get(opCtx->getServiceContext())->schedule(std::move(rec));
+    }
+
 } cmdBuildInfo;

 }  // namespace
--- a/src/mongo/db/commands/server_status_servers.cpp
+++ b/src/mongo/db/commands/server_status_servers.cpp
@@ -33,6 +33,8 @@
 #include "mongo/db/commands/server_status.h"
 #include "mongo/transport/message_compressor_registry.h"
 #include "mongo/transport/service_entry_point.h"
+#include "mongo/transport/service_executor_fixed.h"
+#include "mongo/transport/service_executor_reserved.h"
 #include "mongo/transport/service_executor_synchronous.h"
 #include "mongo/util/net/hostname_canonicalization.h"
 #include "mongo/util/net/socket_utils.h"
@@ -78,16 +80,27 @@ public:
        return true;
    }

-    // TODO: need to track connections in server stats (see SERVER-49073)
    BSONObj generateSection(OperationContext* opCtx,
                            const BSONElement& configElement) const override {
        BSONObjBuilder b;
        networkCounter.append(b);
        appendMessageCompressionStats(&b);
-        auto executor = transport::ServiceExecutorSynchronous::get(opCtx->getServiceContext());
-        if (executor) {
-            BSONObjBuilder section(b.subobjStart("serviceExecutorTaskStats"));
-            executor->appendStats(&section);
+
+        {
+            BSONObjBuilder section = b.subobjStart("serviceExecutors");
+
+            auto svcCtx = opCtx->getServiceContext();
+            if (auto executor = transport::ServiceExecutorSynchronous::get(svcCtx)) {
+                executor->appendStats(&section);
+            }
+
+            if (auto executor = transport::ServiceExecutorReserved::get(svcCtx)) {
+                executor->appendStats(&section);
+            }
+
+            if (auto executor = transport::ServiceExecutorFixed::get(svcCtx)) {
+                executor->appendStats(&section);
+            }
        }

        return b.obj();
--- a/src/mongo/db/repl/tenant_migration_donor_util.cpp
+++ b/src/mongo/db/repl/tenant_migration_donor_util.cpp
@@ -41,6 +41,7 @@
 #include "mongo/executor/network_interface_factory.h"
 #include "mongo/executor/thread_pool_task_executor.h"
 #include "mongo/logv2/log.h"
+#include "mongo/util/assert_util.h"
 #include "mongo/util/concurrency/thread_pool.h"
 #include "mongo/util/fail_point.h"

@@ -209,6 +210,64 @@ void onWriteToDatabase(OperationContext* opCtx, StringData dbName) {
    }
 }

+class MigrationConflictHandler : public std::enable_shared_from_this<MigrationConflictHandler> {
+public:
+    MigrationConflictHandler(std::shared_ptr<RequestExecutionContext> rec,
+                             unique_function<Future<void>()> callable)
+        : _rec(std::move(rec)), _callable(std::move(callable)) {}
+
+    Future<void> run() try {
+        checkIfCanReadOrBlock(_rec->getOpCtx(), _rec->getRequest().getDatabase());
+        // callable will modify replyBuilder.
+        return _callable()
+            .then([this, anchor = shared_from_this()] { _checkReplyForTenantMigrationConflict(); })
+            .onError<ErrorCodes::TenantMigrationConflict>(
+                [this, anchor = shared_from_this()](Status status) {
+                    _handleTenantMigrationConflict(std::move(status));
+                    return Status::OK();
+                });
+    } catch (const DBException& e) {
+        return e.toStatus();
+    }
+
+private:
+    void _checkReplyForTenantMigrationConflict() {
+        auto replyBodyBuilder = _rec->getReplyBuilder()->getBodyBuilder();
+
+        // getStatusFromWriteCommandReply expects an 'ok' field.
+        CommandHelpers::extractOrAppendOk(replyBodyBuilder);
+
+        // Commands such as insert, update, delete, and applyOps return the result as a status
+        // rather than throwing.
+        const auto status = getStatusFromWriteCommandReply(replyBodyBuilder.asTempObj());
+
+        // Only throw `TenantMigrationConflict` exceptions.
+        if (status == ErrorCodes::TenantMigrationConflict)
+            internalAssert(status);
+    }
+
+    void _handleTenantMigrationConflict(Status status) {
+        auto migrationConflictInfo = status.extraInfo<TenantMigrationConflictInfo>();
+        invariant(migrationConflictInfo);
+
+        auto& mtabByPrefix =
+            TenantMigrationAccessBlockerByPrefix::get(_rec->getOpCtx()->getServiceContext());
+        if (auto mtab = mtabByPrefix.getTenantMigrationAccessBlockerForDbPrefix(
+                migrationConflictInfo->getDatabasePrefix())) {
+            _rec->getReplyBuilder()->getBodyBuilder().resetToEmpty();
+            mtab->checkIfCanWriteOrBlock(_rec->getOpCtx());
+        }
+    }
+
+    const std::shared_ptr<RequestExecutionContext> _rec;
+    const unique_function<Future<void>()> _callable;
+};
+
+Future<void> migrationConflictHandler(std::shared_ptr<RequestExecutionContext> rec,
+                                      unique_function<Future<void>()> callable) {
+    return std::make_shared<MigrationConflictHandler>(std::move(rec), std::move(callable))->run();
+}
+
 }  // namespace tenant_migration_donor

 }  // namespace mongo
--- a/src/mongo/db/repl/tenant_migration_donor_util.h
+++ b/src/mongo/db/repl/tenant_migration_donor_util.h
@@ -35,9 +35,12 @@
 #include "mongo/db/repl/tenant_migration_access_blocker_by_prefix.h"
 #include "mongo/db/repl/tenant_migration_conflict_info.h"
 #include "mongo/db/repl/tenant_migration_state_machine_gen.h"
+#include "mongo/db/request_execution_context.h"
 #include "mongo/executor/task_executor.h"
 #include "mongo/rpc/get_status_from_command_result.h"
 #include "mongo/rpc/reply_builder_interface.h"
+#include "mongo/util/functional.h"
+#include "mongo/util/future.h"

 namespace mongo {

@@ -73,46 +76,13 @@ void checkIfLinearizableReadWasAllowedOrThrow(OperationContext* opCtx, StringDat
 void onWriteToDatabase(OperationContext* opCtx, StringData dbName);

 /**
- * Runs the argument function 'callable'. If it throws a TenantMigrationConflict error (as indicated
- * in 'replyBuilder'), clears 'replyBuilder' and blocks until the migration commits or aborts, then
- * throws TenantMigrationCommitted or TenantMigrationAborted.
+ * Returns a future that asynchronously schedules and runs the argument function 'callable'. If it
+ * throws a TenantMigrationConflict error (as indicated in the reply builder of 'rec'), clears the
+ * reply builder and blocks until the migration commits or aborts, then returns
+ * TenantMigrationCommitted or TenantMigrationAborted.
 */
-template <typename Callable>
-void migrationConflictHandler(OperationContext* opCtx,
-                              StringData dbName,
-                              Callable&& callable,
-                              rpc::ReplyBuilderInterface* replyBuilder) {
-    checkIfCanReadOrBlock(opCtx, dbName);
-
-    auto& mtabByPrefix = TenantMigrationAccessBlockerByPrefix::get(opCtx->getServiceContext());
-
-    try {
-        // callable will modify replyBuilder.
-        callable();
-        auto replyBodyBuilder = replyBuilder->getBodyBuilder();
-
-        // getStatusFromWriteCommandReply expects an 'ok' field.
-        CommandHelpers::extractOrAppendOk(replyBodyBuilder);
-
-        // Commands such as insert, update, delete, and applyOps return the result as a status
-        // rather than throwing.
-        const auto status = getStatusFromWriteCommandReply(replyBodyBuilder.asTempObj());
-
-        if (status == ErrorCodes::TenantMigrationConflict) {
-            uassertStatusOK(status);
-        }
-        return;
-    } catch (const TenantMigrationConflictException& ex) {
-        auto migrationConflictInfo = ex.extraInfo<TenantMigrationConflictInfo>();
-        invariant(migrationConflictInfo);
-
-        if (auto mtab = mtabByPrefix.getTenantMigrationAccessBlockerForDbPrefix(
-                migrationConflictInfo->getDatabasePrefix())) {
-            replyBuilder->getBodyBuilder().resetToEmpty();
-            mtab->checkIfCanWriteOrBlock(opCtx);
-        }
-    }
-}
+Future<void> migrationConflictHandler(std::shared_ptr<RequestExecutionContext> rec,
+                                      unique_function<Future<void>()> callable);

 }  // namespace tenant_migration_donor

--- a/src/mongo/db/request_execution_context.h
+++ b/src/mongo/db/request_execution_context.h
@@ -0,0 +1,130 @@
+/**
+ *    Copyright (C) 2020-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#pragma once
+
+#include <memory>
+
+#include "mongo/db/dbmessage.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/rpc/message.h"
+#include "mongo/rpc/op_msg.h"
+#include "mongo/rpc/reply_builder_interface.h"
+#include "mongo/util/assert_util.h"
+
+namespace mongo {
+
+class Command;
+
+/**
+ * Captures the execution context for a command to allow safe, shared and asynchronous accesses.
+ * This class owns all objects that participate in command execution (e.g., `request`). The only
+ * exceptions are `opCtx` and `command`. The `opCtx` remains valid so long as its corresponding
+ * client is attached to the executor thread. In case of `command`, it is a global, static
+ * construct and is safe to be accessed through raw pointers.
+ * Any access from a client thread that does not own the `opCtx`, or after the `opCtx` is
+ * released is strictly forbidden.
+ */
+class RequestExecutionContext {
+public:
+    RequestExecutionContext() = delete;
+    RequestExecutionContext(const RequestExecutionContext&) = delete;
+    RequestExecutionContext(RequestExecutionContext&&) = delete;
+
+    RequestExecutionContext(OperationContext* opCtx, Message message)
+        : _opCtx(opCtx),
+          _message(std::move(message)),
+          _dbmsg(std::make_unique<DbMessage>(_message.get())) {}
+
+    auto getOpCtx() const {
+        invariant(_isOnClientThread());
+        return _opCtx;
+    }
+
+    const Message& getMessage() const {
+        invariant(_isOnClientThread() && _message);
+        return _message.get();
+    }
+
+    DbMessage& getDbMessage() const {
+        invariant(_isOnClientThread() && _dbmsg);
+        return *_dbmsg.get();
+    }
+
+    void setRequest(OpMsgRequest request) {
+        invariant(_isOnClientThread() && !_request);
+        _request = std::move(request);
+    }
+    const OpMsgRequest& getRequest() const {
+        invariant(_isOnClientThread() && _request);
+        return _request.get();
+    }
+
+    void setCommand(Command* command) {
+        invariant(_isOnClientThread() && !_command);
+        _command = command;
+    }
+    Command* getCommand() const {
+        invariant(_isOnClientThread());
+        return _command;
+    }
+
+    void setReplyBuilder(std::unique_ptr<rpc::ReplyBuilderInterface> replyBuilder) {
+        invariant(_isOnClientThread() && !_replyBuilder);
+        _replyBuilder = std::move(replyBuilder);
+    }
+    auto getReplyBuilder() const {
+        invariant(_isOnClientThread() && _replyBuilder);
+        return _replyBuilder.get();
+    }
+
+    void setResponse(DbResponse response) {
+        invariant(_isOnClientThread());
+        _response = std::move(response);
+    }
+    DbResponse& getResponse() {
+        invariant(_isOnClientThread());
+        return _response;
+    }
+
+private:
+    bool _isOnClientThread() const {
+        return _opCtx != nullptr && Client::getCurrent() == _opCtx->getClient();
+    }
+
+    OperationContext* const _opCtx;
+    boost::optional<Message> _message;
+    std::unique_ptr<DbMessage> _dbmsg;
+    boost::optional<OpMsgRequest> _request;
+    Command* _command = nullptr;
+    std::unique_ptr<rpc::ReplyBuilderInterface> _replyBuilder;
+    DbResponse _response;
+};
+
+}  // namespace mongo
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
--- a/src/mongo/db/service_entry_point_common.h
+++ b/src/mongo/db/service_entry_point_common.h
@@ -84,7 +84,7 @@ struct ServiceEntryPointCommon {

        virtual void attachCurOpErrInfo(OperationContext* opCtx, const BSONObj& replyObj) const = 0;

-        virtual void handleException(const DBException& e, OperationContext* opCtx) const = 0;
+        virtual void handleException(const Status& status, OperationContext* opCtx) const = 0;

        virtual void advanceConfigOpTimeFromRequestMetadata(OperationContext* opCtx) const = 0;

@@ -101,7 +101,7 @@ struct ServiceEntryPointCommon {

    static Future<DbResponse> handleRequest(OperationContext* opCtx,
                                            const Message& m,
-                                            const Hooks& hooks) noexcept;
+                                            std::unique_ptr<const Hooks> hooks) noexcept;

    /**
     * Produce a new object based on cmdObj, but with redactions applied as specified by
--- a/src/mongo/db/service_entry_point_mongod.cpp
+++ b/src/mongo/db/service_entry_point_mongod.cpp
@@ -182,9 +182,9 @@ public:
        CurOp::get(opCtx)->debug().errInfo = getStatusFromCommandResult(replyObj);
    }

-    void handleException(const DBException& e, OperationContext* opCtx) const override {
+    void handleException(const Status& status, OperationContext* opCtx) const override {
        // If we got a stale config, wait in case the operation is stuck in a critical section
-        if (auto sce = e.extraInfo<StaleConfigInfo>()) {
+        if (auto sce = status.extraInfo<StaleConfigInfo>()) {
            // A config server acting as a router may return a StaleConfig exception, but a config
            // server won't contain data for a sharded collection, so skip handling the exception.
            if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
@@ -204,7 +204,7 @@ public:
                onShardVersionMismatchNoExcept(opCtx, sce->getNss(), sce->getVersionReceived())
                    .ignore();
            }
-        } else if (auto sce = e.extraInfo<StaleDbRoutingVersion>()) {
+        } else if (auto sce = status.extraInfo<StaleDbRoutingVersion>()) {
            if (!opCtx->getClient()->isInDirectClient()) {
                onDbVersionMismatchNoExcept(
                    opCtx, sce->getDb(), sce->getVersionReceived(), sce->getVersionWanted())
@@ -271,7 +271,7 @@ public:

 Future<DbResponse> ServiceEntryPointMongod::handleRequest(OperationContext* opCtx,
                                                          const Message& m) noexcept {
-    return ServiceEntryPointCommon::handleRequest(opCtx, m, Hooks{});
+    return ServiceEntryPointCommon::handleRequest(opCtx, m, std::make_unique<Hooks>());
 }

 }  // namespace mongo
--- a/src/mongo/embedded/service_entry_point_embedded.cpp
+++ b/src/mongo/embedded/service_entry_point_embedded.cpp
@@ -93,7 +93,7 @@ public:

    void attachCurOpErrInfo(OperationContext*, const BSONObj&) const override {}

-    void handleException(const DBException& e, OperationContext* opCtx) const override {}
+    void handleException(const Status& status, OperationContext* opCtx) const override {}

    void advanceConfigOpTimeFromRequestMetadata(OperationContext* opCtx) const override {}

@@ -117,7 +117,7 @@ Future<DbResponse> ServiceEntryPointEmbedded::handleRequest(OperationContext* op
    // guarantees of the state (that they have run).
    checked_cast<PeriodicRunnerEmbedded*>(opCtx->getServiceContext()->getPeriodicRunner())
        ->tryPump();
-    return ServiceEntryPointCommon::handleRequest(opCtx, m, Hooks{});
+    return ServiceEntryPointCommon::handleRequest(opCtx, m, std::make_unique<Hooks>());
 }

 void ServiceEntryPointEmbedded::startSession(transport::SessionHandle session) {
--- a/src/mongo/executor/SConscript
+++ b/src/mongo/executor/SConscript
@@ -270,6 +270,19 @@ env.Library(
    ],
 )

+env.Library(
+    target='async_request_executor',
+    source=[
+        'async_request_executor.cpp',
+    ],
+    LIBDEPS=[
+        '$BUILD_DIR/mongo/base',
+        '$BUILD_DIR/mongo/db/service_context',
+        '$BUILD_DIR/mongo/util/concurrency/thread_pool',
+        '$BUILD_DIR/mongo/util/fail_point',
+    ],
+)
+
 env.CppUnitTest(
    target='executor_test',
    source=[
--- a/src/mongo/executor/async_request_executor.cpp
+++ b/src/mongo/executor/async_request_executor.cpp
@@ -0,0 +1,88 @@
+/**
+ *    Copyright (C) 2020-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kExecutor
+
+#include "mongo/executor/async_request_executor.h"
+
+#include "mongo/db/client_strand.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/logv2/log.h"
+#include "mongo/platform/compiler.h"
+#include "mongo/util/fail_point.h"
+
+namespace mongo {
+
+MONGO_FAIL_POINT_DEFINE(hangBeforeRunningAsyncRequestExecutorTask);
+
+AsyncRequestExecutor::AsyncRequestExecutor(std::string name) : _name(std::move(name)) {
+    ThreadPool::Options options;
+    options.minThreads = 0;
+    options.maxThreads = 1;  // At most one worker thread runs the scheduled requests.
+    _pool = std::make_unique<ThreadPool>(std::move(options));
+    _pool->startup();
+    LOGV2_DEBUG(
+        4910801, kDiagnosticLogLevel, "Started asynchronous request executor", "name"_attr = _name);
+}
+
+AsyncRequestExecutor::~AsyncRequestExecutor() {
+    _pool->shutdown();
+    _pool->join();  // Scheduled tasks capture `this`, so join the pool before tearing down.
+    LOGV2_DEBUG(
+        4910802, kDiagnosticLogLevel, "Stopped asynchronous request executor", "name"_attr = _name);
+}
+
+Future<void> AsyncRequestExecutor::schedule(std::shared_ptr<RequestExecutionContext> rec) {
+    auto opCtx = rec->getOpCtx();
+    auto [promise, future] = makePromiseFuture<void>();
+
+    // Capturing `this` is safe: the destructor joins `_pool`, which this instance owns.
+    _pool->schedule([this,
+                     strand = ClientStrand::get(opCtx->getClient()),
+                     promise = std::move(promise),
+                     rec = std::move(rec)](Status status) mutable {
+        hangBeforeRunningAsyncRequestExecutorTask.pauseWhileSet();  // Fail point hook.
+        strand->run([&] {
+            promise.setWith([&] {
+                if (MONGO_unlikely(!status.isOK()))
+                    return status.withContext("Unable to schedule asynchronous request");
+
+                auto opCtx = rec->getOpCtx();
+                if (opCtx->isKillPending())
+                    return Status(opCtx->getKillStatus(), "Asynchronous operation was interrupted");
+
+                return handleRequest(std::move(rec));
+            });
+        });
+    });
+
+    return std::move(future);  // Structured bindings are not implicitly moved on return.
+}
+
+}  // namespace mongo
--- a/src/mongo/executor/async_request_executor.h
+++ b/src/mongo/executor/async_request_executor.h
@@ -0,0 +1,73 @@
+/**
+ *    Copyright (C) 2020-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "mongo/db/request_execution_context.h"
+#include "mongo/util/concurrency/thread_pool.h"
+#include "mongo/util/future.h"
+
+namespace mongo {
+
+/**
+ * The base class for constructing command-specific asynchronous executors.
+ * Requests (i.e., instances of `RequestExecutionContext`) are scheduled on a thread-pool, and
+ * passed to the command-specific implementation of `handleRequest`.
+ */
+class AsyncRequestExecutor {
+public:
+    AsyncRequestExecutor(AsyncRequestExecutor&&) = delete;
+    AsyncRequestExecutor(const AsyncRequestExecutor&) = delete;
+
+    explicit AsyncRequestExecutor(std::string name);
+    virtual ~AsyncRequestExecutor();  // Virtual: this is a polymorphic base class.
+
+    /**
+     * Runs the command-specific code to handle the request.
+     * Must only access the request on the corresponding client thread.
+     */
+    virtual Status handleRequest(std::shared_ptr<RequestExecutionContext>) = 0;
+
+    /**
+     * Schedules `rec` on `_pool` to run `handleRequest` asynchronously. The returned future is
+     * ready once the task has run, carrying its status or a scheduling/interruption error.
+     */
+    Future<void> schedule(std::shared_ptr<RequestExecutionContext> rec);
+
+    static constexpr auto kDiagnosticLogLevel = 4;  // Debug level of the start/stop log lines.
+
+private:
+    const std::string _name;
+    std::unique_ptr<ThreadPool> _pool;
+};
+
+}  // namespace mongo
--- a/src/mongo/s/commands/SConscript
+++ b/src/mongo/s/commands/SConscript
@@ -103,6 +103,7 @@ env.Library(
    ],
    LIBDEPS=[
        '$BUILD_DIR/mongo/db/commands/servers',
+        '$BUILD_DIR/mongo/executor/async_request_executor',
    ],
    LIBDEPS_PRIVATE=[
        '$BUILD_DIR/mongo/db/audit',
--- a/src/mongo/s/commands/cluster_build_info.cpp
+++ b/src/mongo/s/commands/cluster_build_info.cpp
@@ -32,11 +32,33 @@
 #include "mongo/platform/basic.h"

 #include "mongo/db/commands.h"
+#include "mongo/db/request_execution_context.h"
+#include "mongo/executor/async_request_executor.h"
+#include "mongo/util/future.h"
 #include "mongo/util/version.h"

 namespace mongo {
 namespace {

+class ClusterBuildInfoExecutor final : public AsyncRequestExecutor {
+public:
+    ClusterBuildInfoExecutor() : AsyncRequestExecutor("ClusterBuildInfoExecutor") {}
+
+    Status handleRequest(std::shared_ptr<RequestExecutionContext> rec) override {
+        auto result = rec->getReplyBuilder()->getBodyBuilder();
+        VersionInfoInterface::instance().appendBuildInfo(&result);  // Fills the reply body.
+        return Status::OK();
+    }
+
+    static ClusterBuildInfoExecutor* get(ServiceContext* svc);  // Per-ServiceContext instance.
+};
+
+const auto getClusterBuildInfoExecutor =
+    ServiceContext::declareDecoration<ClusterBuildInfoExecutor>();
+ClusterBuildInfoExecutor* ClusterBuildInfoExecutor::get(ServiceContext* svc) {
+    return const_cast<ClusterBuildInfoExecutor*>(&getClusterBuildInfoExecutor(svc));
+}
+
 class ClusterCmdBuildInfo : public BasicCommand {
 public:
    ClusterCmdBuildInfo() : BasicCommand("buildInfo", "buildinfo") {}
@@ -70,6 +92,11 @@ public:
        return true;
    }

+    Future<void> runAsync(std::shared_ptr<RequestExecutionContext> rec, std::string) override {
+        auto opCtx = rec->getOpCtx();  // Read up front; `rec` is moved on the next line.
+        return ClusterBuildInfoExecutor::get(opCtx->getServiceContext())->schedule(std::move(rec));
+    }
+
 } cmdBuildInfo;

 }  // namespace
--- a/src/mongo/s/commands/cluster_command_test_fixture.cpp
+++ b/src/mongo/s/commands/cluster_command_test_fixture.cpp
@@ -125,7 +125,9 @@ DbResponse ClusterCommandTestFixture::runCommand(BSONObj cmd) {
    auto clusterGLE = ClusterLastErrorInfo::get(client.get());
    clusterGLE->newRequest();

-    return Strategy::clientCommand(opCtx.get(), opMsgRequest.serialize());
+    AlternativeClientRegion acr(client);
+    auto rec = std::make_shared<RequestExecutionContext>(opCtx.get(), opMsgRequest.serialize());
+    return Strategy::clientCommand(std::move(rec)).get();
 }

 void ClusterCommandTestFixture::runCommandSuccessful(BSONObj cmd, bool isTargeted) {
--- a/src/mongo/s/commands/strategy.cpp
+++ b/src/mongo/s/commands/strategy.cpp
--- a/src/mongo/s/commands/strategy.h
+++ b/src/mongo/s/commands/strategy.h
@@ -33,6 +33,7 @@

 #include "mongo/client/connection_string.h"
 #include "mongo/db/query/explain_options.h"
+#include "mongo/db/request_execution_context.h"
 #include "mongo/s/client/shard.h"

 namespace mongo {
@@ -74,7 +75,7 @@ public:
     * with the result from the operation. Doesn't send any response back and does not throw on
     * errors.
     */
-    static void writeOp(OperationContext* opCtx, DbMessage* dbm);
+    static void writeOp(std::shared_ptr<RequestExecutionContext> rec);

    /**
     * Executes a command from either OP_QUERY or OP_MSG wire protocols.
@@ -82,7 +83,7 @@ public:
     * Catches StaleConfigException errors and retries the command automatically after refreshing
     * the metadata for the failing namespace.
     */
-    static DbResponse clientCommand(OperationContext* opCtx, const Message& message);
+    static Future<DbResponse> clientCommand(std::shared_ptr<RequestExecutionContext> rec);

    /**
     * Helper to run an explain of a find operation on the shards. Fills 'out' with the result of
--- a/src/mongo/s/service_entry_point_mongos.cpp
+++ b/src/mongo/s/service_entry_point_mongos.cpp
@@ -29,6 +29,8 @@

 #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kNetwork

+#include <memory>
+
 #include "mongo/platform/basic.h"

 #include "mongo/s/service_entry_point_mongos.h"
@@ -40,12 +42,12 @@
 #include "mongo/db/dbmessage.h"
 #include "mongo/db/lasterror.h"
 #include "mongo/db/operation_context.h"
+#include "mongo/db/request_execution_context.h"
 #include "mongo/db/service_context.h"
 #include "mongo/logv2/log.h"
 #include "mongo/rpc/message.h"
 #include "mongo/s/cluster_last_error_info.h"
 #include "mongo/s/commands/strategy.h"
-#include "mongo/util/scopeguard.h"

 namespace mongo {

@@ -60,16 +62,50 @@ BSONObj buildErrReply(const DBException& ex) {

 }  // namespace

+// Allows for decomposing `handleRequest` into parts and simplifies composing the future-chain.
+struct HandleRequest : public std::enable_shared_from_this<HandleRequest> {
+    struct OpRunnerBase;

-Future<DbResponse> ServiceEntryPointMongos::handleRequest(OperationContext* opCtx,
-                                                          const Message& message) noexcept try {
-    const int32_t msgId = message.header().getId();
-    const NetworkOp op = message.operation();
+    HandleRequest(OperationContext* opCtx, const Message& message)
+        : rec(std::make_shared<RequestExecutionContext>(opCtx, message)),
+          op(message.operation()),
+          msgId(message.header().getId()),
+          nsString(getNamespaceString(rec->getDbMessage())) {}
+
+    // Prepares the environment for handling the request (e.g., setting up `ClusterLastErrorInfo`).
+    void setupEnvironment();
+
+    // Returns a future that does the heavy lifting of running client commands.
+    Future<DbResponse> handleRequest();
+
+    // Runs on successful execution of the future returned by `handleRequest`.
+    void onSuccess(const DbResponse&);
+
+    // Returns a future-chain to handle the request and prepare the response.
+    Future<DbResponse> run();
+
+    static NamespaceString getNamespaceString(const DbMessage& dbmsg) {
+        if (!dbmsg.messageShouldHaveNs())
+            return {};
+        return NamespaceString(dbmsg.getns());
+    }
+
+    const std::shared_ptr<RequestExecutionContext> rec;
+    const NetworkOp op;
+    const int32_t msgId;
+    const NamespaceString nsString;
+
+    boost::optional<long long> slowMsOverride;
+};
+
+void HandleRequest::setupEnvironment() {
+    using namespace fmt::literals;
+    auto opCtx = rec->getOpCtx();

    // This exception will not be returned to the caller, but will be logged and will close the
    // connection
    uassert(ErrorCodes::IllegalOperation,
-            str::stream() << "Message type " << op << " is not supported.",
+            "Message type {} is not supported."_format(op),
            isSupportedRequestNetworkOp(op) &&
                op != dbCompressed);  // Decompression should be handled above us.

@@ -84,115 +120,174 @@ Future<DbResponse> ServiceEntryPointMongos::handleRequest(OperationContext* opCt
    AuthorizationSession::get(opCtx->getClient())->startRequest(opCtx);

    CurOp::get(opCtx)->ensureStarted();
+}

-    DbMessage dbm(message);
+// The base for various operation runners that handle the request, and often generate a DbResponse.
+struct HandleRequest::OpRunnerBase {
+    explicit OpRunnerBase(std::shared_ptr<HandleRequest> hr) : hr(std::move(hr)) {}
+    virtual ~OpRunnerBase() = default;
+    virtual Future<DbResponse> run() = 0;
+    const std::shared_ptr<HandleRequest> hr;
+};

-    // This is before the try block since it handles all exceptions that should not cause the
-    // connection to close.
-    if (op == dbMsg || (op == dbQuery && NamespaceString(dbm.getns()).isCommand())) {
-        auto dbResponse = Strategy::clientCommand(opCtx, message);
+struct CommandOpRunner final : public HandleRequest::OpRunnerBase {
+    using HandleRequest::OpRunnerBase::OpRunnerBase;
+    Future<DbResponse> run() override {
+        return Strategy::clientCommand(hr->rec).tap([hr = hr](const DbResponse&) {
+            // Hello should take kMaxAwaitTimeMs at most, log if it takes twice that.
+            if (auto command = CurOp::get(hr->rec->getOpCtx())->getCommand();
+                command && (command->getName() == "hello")) {
+                hr->slowMsOverride =
+                    2 * durationCount<Milliseconds>(SingleServerIsMasterMonitor::kMaxAwaitTime);
+            }
+        });
+    }
+};

-        // Hello should take kMaxAwaitTimeMs at most, log if it takes twice that.
-        boost::optional<long long> slowMsOverride;
-        if (auto command = CurOp::get(opCtx)->getCommand();
-            command && (command->getName() == "hello")) {
-            slowMsOverride =
-                2 * durationCount<Milliseconds>(SingleServerIsMasterMonitor::kMaxAwaitTime);
-        }
+// The base for operations that may throw exceptions, but should not cause the connection to close.
+struct OpRunner : public HandleRequest::OpRunnerBase {
+    using HandleRequest::OpRunnerBase::OpRunnerBase;
+    virtual DbResponse runOperation() = 0;
+    Future<DbResponse> run() override;
+};

-        // Mark the op as complete, populate the response length, and log it if appropriate.
-        CurOp::get(opCtx)->completeAndLogOperation(
-            opCtx, logv2::LogComponent::kCommand, dbResponse.response.size(), slowMsOverride);
+Future<DbResponse> OpRunner::run() try {
+    using namespace fmt::literals;
+    const NamespaceString& nss = hr->nsString;
+    const DbMessage& dbm = hr->rec->getDbMessage();

-        return Future<DbResponse>::makeReady(std::move(dbResponse));
+    if (dbm.messageShouldHaveNs()) {
+        uassert(ErrorCodes::InvalidNamespace, "Invalid ns [{}]"_format(nss.ns()), nss.isValid());
+
+        uassert(ErrorCodes::IllegalOperation,
+                "Can't use 'local' database through mongos",
+                nss.db() != NamespaceString::kLocalDb);
    }

-    NamespaceString nss;
-    DbResponse dbResponse;
-    try {
-        if (dbm.messageShouldHaveNs()) {
-            nss = NamespaceString(StringData(dbm.getns()));
+    LOGV2_DEBUG(22867,
+                3,
+                "Request::process begin ns: {namespace} msg id: {msgId} op: {operation}",
+                "Starting operation",
+                "namespace"_attr = nss,
+                "msgId"_attr = hr->msgId,
+                "operation"_attr = networkOpToString(hr->op));

-            uassert(ErrorCodes::InvalidNamespace,
-                    str::stream() << "Invalid ns [" << nss.ns() << "]",
-                    nss.isValid());
+    auto dbResponse = runOperation();

-            uassert(ErrorCodes::IllegalOperation,
-                    "Can't use 'local' database through mongos",
-                    nss.db() != NamespaceString::kLocalDb);
-        }
-
-
-        LOGV2_DEBUG(22867,
-                    3,
-                    "Request::process begin ns: {namespace} msg id: {msgId} op: {operation}",
-                    "Starting operation",
-                    "namespace"_attr = nss,
-                    "msgId"_attr = msgId,
-                    "operation"_attr = networkOpToString(op));
-
-        switch (op) {
-            case dbQuery:
-                // Commands are handled above through Strategy::clientCommand().
-                invariant(!nss.isCommand());
-                opCtx->markKillOnClientDisconnect();
-                dbResponse = Strategy::queryOp(opCtx, nss, &dbm);
-                break;
-
-            case dbGetMore:
-                dbResponse = Strategy::getMore(opCtx, nss, &dbm);
-                break;
-
-            case dbKillCursors:
-                Strategy::killCursors(opCtx, &dbm);  // No Response.
-                break;
-
-            case dbInsert:
-            case dbUpdate:
-            case dbDelete:
-                Strategy::writeOp(opCtx, &dbm);  // No Response.
-                break;
-
-            default:
-                MONGO_UNREACHABLE;
-        }
-
-        LOGV2_DEBUG(22868,
-                    3,
-                    "Request::process end ns: {namespace} msg id: {msgId} op: {operation}",
-                    "Done processing operation",
-                    "namespace"_attr = nss,
-                    "msgId"_attr = msgId,
-                    "operation"_attr = networkOpToString(op));
-
-    } catch (const DBException& ex) {
-        LOGV2_DEBUG(22869,
-                    1,
-                    "Exception thrown while processing {operation} op for {namespace}: {error}",
-                    "Got an error while processing operation",
-                    "operation"_attr = networkOpToString(op),
-                    "namespace"_attr = nss.ns(),
-                    "error"_attr = ex);
-
-        if (op == dbQuery || op == dbGetMore) {
-            dbResponse = replyToQuery(buildErrReply(ex), ResultFlag_ErrSet);
-        } else {
-            // No Response.
-        }
-
-        // We *always* populate the last error for now
-        LastError::get(opCtx->getClient()).setLastError(ex.code(), ex.what());
-        CurOp::get(opCtx)->debug().errInfo = ex.toStatus();
-    }
-
-    // Mark the op as complete, populate the response length, and log it if appropriate.
-    CurOp::get(opCtx)->completeAndLogOperation(
-        opCtx, logv2::LogComponent::kCommand, dbResponse.response.size());
+    LOGV2_DEBUG(22868,
+                3,
+                "Request::process end ns: {namespace} msg id: {msgId} op: {operation}",
+                "Done processing operation",
+                "namespace"_attr = nss,
+                "msgId"_attr = hr->msgId,
+                "operation"_attr = networkOpToString(hr->op));

    return Future<DbResponse>::makeReady(std::move(dbResponse));
-} catch (const DBException& e) {
-    LOGV2(4879803, "Failed to handle request", "error"_attr = redact(e));
-    return e.toStatus();
+} catch (const DBException& ex) {
+    LOGV2_DEBUG(22869,
+                1,
+                "Exception thrown while processing {operation} op for {namespace}: {error}",
+                "Got an error while processing operation",
+                "operation"_attr = networkOpToString(hr->op),
+                "namespace"_attr = hr->nsString.ns(),
+                "error"_attr = ex);
+
+    DbResponse dbResponse;
+    if (hr->op == dbQuery || hr->op == dbGetMore) {
+        dbResponse = replyToQuery(buildErrReply(ex), ResultFlag_ErrSet);
+    } else {
+        // No Response.
+    }
+
+    // We *always* populate the last error for now
+    auto opCtx = hr->rec->getOpCtx();
+    LastError::get(opCtx->getClient()).setLastError(ex.code(), ex.what());
+
+    CurOp::get(opCtx)->debug().errInfo = ex.toStatus();
+
+    return Future<DbResponse>::makeReady(std::move(dbResponse));
+}
+
+struct QueryOpRunner final : public OpRunner {
+    using OpRunner::OpRunner;
+    DbResponse runOperation() override {
+        // Commands are handled through CommandOpRunner and Strategy::clientCommand().
+        invariant(!hr->nsString.isCommand());
+        hr->rec->getOpCtx()->markKillOnClientDisconnect();
+        return Strategy::queryOp(hr->rec->getOpCtx(), hr->nsString, &hr->rec->getDbMessage());
+    }
+};
+
+struct GetMoreOpRunner final : public OpRunner {
+    using OpRunner::OpRunner;
+    DbResponse runOperation() override {
+        return Strategy::getMore(hr->rec->getOpCtx(), hr->nsString, &hr->rec->getDbMessage());
+    }
+};
+
+struct KillCursorsOpRunner final : public OpRunner {
+    using OpRunner::OpRunner;
+    DbResponse runOperation() override {
+        Strategy::killCursors(hr->rec->getOpCtx(), &hr->rec->getDbMessage());  // No Response.
+        return {};
+    }
+};
+
+struct WriteOpRunner final : public OpRunner {
+    using OpRunner::OpRunner;
+    DbResponse runOperation() override {
+        Strategy::writeOp(hr->rec);  // No Response.
+        return {};
+    }
+};
+
+Future<DbResponse> HandleRequest::handleRequest() {
+    switch (op) {  // Dispatch to the runner matching the wire-protocol operation.
+        case dbQuery:
+            if (!nsString.isCommand())
+                return std::make_unique<QueryOpRunner>(shared_from_this())->run();
+            [[fallthrough]];  // It's a query containing a command.
+        case dbMsg:
+            return std::make_unique<CommandOpRunner>(shared_from_this())->run();
+        case dbGetMore:
+            return std::make_unique<GetMoreOpRunner>(shared_from_this())->run();
+        case dbKillCursors:
+            return std::make_unique<KillCursorsOpRunner>(shared_from_this())->run();
+        case dbInsert:
+        case dbUpdate:
+        case dbDelete:
+            return std::make_unique<WriteOpRunner>(shared_from_this())->run();
+        default:  // setupEnvironment() has already rejected unsupported ops.
+            MONGO_UNREACHABLE;
+    }
+}
+
+void HandleRequest::onSuccess(const DbResponse& dbResponse) {
+    auto opCtx = rec->getOpCtx();
+    // Mark the op as complete, populate the response length, and log it if appropriate.
+    CurOp::get(opCtx)->completeAndLogOperation(
+        opCtx, logv2::LogComponent::kCommand, dbResponse.response.size(), slowMsOverride);
+}
+
+Future<DbResponse> HandleRequest::run() {
+    auto fp = makePromiseFuture<void>();
+    auto future = std::move(fp.future)
+                      .then([this, anchor = shared_from_this()] { setupEnvironment(); })
+                      .then([this, anchor = shared_from_this()] { return handleRequest(); })
+                      .tap([this, anchor = shared_from_this()](const DbResponse& dbResponse) {
+                          onSuccess(dbResponse);
+                      })
+                      .tapError([](Status status) {
+                          LOGV2(4879803, "Failed to handle request", "error"_attr = redact(status));
+                      });
+    fp.promise.emplaceValue();  // Fulfill the promise only after the whole chain is attached.
+    return future;
+}
+
+Future<DbResponse> ServiceEntryPointMongos::handleRequest(OperationContext* opCtx,
+                                                          const Message& message) noexcept {
+    auto hr = std::make_shared<HandleRequest>(opCtx, message);
+    return hr->run();
 }

 }  // namespace mongo
--- a/src/mongo/tools/bridge.cpp
+++ b/src/mongo/tools/bridge.cpp
@@ -485,9 +485,6 @@ int bridgeMain(int argc, char** argv) {
    setGlobalServiceContext(ServiceContext::make());
    auto serviceContext = getGlobalServiceContext();
    serviceContext->setServiceEntryPoint(std::make_unique<ServiceEntryPointBridge>(serviceContext));
-    if (auto status = serviceContext->getServiceEntryPoint()->start(); !status.isOK()) {
-        LOGV2(4907203, "Error starting service entry point", "error"_attr = status);
-    }

    transport::TransportLayerASIO::Options opts;
    opts.ipList.emplace_back("0.0.0.0");
@@ -501,6 +498,10 @@ int bridgeMain(int argc, char** argv) {
        return EXIT_NET_ERROR;
    }

+    if (auto status = serviceContext->getServiceEntryPoint()->start(); !status.isOK()) {
+        LOGV2(4907203, "Error starting service entry point", "error"_attr = status);
+    }
+
    if (auto status = tl->start(); !status.isOK()) {
        LOGV2(22923, "Error starting transport layer", "error"_attr = status);
        return EXIT_NET_ERROR;
--- a/src/mongo/transport/SConscript
+++ b/src/mongo/transport/SConscript
@@ -188,6 +188,7 @@ tlEnv.CppUnitTest(
        '$BUILD_DIR/mongo/base',
        '$BUILD_DIR/mongo/db/dbmessage',
        '$BUILD_DIR/mongo/db/service_context',
+        '$BUILD_DIR/mongo/db/service_context_test_fixture',
        '$BUILD_DIR/mongo/rpc/protocol',
        '$BUILD_DIR/mongo/rpc/rpc',
        '$BUILD_DIR/mongo/unittest/unittest',
--- a/src/mongo/transport/mock_session.h
+++ b/src/mongo/transport/mock_session.h
@@ -103,7 +103,11 @@ public:
        return Future<Message>::makeReady(sourceMessage());
    }

-    Future<void> waitForData() override {
+    Status waitForData() override {
+        return asyncWaitForData().getNoThrow();
+    }
+
+    Future<void> asyncWaitForData() override {
        auto fp = makePromiseFuture<void>();
        stdx::lock_guard<Latch> lk(_waitForDataMutex);
        _waitForDataQueue.emplace_back(std::move(fp.promise));
--- a/src/mongo/transport/service_entry_point_impl.cpp
+++ b/src/mongo/transport/service_entry_point_impl.cpp
@@ -40,6 +40,8 @@
 #include "mongo/db/service_context.h"
 #include "mongo/logv2/log.h"
 #include "mongo/transport/ismaster_metrics.h"
+#include "mongo/transport/service_executor.h"
+#include "mongo/transport/service_executor_gen.h"
 #include "mongo/transport/service_state_machine.h"
 #include "mongo/transport/session.h"
 #include "mongo/util/processinfo.h"
@@ -142,15 +144,13 @@ Status ServiceEntryPointImpl::start() {
        }
    }

-    // TODO: Reintroduce SEF once it is attached as initial SE in SERVER-49109
-    // if (auto status = transport::ServiceExecutorFixed::get(_svcCtx)->start(); !status.isOK()) {
-    //     return status;
-    // }
+    if (auto status = transport::ServiceExecutorFixed::get(_svcCtx)->start(); !status.isOK()) {
+        return status;
+    }

    return Status::OK();
 }

-// TODO: explicitly start on the fixed executor
 void ServiceEntryPointImpl::startSession(transport::SessionHandle session) {
    // Setup the restriction environment on the Session, if the Session has local/remote Sockaddrs
    const auto& remoteAddr = session->remoteAddr();
@@ -164,16 +164,6 @@ void ServiceEntryPointImpl::startSession(transport::SessionHandle session) {
    auto clientName = "conn{}"_format(session->id());
    auto client = _svcCtx->makeClient(clientName, session);

-    {
-        stdx::lock_guard lk(*client);
-        auto seCtx =
-            transport::ServiceExecutorContext{}
-                .setThreadingModel(transport::ServiceExecutorContext::ThreadingModel::kDedicated)
-                .setCanUseReserved(canOverrideMaxConns);
-
-        transport::ServiceExecutorContext::set(client.get(), std::move(seCtx));
-    }
-
    auto ssm = std::make_shared<transport::ServiceStateMachine>(std::move(client));

    const bool quiet = serverGlobalParams.quiet.load();
@@ -197,6 +187,7 @@ void ServiceEntryPointImpl::startSession(transport::SessionHandle session) {
        if (!quiet) {
            LOGV2(22942,
                  "Connection refused because there are too many open connections",
+                  "remote"_attr = session->remote(),
                  "connectionCount"_attr = connectionCount);
        }
        return;
@@ -228,7 +219,10 @@ void ServiceEntryPointImpl::startSession(transport::SessionHandle session) {
        }
    });

-    ssm->start();
+    auto seCtx = transport::ServiceExecutorContext{};
+    seCtx.setThreadingModel(transport::ServiceExecutor::getInitialThreadingModel());
+    seCtx.setCanUseReserved(canOverrideMaxConns);
+    ssm->start(std::move(seCtx));
 }

 void ServiceEntryPointImpl::endAllSessions(transport::Session::TagMask tags) {
@@ -289,13 +283,12 @@ bool ServiceEntryPointImpl::shutdown(Milliseconds timeout) {

    lk.unlock();

-    // TODO: Reintroduce SEF once it is attached as initial SE in SERVER-49109
-    // timeSpent = _svcCtx->getPreciseClockSource()->now() - start;
-    // timeout = std::max(Milliseconds{0}, timeout - timeSpent);
-    // if (auto status = transport::ServiceExecutorFixed::get(_svcCtx)->shutdown(timeout);
-    //     !status.isOK()) {
-    //     LOGV2(4907202, "Failed to shutdown ServiceExecutorFixed", "error"_attr = status);
-    // }
+    timeSpent = _svcCtx->getPreciseClockSource()->now() - start;
+    timeout = std::max(Milliseconds{0}, timeout - timeSpent);
+    if (auto status = transport::ServiceExecutorFixed::get(_svcCtx)->shutdown(timeout);
+        !status.isOK()) {
+        LOGV2(4907202, "Failed to shutdown ServiceExecutorFixed", "error"_attr = status);
+    }

    timeSpent = _svcCtx->getPreciseClockSource()->now() - start;
    timeout = std::max(Milliseconds{0}, timeout - timeSpent);
@@ -329,15 +322,17 @@ void ServiceEntryPointImpl::appendStats(BSONObjBuilder* bob) const {

    invariant(_svcCtx);
    bob->append("active", static_cast<int>(_svcCtx->getActiveClientOperations()));
+
+    const auto seStats = transport::ServiceExecutorStats::get(_svcCtx);
+    bob->append("threaded", static_cast<int>(seStats.usesDedicated));
+    if (serverGlobalParams.maxConnsOverride.size()) {
+        bob->append("limitExempt", static_cast<int>(seStats.limitExempt));
+    }
+
    bob->append("exhaustIsMaster",
                static_cast<int>(IsMasterMetrics::get(_svcCtx)->getNumExhaustIsMaster()));
    bob->append("awaitingTopologyChanges",
                static_cast<int>(IsMasterMetrics::get(_svcCtx)->getNumAwaitingTopologyChanges()));
-
-    if (auto adminExec = transport::ServiceExecutorReserved::get(_svcCtx)) {
-        BSONObjBuilder section(bob->subobjStart("adminConnections"));
-        adminExec->appendStats(&section);
-    }
 }

 }  // namespace mongo
--- a/src/mongo/transport/service_executor.cpp
+++ b/src/mongo/transport/service_executor.cpp
@@ -40,27 +40,55 @@
 #include "mongo/transport/service_executor_fixed.h"
 #include "mongo/transport/service_executor_reserved.h"
 #include "mongo/transport/service_executor_synchronous.h"
+#include "mongo/util/synchronized_value.h"

 namespace mongo {
 namespace transport {
 namespace {
 static constexpr auto kDiagnosticLogLevel = 4;

+static constexpr auto kThreadingModelDedicatedStr = "dedicated"_sd;
+static constexpr auto kThreadingModelBorrowedStr = "borrowed"_sd;
+
+auto gInitialThreadingModel = ServiceExecutor::ThreadingModel::kDedicated;
+
+auto getServiceExecutorStats =
+    ServiceContext::declareDecoration<synchronized_value<ServiceExecutorStats>>();
 auto getServiceExecutorContext =
    Client::declareDecoration<boost::optional<ServiceExecutorContext>>();
 }  // namespace

-StringData toString(ServiceExecutorContext::ThreadingModel threadingModel) {
+StringData toString(ServiceExecutor::ThreadingModel threadingModel) {
    switch (threadingModel) {
-        case ServiceExecutorContext::ThreadingModel::kDedicated:
-            return "Dedicated"_sd;
-        case ServiceExecutorContext::ThreadingModel::kBorrowed:
-            return "Borrowed"_sd;
+        case ServiceExecutor::ThreadingModel::kDedicated:
+            return kThreadingModelDedicatedStr;
+        case ServiceExecutor::ThreadingModel::kBorrowed:
+            return kThreadingModelBorrowedStr;
        default:
            MONGO_UNREACHABLE;
    }
 }

+Status ServiceExecutor::setInitialThreadingModel(StringData value) noexcept {
+    if (value == kThreadingModelDedicatedStr) {
+        gInitialThreadingModel = ServiceExecutor::ThreadingModel::kDedicated;
+    } else if (value == kThreadingModelBorrowedStr) {
+        gInitialThreadingModel = ServiceExecutor::ThreadingModel::kBorrowed;
+    } else {
+        MONGO_UNREACHABLE;
+    }
+
+    return Status::OK();
+}
+
+auto ServiceExecutor::getInitialThreadingModel() noexcept -> ThreadingModel {
+    return gInitialThreadingModel;
+}
+
+ServiceExecutorStats ServiceExecutorStats::get(ServiceContext* ctx) noexcept {
+    return getServiceExecutorStats(ctx).get();
+}
+
 ServiceExecutorContext* ServiceExecutorContext::get(Client* client) noexcept {
    auto& serviceExecutorContext = getServiceExecutorContext(client);

@@ -79,6 +107,24 @@ void ServiceExecutorContext::set(Client* client, ServiceExecutorContext seCtx) n
    seCtx._client = client;
    seCtx._sep = client->getServiceContext()->getServiceEntryPoint();

+    {
+        auto stats = getServiceExecutorStats(client->getServiceContext()).synchronize();
+        if (seCtx._canUseReserved) {
+            ++stats->limitExempt;
+        }
+
+        switch (seCtx._threadingModel) {
+            case ThreadingModel::kBorrowed: {
+                ++stats->usesBorrowed;
+            } break;
+            case ThreadingModel::kDedicated: {
+                ++stats->usesDedicated;
+            } break;
+            default:
+                MONGO_UNREACHABLE;
+        }
+    }
+
    LOGV2_DEBUG(4898000,
                kDiagnosticLogLevel,
                "Setting initial ServiceExecutor context for client",
@@ -88,18 +134,91 @@ void ServiceExecutorContext::set(Client* client, ServiceExecutorContext seCtx) n
    serviceExecutorContext = std::move(seCtx);
 }

-ServiceExecutorContext& ServiceExecutorContext::setThreadingModel(
-    ThreadingModel threadingModel) noexcept {
-    _threadingModel = threadingModel;
-    return *this;
+void ServiceExecutorContext::reset(Client* client) noexcept {
+    if (client) {
+        auto& serviceExecutorContext = getServiceExecutorContext(client);
+
+        auto stats = getServiceExecutorStats(client->getServiceContext()).synchronize();
+
+        LOGV2_DEBUG(4898001,
+                    kDiagnosticLogLevel,
+                    "Resetting ServiceExecutor context for client",
+                    "client"_attr = client->desc(),
+                    "threadingModel"_attr = serviceExecutorContext->_threadingModel,
+                    "canUseReserved"_attr = serviceExecutorContext->_canUseReserved);
+
+        if (serviceExecutorContext->_canUseReserved) {
+            --stats->limitExempt;
+        }
+
+        switch (serviceExecutorContext->_threadingModel) {
+            case ThreadingModel::kBorrowed: {
+                --stats->usesBorrowed;
+            } break;
+            case ThreadingModel::kDedicated: {
+                --stats->usesDedicated;
+            } break;
+            default:
+                MONGO_UNREACHABLE;
+        }
+    }
 }

-ServiceExecutorContext& ServiceExecutorContext::setCanUseReserved(bool canUseReserved) noexcept {
+void ServiceExecutorContext::setThreadingModel(ThreadingModel threadingModel) noexcept {
+
+    if (_threadingModel == threadingModel) {
+        // Nothing to do.
+        return;
+    }
+
+    auto lastThreadingModel = std::exchange(_threadingModel, threadingModel);
+
+    if (_client) {
+        auto stats = getServiceExecutorStats(_client->getServiceContext()).synchronize();
+
+        // Decrement the stats for the previous ThreadingModel.
+        switch (lastThreadingModel) {
+            case ThreadingModel::kBorrowed: {
+                --stats->usesBorrowed;
+            } break;
+            case ThreadingModel::kDedicated: {
+                --stats->usesDedicated;
+            } break;
+            default:
+                MONGO_UNREACHABLE;
+        }
+        // Increment the stats for the next ThreadingModel.
+        switch (_threadingModel) {
+            case ThreadingModel::kBorrowed: {
+                ++stats->usesBorrowed;
+            } break;
+            case ThreadingModel::kDedicated: {
+                ++stats->usesDedicated;
+            } break;
+            default:
+                MONGO_UNREACHABLE;
+        }
+    }
+}
+
+void ServiceExecutorContext::setCanUseReserved(bool canUseReserved) noexcept {
+    if (_canUseReserved == canUseReserved) {
+        // Nothing to do.
+        return;
+    }
+
    _canUseReserved = canUseReserved;
-    return *this;
+    if (_client) {
+        auto stats = getServiceExecutorStats(_client->getServiceContext()).synchronize();
+        if (canUseReserved) {
+            ++stats->limitExempt;
+        } else {
+            --stats->limitExempt;
+        }
+    }
 }

-ServiceExecutor* ServiceExecutorContext::getServiceExecutor() const noexcept {
+ServiceExecutor* ServiceExecutorContext::getServiceExecutor() noexcept {
    invariant(_client);

    switch (_threadingModel) {
@@ -121,13 +240,16 @@ ServiceExecutor* ServiceExecutorContext::getServiceExecutor() const noexcept {
        return _sep->numOpenSessions() > _sep->maxOpenSessions();
    };

-    if (_canUseReserved && shouldUseReserved()) {
+    if (_canUseReserved && !_hasUsedSynchronous && shouldUseReserved()) {
        if (auto exec = transport::ServiceExecutorReserved::get(_client->getServiceContext())) {
-            // We are allowed to use the reserved executor, we should use it, and it exists.
+            // We are allowed to use the reserved, we have not used the synchronous, we should use
+            // the reserved, and the reserved exists.
            return exec;
        }
    }

+    // Once we use the ServiceExecutorSynchronous, we shouldn't use the ServiceExecutorReserved.
+    _hasUsedSynchronous = true;
    return transport::ServiceExecutorSynchronous::get(_client->getServiceContext());
 }

--- a/src/mongo/transport/service_executor.h
+++ b/src/mongo/transport/service_executor.h
@@ -51,6 +51,20 @@ namespace transport {
 */
 class ServiceExecutor : public OutOfLineExecutor {
 public:
+    /**
+     * An enum to indicate if a ServiceExecutor should use dedicated or borrowed threading
+     * resources.
+     */
+    enum class ThreadingModel {
+        kBorrowed,
+        kDedicated,
+    };
+
+    friend StringData toString(ThreadingModel threadingModel);
+
+    static Status setInitialThreadingModel(StringData value) noexcept;
+    static ThreadingModel getInitialThreadingModel() noexcept;
+
    virtual ~ServiceExecutor() = default;
    using Task = unique_function<void()>;
    enum ScheduleFlags {
@@ -100,7 +114,7 @@ public:
     * schedule the callback on current executor. Otherwise, it will invoke the callback with a
     * non-okay status on the caller thread.
     */
-    virtual void runOnDataAvailable(Session* session,
+    virtual void runOnDataAvailable(const SessionHandle& session,
                                    OutOfLineExecutor::Task onCompletionCallback) = 0;

    /*
@@ -127,10 +141,7 @@ public:
 */
 class ServiceExecutorContext {
 public:
-    enum ThreadingModel {
-        kBorrowed,
-        kDedicated,
-    };
+    using ThreadingModel = ServiceExecutor::ThreadingModel;

    /**
     * Get a pointer to the ServiceExecutorContext for a given client.
@@ -146,21 +157,43 @@ public:
     */
    static void set(Client* client, ServiceExecutorContext seCtx) noexcept;

+
+    /**
+     * Reset the ServiceExecutorContext for a given client.
+     *
+     * This function may only be invoked once and only while under the Client lock.
+     */
+    static void reset(Client* client) noexcept;
+
    ServiceExecutorContext() = default;
+    ServiceExecutorContext(const ServiceExecutorContext&) = delete;
+    ServiceExecutorContext& operator=(const ServiceExecutorContext&) = delete;
+    ServiceExecutorContext(ServiceExecutorContext&& seCtx)
+        : _client{std::exchange(seCtx._client, nullptr)},
+          _sep{std::exchange(seCtx._sep, nullptr)},
+          _threadingModel{seCtx._threadingModel},
+          _canUseReserved{seCtx._canUseReserved} {}
+    ServiceExecutorContext& operator=(ServiceExecutorContext&& seCtx) {
+        _client = std::exchange(seCtx._client, nullptr);
+        _sep = std::exchange(seCtx._sep, nullptr);
+        _threadingModel = seCtx._threadingModel;
+        _canUseReserved = seCtx._canUseReserved;
+        return *this;
+    }

    /**
     * Set the ThreadingModel for the associated Client's service execution.
     *
     * This function is only valid to invoke with the Client lock or before the Client is set.
     */
-    ServiceExecutorContext& setThreadingModel(ThreadingModel threadingModel) noexcept;
+    void setThreadingModel(ThreadingModel threadingModel) noexcept;

    /**
     * Set if reserved resources are available for the associated Client's service execution.
     *
     * This function is only valid to invoke with the Client lock or before the Client is set.
     */
-    ServiceExecutorContext& setCanUseReserved(bool canUseReserved) noexcept;
+    void setCanUseReserved(bool canUseReserved) noexcept;

    /**
     * Get the ThreadingModel for the associated Client.
@@ -177,18 +210,39 @@ public:
     * This function is only valid to invoke from the associated Client thread. This function does
     * not require the Client lock since all writes must also happen from that thread.
     */
-    ServiceExecutor* getServiceExecutor() const noexcept;
+    ServiceExecutor* getServiceExecutor() noexcept;

 private:
-    friend StringData toString(ThreadingModel threadingModel);
-
    Client* _client = nullptr;
    ServiceEntryPoint* _sep = nullptr;

    ThreadingModel _threadingModel = ThreadingModel::kDedicated;
    bool _canUseReserved = false;
+    bool _hasUsedSynchronous = false;
 };

+/**
+ * A small statlet for tracking which executors may be in use.
+ */
+class ServiceExecutorStats {
+public:
+    /**
+     * Get the current value of ServiceExecutorStats for the given ServiceContext.
+     *
+     * Note that this value is intended for statistics and logging. It is unsynchronized and
+     * unsuitable for informing decisions at runtime.
+     */
+    static ServiceExecutorStats get(ServiceContext* ctx) noexcept;
+
+    // The number of Clients who use the dedicated executors.
+    size_t usesDedicated = 0;
+
+    // The number of Clients who use the borrowed executors.
+    size_t usesBorrowed = 0;
+
+    // The number of Clients that are allowed to ignore maxConns and use reserved resources.
+    size_t limitExempt = 0;
+};

 }  // namespace transport

--- a/src/mongo/transport/service_executor.idl
+++ b/src/mongo/transport/service_executor.idl
@@ -28,8 +28,18 @@

 global:
  cpp_namespace: "mongo::transport"
+  cpp_includes:
+    - "mongo/transport/service_executor.h"

 server_parameters:
+  initialServiceExecutorThreadingModel:
+    description: >-
+        Start new client connections using an executor that follows this model.
+    set_at: [ startup ]
+    cpp_vartype: "std::string"
+    cpp_varname: "initialServiceExecutorThreadingModel"
+    on_update: "ServiceExecutor::setInitialThreadingModel"
+    default: "dedicated"
  synchronousServiceExecutorRecursionLimit:
    description: >-
        Tasks may recurse further if their recursion depth is less than this value.
@@ -53,3 +63,14 @@ server_parameters:
    cpp_vartype: 'AtomicWord<int>'
    cpp_varname: reservedServiceExecutorRecursionLimit
    default: 8
+
+  fixedServiceExecutorThreadLimit:
+    description: >-
+        The fixed service executor (thread model "borrowed") can only maintain a count of threads
+        less than this value.
+    set_at: [ startup ]
+    cpp_vartype: "int"
+    cpp_varname: "fixedServiceExecutorThreadLimit"
+    default: 1000
+    validator:
+        gte: 10
--- a/src/mongo/transport/service_executor_fixed.cpp
+++ b/src/mongo/transport/service_executor_fixed.cpp
@@ -35,62 +35,188 @@
 #include "mongo/logv2/log.h"
 #include "mongo/transport/service_executor_gen.h"
 #include "mongo/transport/session.h"
+#include "mongo/transport/transport_layer.h"
 #include "mongo/util/assert_util.h"
 #include "mongo/util/fail_point.h"
+#include "mongo/util/testing_proctor.h"
 #include "mongo/util/thread_safety_context.h"

 namespace mongo {

 MONGO_FAIL_POINT_DEFINE(hangBeforeSchedulingServiceExecutorFixedTask);
+MONGO_FAIL_POINT_DEFINE(hangAfterServiceExecutorFixedExecutorThreadsStart);
+MONGO_FAIL_POINT_DEFINE(hangBeforeServiceExecutorFixedLastExecutorThreadReturns);

 namespace transport {
 namespace {
-constexpr auto kThreadsRunning = "threadsRunning"_sd;
-constexpr auto kExecutorLabel = "executor"_sd;
 constexpr auto kExecutorName = "fixed"_sd;

+constexpr auto kThreadsRunning = "threadsRunning"_sd;
+constexpr auto kClientsInTotal = "clientsInTotal"_sd;
+constexpr auto kClientsRunning = "clientsRunning"_sd;
+constexpr auto kClientsWaiting = "clientsWaitingForData"_sd;
+
 const auto getServiceExecutorFixed =
-    ServiceContext::declareDecoration<std::unique_ptr<ServiceExecutorFixed>>();
+    ServiceContext::declareDecoration<std::shared_ptr<ServiceExecutorFixed>>();

 const auto serviceExecutorFixedRegisterer = ServiceContext::ConstructorActionRegisterer{
    "ServiceExecutorFixed", [](ServiceContext* ctx) {
+        auto limits = ThreadPool::Limits{};
+        limits.minThreads = 0;
+        limits.maxThreads = fixedServiceExecutorThreadLimit;
        getServiceExecutorFixed(ctx) =
-            std::make_unique<ServiceExecutorFixed>(ThreadPool::Options{});
+            std::make_shared<ServiceExecutorFixed>(ctx, std::move(limits));
    }};
 }  // namespace

-ServiceExecutorFixed::ServiceExecutorFixed(ThreadPool::Options options)
-    : _options(std::move(options)) {
-    _options.onCreateThread =
-        [this, onCreate = std::move(_options.onCreateThread)](const std::string& name) mutable {
-            _executorContext = std::make_unique<ExecutorThreadContext>(this->weak_from_this());
-            if (onCreate) {
-                onCreate(name);
-            }
-        };
-    _threadPool = std::make_unique<ThreadPool>(_options);
+class ServiceExecutorFixed::ExecutorThreadContext {
+public:
+    ExecutorThreadContext(ServiceExecutorFixed* serviceExecutor);
+    ~ExecutorThreadContext();
+
+    ExecutorThreadContext(ExecutorThreadContext&&) = delete;
+    ExecutorThreadContext(const ExecutorThreadContext&) = delete;
+
+    template <typename Task>
+    void run(Task&& task) {
+        // Yield here to improve concurrency, especially when there are more executor threads
+        // than CPU cores.
+        stdx::this_thread::yield();
+        _executor->_stats.tasksStarted.fetchAndAdd(1);
+        _recursionDepth++;
+
+        ON_BLOCK_EXIT([&] {
+            _recursionDepth--;
+            _executor->_stats.tasksEnded.fetchAndAdd(1);
+
+            auto lk = stdx::lock_guard(_executor->_mutex);
+            _executor->_checkForShutdown(lk);
+        });
+
+        std::forward<Task>(task)();
+    }
+
+    int getRecursionDepth() const {
+        return _recursionDepth;
+    }
+
+private:
+    ServiceExecutorFixed* const _executor;
+    int _recursionDepth = 0;
+};
+
+ServiceExecutorFixed::ExecutorThreadContext::ExecutorThreadContext(
+    ServiceExecutorFixed* serviceExecutor)
+    : _executor(serviceExecutor) {
+    _executor->_stats.threadsStarted.fetchAndAdd(1);
+    hangAfterServiceExecutorFixedExecutorThreadsStart.pauseWhileSet();
+}
+
+ServiceExecutorFixed::ExecutorThreadContext::~ExecutorThreadContext() {
+    auto ended = _executor->_stats.threadsEnded.addAndFetch(1);
+    auto started = _executor->_stats.threadsStarted.loadRelaxed();
+    if (ended == started) {
+        hangBeforeServiceExecutorFixedLastExecutorThreadReturns.pauseWhileSet();
+    }
+}
+
+thread_local std::unique_ptr<ServiceExecutorFixed::ExecutorThreadContext>
+    ServiceExecutorFixed::_executorContext;
+
+ServiceExecutorFixed::ServiceExecutorFixed(ServiceContext* ctx, ThreadPool::Limits limits)
+    : _svcCtx{ctx}, _options(std::move(limits)) {
+    _options.poolName = "ServiceExecutorFixed";
+    _options.onCreateThread = [this](const auto&) {
+        _executorContext = std::make_unique<ExecutorThreadContext>(this);
+    };
+
+    _threadPool = std::make_shared<ThreadPool>(_options);
 }

 ServiceExecutorFixed::~ServiceExecutorFixed() {
-    invariant(!_canScheduleWork.load());
-    if (_state == State::kNotStarted)
-        return;
+    switch (_state) {
+        case State::kNotStarted:
+            return;
+        case State::kRunning: {
+            // We should not be running while in this destructor.
+            MONGO_UNREACHABLE;
+        }
+        case State::kStopping:
+        case State::kStopped: {
+            // We can go ahead and attempt to join our thread pool.
+        } break;
+        default: { MONGO_UNREACHABLE; }
+    }

-    // Ensures we always call "shutdown" after staring the service executor
-    invariant(_state == State::kStopped);
+    LOGV2_DEBUG(4910502,
+                kDiagnosticLogLevel,
+                "Shutting down pool for fixed thread-pool service executor",
+                "name"_attr = _options.poolName);
+
+    // We can only destruct when we have joined all of our tasks and canceled all of our sessions.
+    // This thread pool doesn't get to refuse work over its lifetime. It's possible that tasks are
+    // still blocking. If so, we block until they finish here.
    _threadPool->shutdown();
    _threadPool->join();
-    invariant(_numRunningExecutorThreads.load() == 0);
+
+    invariant(_threadsRunning() == 0);
+    invariant(_tasksRunning() == 0);
+    invariant(_tasksWaiting() == 0);
 }

 Status ServiceExecutorFixed::start() {
-    stdx::lock_guard<Latch> lk(_mutex);
-    auto oldState = std::exchange(_state, State::kRunning);
-    invariant(oldState == State::kNotStarted);
+    {
+        stdx::lock_guard<Latch> lk(_mutex);
+        switch (_state) {
+            case State::kNotStarted: {
+                // Time to start
+                _state = State::kRunning;
+            } break;
+            case State::kRunning: {
+                return Status::OK();
+            }
+            case State::kStopping:
+            case State::kStopped: {
+                return {ErrorCodes::ServiceExecutorInShutdown,
+                        "ServiceExecutorFixed is already stopping or stopped"};
+            }
+            default: { MONGO_UNREACHABLE; }
+        };
+    }
+
+    LOGV2_DEBUG(4910501,
+                kDiagnosticLogLevel,
+                "Starting fixed thread-pool service executor",
+                "name"_attr = _options.poolName);
+
    _threadPool->startup();
-    _canScheduleWork.store(true);
-    LOGV2_DEBUG(
-        4910501, 3, "Started fixed thread-pool service executor", "name"_attr = _options.poolName);
+
+    if (!_svcCtx) {
+        // For some tests, we do not have a ServiceContext.
+        invariant(TestingProctor::instance().isEnabled());
+        return Status::OK();
+    }
+
+    auto tl = _svcCtx->getTransportLayer();
+    invariant(tl);
+
+    auto reactor = tl->getReactor(TransportLayer::WhichReactor::kIngress);
+    invariant(reactor);
+    _threadPool->schedule([this, reactor](Status) {
+        {
+            // Check to make sure we haven't been shutdown already. Note that there is still a brief
+            // race that immediately follows this check. ASIOReactor::stop() is not permanent, thus
+            // our run() could "restart" the reactor.
+            stdx::lock_guard<Latch> lk(_mutex);
+            if (_state != kRunning) {
+                return;
+            }
+        }
+
+        // Start running on the reactor immediately.
+        reactor->run();
+    });
+
    return Status::OK();
 }

@@ -101,37 +227,115 @@ ServiceExecutorFixed* ServiceExecutorFixed::get(ServiceContext* ctx) {
 }

 Status ServiceExecutorFixed::shutdown(Milliseconds timeout) {
-    auto waitForShutdown = [&]() mutable -> Status {
-        stdx::unique_lock<Latch> lk(_mutex);
-        bool success = _shutdownCondition.wait_for(lk, timeout.toSystemDuration(), [this] {
-            return _numRunningExecutorThreads.load() == 0;
-        });
-        return success ? Status::OK()
-                       : Status(ErrorCodes::ExceededTimeLimit,
-                                "Failed to shutdown all executor threads within the time limit");
-    };
-
-    LOGV2_DEBUG(4910502,
-                3,
+    LOGV2_DEBUG(4910503,
+                kDiagnosticLogLevel,
                "Shutting down fixed thread-pool service executor",
                "name"_attr = _options.poolName);

    {
-        stdx::lock_guard<Latch> lk(_mutex);
-        _canScheduleWork.store(false);
+        auto lk = stdx::unique_lock(_mutex);

-        auto oldState = std::exchange(_state, State::kStopped);
-        if (oldState != State::kStopped) {
-            _threadPool->shutdown();
+        switch (_state) {
+            case State::kNotStarted:
+            case State::kRunning: {
+                _state = State::kStopping;
+
+                for (auto& waiter : _waiters) {
+                    // Cancel any session we own.
+                    waiter.session->cancelAsyncOperations();
+                }
+
+                // There may not be outstanding threads, check for shutdown now.
+                _checkForShutdown(lk);
+
+                if (_state == State::kStopped) {
+                    // We were able to become stopped immediately.
+                    return Status::OK();
+                }
+            } break;
+            case State::kStopping: {
+                // Just need to wait it out.
+            } break;
+            case State::kStopped: {
+                // Totally done.
+                return Status::OK();
+            } break;
+            default: { MONGO_UNREACHABLE; }
        }
    }

-    return waitForShutdown();
+    LOGV2_DEBUG(4910504,
+                kDiagnosticLogLevel,
+                "Waiting for shutdown of fixed thread-pool service executor",
+                "name"_attr = _options.poolName);
+
+    // There is a world where we are able to simply do a timed wait upon a future chain. However,
+    // that world likely requires an OperationContext available through shutdown.
+    auto lk = stdx::unique_lock(_mutex);
+    if (!_shutdownCondition.wait_for(
+            lk, timeout.toSystemDuration(), [this] { return _state == State::kStopped; })) {
+        return Status(ErrorCodes::ExceededTimeLimit,
+                      "Failed to shutdown all executor threads within the time limit");
+    }
+
+    return Status::OK();
+}
+
+void ServiceExecutorFixed::_checkForShutdown(WithLock) {
+    if (_state == State::kRunning) {
+        // We're actively running.
+        return;
+    }
+    invariant(_state != State::kNotStarted);
+
+    if (!_waiters.empty()) {
+        // We still have some in wait.
+        return;
+    }
+
+    auto tasksLeft = _tasksLeft();
+    if (tasksLeft > 0) {
+        // We have tasks remaining.
+        return;
+    }
+    invariant(tasksLeft == 0);
+
+    // We have achieved a soft form of shutdown:
+    // - _state != kRunning means that there will be no new external tasks or waiters.
+    // - _waiters.empty() means that all network waits have finished and there will be no new
+    //   internal tasks.
+    // - _tasksLeft() == 0 means that all tasks, both internal and external have finished.
+    //
+    // From this point on, all of our threads will be idle. When the dtor runs, the thread pool will
+    // experience a trivial shutdown() and join().
+    _state = State::kStopped;
+
+    LOGV2_DEBUG(
+        4910505, kDiagnosticLogLevel, "Finishing shutdown", "name"_attr = _options.poolName);
+    _shutdownCondition.notify_one();
+
+    if (!_svcCtx) {
+        // For some tests, we do not have a ServiceContext.
+        invariant(TestingProctor::instance().isEnabled());
+        return;
+    }
+
+    auto tl = _svcCtx->getTransportLayer();
+    invariant(tl);
+
+    auto reactor = tl->getReactor(TransportLayer::WhichReactor::kIngress);
+    invariant(reactor);
+    reactor->stop();
 }

 Status ServiceExecutorFixed::scheduleTask(Task task, ScheduleFlags flags) {
-    if (!_canScheduleWork.load()) {
-        return Status(ErrorCodes::ShutdownInProgress, "Executor is not running");
+    {
+        auto lk = stdx::unique_lock(_mutex);
+        if (_state != State::kRunning) {
+            return kInShutdown;
+        }
+
+        _stats.tasksScheduled.fetchAndAdd(1);
    }

    auto mayExecuteTaskInline = [&] {
@@ -155,29 +359,79 @@ Status ServiceExecutorFixed::scheduleTask(Task task, ScheduleFlags flags) {

    hangBeforeSchedulingServiceExecutorFixedTask.pauseWhileSet();

-    // May throw if an attempt is made to schedule after the thread pool is shutdown.
-    try {
-        _threadPool->schedule([task = std::move(task)](Status status) mutable {
-            internalAssert(status);
-            invariant(_executorContext);
-            _executorContext->run(std::move(task));
-        });
-    } catch (DBException& e) {
-        return e.toStatus();
-    }
+    _threadPool->schedule([this, task = std::move(task)](Status status) mutable {
+        invariant(status);
+
+        _executorContext->run([&] { task(); });
+    });

    return Status::OK();
 }

-void ServiceExecutorFixed::runOnDataAvailable(Session* session,
+void ServiceExecutorFixed::_schedule(OutOfLineExecutor::Task task) noexcept {
+    {
+        auto lk = stdx::unique_lock(_mutex);
+        if (_state != State::kRunning) {
+            lk.unlock();
+
+            task(kInShutdown);
+            return;
+        }
+
+        _stats.tasksScheduled.fetchAndAdd(1);
+    }
+
+    _threadPool->schedule([this, task = std::move(task)](Status status) mutable {
+        _executorContext->run([&] { task(std::move(status)); });
+    });
+}
+
+void ServiceExecutorFixed::runOnDataAvailable(const SessionHandle& session,
                                              OutOfLineExecutor::Task onCompletionCallback) {
    invariant(session);
-    session->waitForData().thenRunOn(shared_from_this()).getAsync(std::move(onCompletionCallback));
+
+    auto waiter = Waiter{session, std::move(onCompletionCallback)};
+
+    WaiterList::iterator it;
+    {
+        // Make sure we're still allowed to schedule and track the session
+        auto lk = stdx::unique_lock(_mutex);
+        if (_state != State::kRunning) {
+            lk.unlock();
+            waiter.onCompletionCallback(kInShutdown);
+            return;
+        }
+
+        it = _waiters.emplace(_waiters.end(), std::move(waiter));
+
+        _stats.waitersStarted.fetchAndAdd(1);
+    }
+
+    session->asyncWaitForData()
+        .thenRunOn(shared_from_this())
+        .getAsync([this, anchor = shared_from_this(), it](Status status) mutable {
+            Waiter waiter;
+            {
+                // Remove our waiter from the list.
+                auto lk = stdx::unique_lock(_mutex);
+                waiter = std::exchange(*it, {});
+                _waiters.erase(it);
+
+                _stats.waitersEnded.fetchAndAdd(1);
+            }
+
+            waiter.onCompletionCallback(std::move(status));
+        });
 }

 void ServiceExecutorFixed::appendStats(BSONObjBuilder* bob) const {
-    *bob << kExecutorLabel << kExecutorName << kThreadsRunning
-         << static_cast<int>(_numRunningExecutorThreads.load());
+    // The ServiceExecutorFixed schedules Clients temporarily onto its threads and waits
+    // asynchronously.
+    BSONObjBuilder subbob = bob->subobjStart(kExecutorName);
+    subbob.append(kThreadsRunning, static_cast<int>(_threadsRunning()));
+    subbob.append(kClientsInTotal, static_cast<int>(_tasksTotal()));
+    subbob.append(kClientsRunning, static_cast<int>(_tasksRunning()));
+    subbob.append(kClientsWaiting, static_cast<int>(_tasksWaiting()));
 }

 int ServiceExecutorFixed::getRecursionDepthForExecutorThread() const {
--- a/src/mongo/transport/service_executor_fixed.h
+++ b/src/mongo/transport/service_executor_fixed.h
@@ -29,6 +29,7 @@

 #pragma once

+#include <boost/optional.hpp>
 #include <memory>

 #include "mongo/base/status.h"
@@ -37,8 +38,11 @@
 #include "mongo/platform/mutex.h"
 #include "mongo/stdx/condition_variable.h"
 #include "mongo/stdx/thread.h"
+#include "mongo/stdx/unordered_map.h"
 #include "mongo/transport/service_executor.h"
 #include "mongo/util/concurrency/thread_pool.h"
+#include "mongo/util/concurrency/with_lock.h"
+#include "mongo/util/future.h"
 #include "mongo/util/hierarchical_acquisition.h"

 namespace mongo {
@@ -49,10 +53,17 @@ namespace transport {
 * This executor always yields before executing scheduled tasks, and never yields before scheduling
 * new tasks (i.e., `ScheduleFlags::kMayYieldBeforeSchedule` is a no-op for this executor).
 */
-class ServiceExecutorFixed : public ServiceExecutor,
-                             public std::enable_shared_from_this<ServiceExecutorFixed> {
+class ServiceExecutorFixed final : public ServiceExecutor,
+                                   public std::enable_shared_from_this<ServiceExecutorFixed> {
+    static constexpr auto kDiagnosticLogLevel = 3;
+
+    static const inline auto kInShutdown =
+        Status(ErrorCodes::ServiceExecutorInShutdown, "ServiceExecutorFixed is not running");
+
 public:
-    explicit ServiceExecutorFixed(ThreadPool::Options options);
+    explicit ServiceExecutorFixed(ServiceContext* ctx, ThreadPool::Limits limits);
+    explicit ServiceExecutorFixed(ThreadPool::Limits limits)
+        : ServiceExecutorFixed(nullptr, std::move(limits)) {}
    virtual ~ServiceExecutorFixed();

    static ServiceExecutorFixed* get(ServiceContext* ctx);
@@ -60,8 +71,11 @@ public:
    Status start() override;
    Status shutdown(Milliseconds timeout) override;
    Status scheduleTask(Task task, ScheduleFlags flags) override;
+    void schedule(OutOfLineExecutor::Task task) override {
+        _schedule(std::move(task));
+    }

-    void runOnDataAvailable(Session* session,
+    void runOnDataAvailable(const SessionHandle& session,
                            OutOfLineExecutor::Task onCompletionCallback) override;

    Mode transportMode() const override {
@@ -78,62 +92,76 @@ public:

 private:
    // Maintains the execution state (e.g., recursion depth) for executor threads
-    class ExecutorThreadContext {
-    public:
-        ExecutorThreadContext(std::weak_ptr<ServiceExecutorFixed> serviceExecutor)
-            : _executor(std::move(serviceExecutor)) {
-            _adjustRunningExecutorThreads(1);
-        }
-
-        ExecutorThreadContext(ExecutorThreadContext&&) = delete;
-        ExecutorThreadContext(const ExecutorThreadContext&) = delete;
-
-        ~ExecutorThreadContext() {
-            _adjustRunningExecutorThreads(-1);
-        }
-
-        void run(ServiceExecutor::Task task) {
-            // Yield here to improve concurrency, especially when there are more executor threads
-            // than CPU cores.
-            stdx::this_thread::yield();
-            _recursionDepth++;
-            task();
-            _recursionDepth--;
-        }
-
-        int getRecursionDepth() const {
-            return _recursionDepth;
-        }
-
-    private:
-        void _adjustRunningExecutorThreads(int adjustment) {
-            if (auto executor = _executor.lock()) {
-                executor->_numRunningExecutorThreads.fetchAndAdd(adjustment);
-            }
-        }
-
-        int _recursionDepth = 0;
-        std::weak_ptr<ServiceExecutorFixed> _executor;
-    };
+    class ExecutorThreadContext;

 private:
-    AtomicWord<size_t> _numRunningExecutorThreads{0};
-    AtomicWord<bool> _canScheduleWork{false};
+    void _checkForShutdown(WithLock);
+    void _schedule(OutOfLineExecutor::Task task) noexcept;
+
+    auto _threadsRunning() const {
+        auto ended = _stats.threadsEnded.load();
+        auto started = _stats.threadsStarted.loadRelaxed();
+        return started - ended;
+    }
+
+    auto _tasksRunning() const {
+        auto ended = _stats.tasksEnded.load();
+        auto started = _stats.tasksStarted.loadRelaxed();
+        return started - ended;
+    }
+
+    auto _tasksLeft() const {
+        auto ended = _stats.tasksEnded.load();
+        auto scheduled = _stats.tasksScheduled.loadRelaxed();
+        return scheduled - ended;
+    }
+
+    auto _tasksWaiting() const {
+        auto ended = _stats.waitersEnded.load();
+        auto started = _stats.waitersStarted.loadRelaxed();
+        return started - ended;
+    }
+
+    auto _tasksTotal() const {
+        return _tasksRunning() + _tasksWaiting();
+    }
+
+    struct Stats {
+        AtomicWord<size_t> threadsStarted{0};
+        AtomicWord<size_t> threadsEnded{0};
+
+        AtomicWord<size_t> tasksScheduled{0};
+        AtomicWord<size_t> tasksStarted{0};
+        AtomicWord<size_t> tasksEnded{0};
+
+        AtomicWord<size_t> waitersStarted{0};
+        AtomicWord<size_t> waitersEnded{0};
+    };
+    Stats _stats;
+
+    ServiceContext* const _svcCtx;

    mutable Mutex _mutex =
        MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "ServiceExecutorFixed::_mutex");
    stdx::condition_variable _shutdownCondition;
+    SharedPromise<void> _shutdownComplete;

    /**
-     * State transition diagram: kNotStarted ---> kRunning ---> kStopped
-     * The service executor cannot be in "kRunning" when its destructor is invoked.
+     * State transition diagram: kNotStarted ---> kRunning ---> kStopping ---> kStopped
     */
-    enum State { kNotStarted, kRunning, kStopped } _state = kNotStarted;
+    enum State { kNotStarted, kRunning, kStopping, kStopped } _state = kNotStarted;

    ThreadPool::Options _options;
-    std::unique_ptr<ThreadPool> _threadPool;
+    std::shared_ptr<ThreadPool> _threadPool;

-    static inline thread_local std::unique_ptr<ExecutorThreadContext> _executorContext;
+    struct Waiter {
+        SessionHandle session;
+        OutOfLineExecutor::Task onCompletionCallback;
+    };
+    using WaiterList = std::list<Waiter>;
+    WaiterList _waiters;
+
+    static thread_local std::unique_ptr<ExecutorThreadContext> _executorContext;
 };

 }  // namespace transport
--- a/src/mongo/transport/service_executor_reserved.cpp
+++ b/src/mongo/transport/service_executor_reserved.cpp
@@ -45,11 +45,12 @@ namespace mongo {
 namespace transport {
 namespace {

-constexpr auto kThreadsRunning = "threadsRunning"_sd;
-constexpr auto kExecutorLabel = "executor"_sd;
 constexpr auto kExecutorName = "reserved"_sd;
-constexpr auto kReadyThreads = "readyThreads"_sd;
-constexpr auto kStartingThreads = "startingThreads"_sd;
+
+constexpr auto kThreadsRunning = "threadsRunning"_sd;
+constexpr auto kClientsInTotal = "clientsInTotal"_sd;
+constexpr auto kClientsRunning = "clientsRunning"_sd;
+constexpr auto kClientsWaiting = "clientsWaitingForData"_sd;

 const auto getServiceExecutorReserved =
    ServiceContext::declareDecoration<std::unique_ptr<ServiceExecutorReserved>>();
@@ -214,14 +215,32 @@ Status ServiceExecutorReserved::scheduleTask(Task task, ScheduleFlags flags) {
 }

 void ServiceExecutorReserved::appendStats(BSONObjBuilder* bob) const {
-    stdx::lock_guard<Latch> lk(_mutex);
-    *bob << kExecutorLabel << kExecutorName << kThreadsRunning
-         << static_cast<int>(_numRunningWorkerThreads.loadRelaxed()) << kReadyThreads
-         << static_cast<int>(_numReadyThreads) << kStartingThreads
-         << static_cast<int>(_numStartingThreads);
+    // The ServiceExecutorReserved loans a thread to one client for its lifetime and waits
+    // synchronously on that thread.
+    struct Statlet {
+        int threads;
+        int total;
+        int running;
+        int waiting;
+    };
+
+    auto statlet = [&] {
+        stdx::lock_guard lk(_mutex);
+        auto threads = static_cast<int>(_numRunningWorkerThreads.loadRelaxed());
+        auto total = static_cast<int>(threads - _numReadyThreads - _numStartingThreads);
+        auto running = total;
+        auto waiting = 0;
+        return Statlet{threads, total, running, waiting};
+    }();
+
+    BSONObjBuilder subbob = bob->subobjStart(kExecutorName);
+    subbob.append(kThreadsRunning, statlet.threads);
+    subbob.append(kClientsInTotal, statlet.total);
+    subbob.append(kClientsRunning, statlet.running);
+    subbob.append(kClientsWaiting, statlet.waiting);
 }

-void ServiceExecutorReserved::runOnDataAvailable(Session* session,
+void ServiceExecutorReserved::runOnDataAvailable(const SessionHandle& session,
                                                 OutOfLineExecutor::Task onCompletionCallback) {
    scheduleCallbackOnDataAvailable(session, std::move(onCompletionCallback), this);
 }
--- a/src/mongo/transport/service_executor_reserved.h
+++ b/src/mongo/transport/service_executor_reserved.h
@@ -65,7 +65,7 @@ public:
        return Mode::kSynchronous;
    }

-    void runOnDataAvailable(Session* session,
+    void runOnDataAvailable(const SessionHandle& session,
                            OutOfLineExecutor::Task onCompletionCallback) override;

    void appendStats(BSONObjBuilder* bob) const override;
--- a/src/mongo/transport/service_executor_synchronous.cpp
+++ b/src/mongo/transport/service_executor_synchronous.cpp
@@ -43,10 +43,13 @@
 namespace mongo {
 namespace transport {
 namespace {
-constexpr auto kThreadsRunning = "threadsRunning"_sd;
-constexpr auto kExecutorLabel = "executor"_sd;
 constexpr auto kExecutorName = "passthrough"_sd;

+constexpr auto kThreadsRunning = "threadsRunning"_sd;
+constexpr auto kClientsInTotal = "clientsInTotal"_sd;
+constexpr auto kClientsRunning = "clientsRunning"_sd;
+constexpr auto kClientsWaiting = "clientsWaitingForData"_sd;
+
 const auto getServiceExecutorSynchronous =
    ServiceContext::declareDecoration<std::unique_ptr<ServiceExecutorSynchronous>>();

@@ -152,11 +155,17 @@ Status ServiceExecutorSynchronous::scheduleTask(Task task, ScheduleFlags flags)
 }

 void ServiceExecutorSynchronous::appendStats(BSONObjBuilder* bob) const {
-    *bob << kExecutorLabel << kExecutorName << kThreadsRunning
-         << static_cast<int>(_numRunningWorkerThreads.loadRelaxed());
+    // ServiceExecutorSynchronous has one client per thread and waits synchronously on that thread.
+    auto threads = static_cast<int>(_numRunningWorkerThreads.loadRelaxed());
+
+    BSONObjBuilder subbob = bob->subobjStart(kExecutorName);
+    subbob.append(kThreadsRunning, threads);
+    subbob.append(kClientsInTotal, threads);
+    subbob.append(kClientsRunning, threads);
+    subbob.append(kClientsWaiting, 0);
 }

-void ServiceExecutorSynchronous::runOnDataAvailable(Session* session,
+void ServiceExecutorSynchronous::runOnDataAvailable(const SessionHandle& session,
                                                    OutOfLineExecutor::Task onCompletionCallback) {
    scheduleCallbackOnDataAvailable(session, std::move(onCompletionCallback), this);
 }
--- a/src/mongo/transport/service_executor_synchronous.h
+++ b/src/mongo/transport/service_executor_synchronous.h
@@ -60,7 +60,7 @@ public:
        return Mode::kSynchronous;
    }

-    void runOnDataAvailable(Session* session,
+    void runOnDataAvailable(const SessionHandle& session,
                            OutOfLineExecutor::Task onCompletionCallback) override;

    void appendStats(BSONObjBuilder* bob) const override;
--- a/src/mongo/transport/service_executor_test.cpp
+++ b/src/mongo/transport/service_executor_test.cpp
@@ -36,6 +36,7 @@

 #include "mongo/bson/bsonobjbuilder.h"
 #include "mongo/db/service_context.h"
+#include "mongo/db/service_context_test_fixture.h"
 #include "mongo/logv2/log.h"
 #include "mongo/transport/mock_session.h"
 #include "mongo/transport/service_executor_fixed.h"
@@ -168,7 +169,7 @@ TEST_F(ServiceExecutorSynchronousFixture, ScheduleFailsBeforeStartup) {
    scheduleBasicTask(executor.get(), false);
 }

-class ServiceExecutorFixedFixture : public unittest::Test {
+class ServiceExecutorFixedFixture : public ServiceContextTest {
 public:
    static constexpr auto kNumExecutorThreads = 2;

@@ -183,10 +184,9 @@ public:
        ServiceExecutorHandle(ServiceExecutorHandle&&) = delete;

        explicit ServiceExecutorHandle(int flags = kNone) : _skipShutdown(flags & kSkipShutdown) {
-            ThreadPool::Options options;
-            options.minThreads = options.maxThreads = kNumExecutorThreads;
-            options.poolName = "Test";
-            _executor = std::make_shared<ServiceExecutorFixed>(std::move(options));
+            ThreadPool::Limits limits;
+            limits.minThreads = limits.maxThreads = kNumExecutorThreads;
+            _executor = std::make_shared<ServiceExecutorFixed>(std::move(limits));

            if (flags & kStartExecutor) {
                ASSERT_OK(_executor->start());
@@ -221,8 +221,12 @@ TEST_F(ServiceExecutorFixedFixture, ScheduleFailsBeforeStartup) {
 }

 DEATH_TEST_F(ServiceExecutorFixedFixture, DestructorFailsBeforeShutdown, "invariant") {
+    FailPointEnableBlock failpoint("hangAfterServiceExecutorFixedExecutorThreadsStart");
    ServiceExecutorHandle executorHandle(ServiceExecutorHandle::kStartExecutor |
                                         ServiceExecutorHandle::kSkipShutdown);
+    // The following ensures `executorHandle` holds the only reference to the service executor, thus
+    // returning from this block would trigger destruction of the executor.
+    failpoint->waitForTimesEntered(kNumExecutorThreads);
 }

 TEST_F(ServiceExecutorFixedFixture, BasicTaskRuns) {
@@ -297,54 +301,41 @@ TEST_F(ServiceExecutorFixedFixture, ShutdownTimeLimit) {
    mayReturn->emplaceValue();
 }

-TEST_F(ServiceExecutorFixedFixture, Stats) {
+TEST_F(ServiceExecutorFixedFixture, ScheduleSucceedsBeforeShutdown) {
    ServiceExecutorHandle executorHandle(ServiceExecutorHandle::kStartExecutor);
-    auto rendezvousBarrier = std::make_shared<unittest::Barrier>(kNumExecutorThreads + 1);
-    auto returnBarrier = std::make_shared<unittest::Barrier>(kNumExecutorThreads + 1);

-    auto task = [rendezvousBarrier, returnBarrier]() mutable {
-        rendezvousBarrier->countDownAndWait();
-        // Executor threads wait here for the main thread to test "executor->appendStats()".
-        returnBarrier->countDownAndWait();
-    };
+    auto thread = stdx::thread();
+    auto barrier = std::make_shared<unittest::Barrier>(2);
+    {
+        FailPointEnableBlock failpoint("hangBeforeSchedulingServiceExecutorFixedTask");

-    for (auto i = 0; i < kNumExecutorThreads; i++) {
-        ASSERT_OK(executorHandle->scheduleTask(task, ServiceExecutor::kEmptyFlags));
+
+        // The executor accepts the work, but hasn't used the underlying pool yet.
+        thread = stdx::thread([&] {
+            ASSERT_OK(executorHandle->scheduleTask([&, barrier] { barrier->countDownAndWait(); },
+                                                   ServiceExecutor::kEmptyFlags));
+        });
+        failpoint->waitForTimesEntered(1);
+
+        // Trigger an immediate shutdown which will not affect the task we have accepted.
+        ASSERT_NOT_OK(executorHandle->shutdown(Milliseconds{0}));
    }

-    // The main thread waits for the executor threads to bump up "threadsRunning" while picking up a
-    // task to execute. Once all executor threads are running (rendezvous) and the main thread is
-    // done testing the stats, the main thread will unblock them through "returnBarrier".
-    rendezvousBarrier->countDownAndWait();
+    // Our failpoint has been disabled, so the task can run to completion.
+    barrier->countDownAndWait();

-    BSONObjBuilder bob;
-    executorHandle->appendStats(&bob);
-    auto obj = bob.obj();
-    ASSERT(obj.hasField("threadsRunning"));
-    auto threadsRunning = obj.getIntField("threadsRunning");
-    ASSERT_EQ(threadsRunning, static_cast<int>(ServiceExecutorFixedFixture::kNumExecutorThreads));
+    // Now we can wait for the task to finish and shut down.
+    ASSERT_OK(executorHandle->shutdown(kShutdownTime));

-    returnBarrier->countDownAndWait();
+    thread.join();
 }

 TEST_F(ServiceExecutorFixedFixture, ScheduleFailsAfterShutdown) {
    ServiceExecutorHandle executorHandle(ServiceExecutorHandle::kStartExecutor);
-    std::unique_ptr<stdx::thread> schedulerThread;

-    {
-        // Spawn a thread to schedule a task, and block it before it can schedule the task with the
-        // underlying thread-pool. Then shutdown the service executor and unblock the scheduler
-        // thread. This order of events must cause "schedule()" to return a non-okay status.
-        FailPointEnableBlock failpoint("hangBeforeSchedulingServiceExecutorFixedTask");
-        schedulerThread = std::make_unique<stdx::thread>([executor = *executorHandle] {
-            ASSERT_NOT_OK(
-                executor->scheduleTask([] { MONGO_UNREACHABLE; }, ServiceExecutor::kEmptyFlags));
-        });
-        failpoint->waitForTimesEntered(1);
-        ASSERT_OK(executorHandle->shutdown(kShutdownTime));
-    }
-
-    schedulerThread->join();
+    ASSERT_OK(executorHandle->shutdown(kShutdownTime));
+    ASSERT_NOT_OK(
+        executorHandle->scheduleTask([] { MONGO_UNREACHABLE; }, ServiceExecutor::kEmptyFlags));
 }

 TEST_F(ServiceExecutorFixedFixture, RunTaskAfterWaitingForData) {
@@ -357,7 +348,7 @@ TEST_F(ServiceExecutorFixedFixture, RunTaskAfterWaitingForData) {
    AtomicWord<bool> ranOnDataAvailable{false};
    auto barrier = std::make_shared<unittest::Barrier>(2);
    executorHandle->runOnDataAvailable(
-        session.get(), [&ranOnDataAvailable, mainThreadId, barrier](Status) mutable -> void {
+        session, [&ranOnDataAvailable, mainThreadId, barrier](Status) mutable -> void {
            ranOnDataAvailable.store(true);
            ASSERT(stdx::this_thread::get_id() != mainThreadId);
            barrier->countDownAndWait();
@@ -369,5 +360,30 @@ TEST_F(ServiceExecutorFixedFixture, RunTaskAfterWaitingForData) {
    ASSERT(ranOnDataAvailable.load());
 }

+TEST_F(ServiceExecutorFixedFixture, StartAndShutdownAreDeterministic) {
+
+    std::unique_ptr<ServiceExecutorHandle> handle;
+
+    // Ensure starting the executor results in spawning the specified number of executor threads.
+    {
+        FailPointEnableBlock failpoint("hangAfterServiceExecutorFixedExecutorThreadsStart");
+        handle = std::make_unique<ServiceExecutorHandle>(ServiceExecutorHandle::kNone);
+        ASSERT_OK((*handle)->start());
+        failpoint->waitForTimesEntered(kNumExecutorThreads);
+    }
+
+    // Since destroying ServiceExecutorFixed is blocking, spawn a thread to issue the destruction
+    // off of the main execution path.
+    stdx::thread shutdownThread;
+
+    // Ensure all executor threads return after receiving the shutdown signal.
+    {
+        FailPointEnableBlock failpoint("hangBeforeServiceExecutorFixedLastExecutorThreadReturns");
+        shutdownThread = stdx::thread{[handle = std::move(handle)]() mutable { handle.reset(); }};
+        failpoint->waitForTimesEntered(1);
+    }
+    shutdownThread.join();
+}
+
 }  // namespace
 }  // namespace mongo
--- a/src/mongo/transport/service_executor_utils.cpp
+++ b/src/mongo/transport/service_executor_utils.cpp
@@ -146,16 +146,18 @@ Status launchServiceWorkerThread(unique_function<void()> task) noexcept {
    return Status::OK();
 }

-void scheduleCallbackOnDataAvailable(transport::Session* session,
+void scheduleCallbackOnDataAvailable(const transport::SessionHandle& session,
                                     unique_function<void(Status)> callback,
                                     transport::ServiceExecutor* executor) noexcept {
    invariant(session);
-    try {
-        session->waitForData().get();
-        executor->schedule(std::move(callback));
-    } catch (DBException& e) {
-        callback(e.toStatus());
-    }
+    executor->schedule([session, callback = std::move(callback)](Status status) {
+        if (!status.isOK()) {
+            callback(std::move(status));
+            return;
+        }
+
+        callback(session->waitForData());
+    });
 }

 }  // namespace mongo
--- a/src/mongo/transport/service_executor_utils.h
+++ b/src/mongo/transport/service_executor_utils.h
@@ -46,7 +46,7 @@ Status launchServiceWorkerThread(unique_function<void()> task) noexcept;
 * thread until data is available for reading. On success, it schedules "callback" on "executor".
 * Other implementations (e.g., "ServiceExecutorFixed") may provide asynchronous variants.
 */
-void scheduleCallbackOnDataAvailable(transport::Session* session,
+void scheduleCallbackOnDataAvailable(const transport::SessionHandle& session,
                                     unique_function<void(Status)> callback,
                                     transport::ServiceExecutor* executor) noexcept;

--- a/src/mongo/transport/service_state_machine.cpp
+++ b/src/mongo/transport/service_state_machine.cpp
@@ -35,13 +35,20 @@

 #include <memory>

+#include "mongo/base/status.h"
 #include "mongo/config.h"
+#include "mongo/db/client.h"
+#include "mongo/db/client_strand.h"
 #include "mongo/db/dbmessage.h"
 #include "mongo/db/stats/counters.h"
 #include "mongo/db/traffic_recorder.h"
 #include "mongo/logv2/log.h"
+#include "mongo/platform/atomic_word.h"
+#include "mongo/platform/mutex.h"
 #include "mongo/rpc/message.h"
 #include "mongo/rpc/op_msg.h"
+#include "mongo/stdx/thread.h"
+#include "mongo/transport/message_compressor_base.h"
 #include "mongo/transport/message_compressor_manager.h"
 #include "mongo/transport/service_entry_point.h"
 #include "mongo/transport/service_executor_fixed.h"
@@ -50,11 +57,12 @@
 #include "mongo/transport/transport_layer.h"
 #include "mongo/util/assert_util.h"
 #include "mongo/util/concurrency/idle_thread_block.h"
-#include "mongo/util/concurrency/thread_name.h"
 #include "mongo/util/debug_util.h"
 #include "mongo/util/exit.h"
 #include "mongo/util/fail_point.h"
+#include "mongo/util/future.h"
 #include "mongo/util/net/socket_exception.h"
+#include "mongo/util/net/ssl_manager.h"
 #include "mongo/util/net/ssl_peer_info.h"
 #include "mongo/util/quick_exit.h"

@@ -163,163 +171,155 @@ Message makeExhaustMessage(Message requestMsg, DbResponse* dbresponse) {
 }
 }  // namespace

-using transport::ServiceExecutor;
-using transport::TransportLayer;
-
-/*
- * This class wraps up the logic for swapping/unswapping the Client when transitioning
- * between states.
- *
- * In debug builds this also ensures that only one thread is working on the SSM at once.
- */
-class ServiceStateMachine::ThreadGuard {
-    ThreadGuard(ThreadGuard&) = delete;
-    ThreadGuard& operator=(ThreadGuard&) = delete;
-
+class ServiceStateMachine::Impl final
+    : public std::enable_shared_from_this<ServiceStateMachine::Impl> {
 public:
-    explicit ThreadGuard(ServiceStateMachine* ssm) : _ssm{ssm} {
-        auto owned = Ownership::kUnowned;
-        _ssm->_owned.compareAndSwap(&owned, Ownership::kOwned);
-        if (owned == Ownership::kStatic) {
-            dassert(haveClient());
-            dassert(Client::getCurrent() == _ssm->_dbClientPtr);
-            _haveTakenOwnership = true;
-            return;
-        }
-
-#ifdef MONGO_CONFIG_DEBUG_BUILD
-        invariant(owned == Ownership::kUnowned);
-        _ssm->_owningThread.store(stdx::this_thread::get_id());
-#endif
-
-        // Set up the thread name
-        auto oldThreadName = getThreadName();
-        const auto& threadName = _ssm->_dbClient->desc();
-        if (oldThreadName != threadName) {
-            _oldThreadName = oldThreadName.toString();
-            setThreadName(threadName);
-        }
-
-        // Swap the current Client so calls to cc() work as expected
-        Client::setCurrent(std::move(_ssm->_dbClient));
-        _haveTakenOwnership = true;
-    }
-
-    // Constructing from a moved ThreadGuard invalidates the other thread guard.
-    ThreadGuard(ThreadGuard&& other)
-        : _ssm(other._ssm), _haveTakenOwnership(other._haveTakenOwnership) {
-        other._haveTakenOwnership = false;
-    }
-
-    ThreadGuard& operator=(ThreadGuard&& other) {
-        if (this != &other) {
-            _ssm = other._ssm;
-            _haveTakenOwnership = other._haveTakenOwnership;
-            other._haveTakenOwnership = false;
-        }
-        return *this;
+    /*
+     * Any state may transition to EndSession in case of an error, otherwise the valid state
+     * transitions are:
+     * Source -> SourceWait -> Process -> SinkWait -> Source (standard RPC)
+     * Source -> SourceWait -> Process -> SinkWait -> Process -> SinkWait ... (exhaust)
+     * Source -> SourceWait -> Process -> Source (fire-and-forget)
+     */
+    enum class State {
+        Created,     // The session has been created, but no operations have been performed yet
+        Source,      // Request a new Message from the network to handle
+        SourceWait,  // Wait for the new Message to arrive from the network
+        Process,     // Run the Message through the database
+        SinkWait,    // Wait for the database result to be sent by the network
+        EndSession,  // End the session - the ServiceStateMachine will be invalid after this
+        Ended        // The session has ended. It is illegal to call any method besides
+                     // state() if this is the current state.
    };

-    ThreadGuard() = delete;
+    /*
+     * When start() is called with Ownership::kOwned, the SSM will swap the Client/thread name
+     * whenever it runs a stage of the state machine, and then unswap them out when leaving the SSM.
+     *
+     * With Ownership::kStatic, it will assume that the SSM will only ever be run from one thread,
+     * and that thread will not be used for other SSMs. It will swap in the Client/thread name for
+     * the first run and leave them in place.
+     *
+     * kUnowned is used internally to mark that the SSM is inactive.
+     */
+    enum class Ownership { kUnowned, kOwned, kStatic };

-    ~ThreadGuard() {
-        if (_haveTakenOwnership)
-            release();
+    Impl(ServiceContext::UniqueClient client)
+        : _state{State::Created},
+          _serviceContext{client->getServiceContext()},
+          _sep{_serviceContext->getServiceEntryPoint()},
+          _clientStrand{ClientStrand::make(std::move(client))} {}
+
+    void start(ServiceExecutorContext seCtx);
+
+    void setCleanupHook(std::function<void()> hook);
+
+    /*
+     * Terminates the associated transport Session, regardless of tags.
+     *
+     * This does not wait for the session to terminate and clean itself up; it returns immediately.
+     */
+    void terminate();
+
+    /*
+     * Terminates the associated transport Session if its tags don't match the supplied tags.  If
+     * the session is in a pending state, before any tags have been set, it will not be terminated.
+     *
+     * This does not wait for the session to terminate and clean itself up; it returns immediately.
+     */
+    void terminateIfTagsDontMatch(transport::Session::TagMask tags);
+
+    /*
+     * Terminates the associated transport Session if status indicates an error.
+     *
+     * This does not wait for the session to terminate and clean itself up; it returns immediately.
+     */
+    void terminateAndLogIfError(Status status);
+
+    /*
+     * This function actually calls into the database and processes a request. It's broken out
+     * into its own inline function for better readability.
+     */
+    Future<void> processMessage();
+
+    /*
+     * These get called by the TransportLayer when requested network I/O has completed.
+     */
+    void sourceCallback(Status status);
+    void sinkCallback(Status status);
+
+    /*
+     * Source/Sink message from the TransportLayer.
+     */
+    Future<void> sourceMessage();
+    Future<void> sinkMessage();
+
+    /*
+     * Releases all the resources associated with the session and calls the cleanupHook.
+     */
+    void cleanupSession(const Status& status);
+
+    /*
+     * This is the initial function called at the beginning of a thread's lifecycle in the
+     * TransportLayer.
+     */
+    void runOnce();
+
+    /*
+     * Releases all the resources associated with the exhaust request.
+     */
+    void cleanupExhaustResources() noexcept;
+
+    /*
+     * Gets the current state of connection for testing/diagnostic purposes.
+     */
+    State state() const {
+        return _state.load();
    }

-    explicit operator bool() const {
-#ifdef MONGO_CONFIG_DEBUG_BUILD
-        if (_haveTakenOwnership) {
-            invariant(_ssm->_owned.load() != Ownership::kUnowned);
-            invariant(_ssm->_owningThread.load() == stdx::this_thread::get_id());
-            return true;
-        } else {
-            return false;
-        }
-#else
-        return _haveTakenOwnership;
-#endif
+    /*
+     * Gets the transport::Session associated with this connection
+     */
+    const transport::SessionHandle& session() {
+        return _clientStrand->getClientPointer()->session();
    }

-    void markStaticOwnership() {
-        dassert(static_cast<bool>(*this));
-        _ssm->_owned.store(Ownership::kStatic);
-    }
-
-    void release() {
-        auto owned = _ssm->_owned.load();
-
-#ifdef MONGO_CONFIG_DEBUG_BUILD
-        dassert(_haveTakenOwnership);
-        dassert(owned != Ownership::kUnowned);
-        dassert(_ssm->_owningThread.load() == stdx::this_thread::get_id());
-#endif
-        if (owned != Ownership::kStatic) {
-            if (haveClient()) {
-                _ssm->_dbClient = Client::releaseCurrent();
-            }
-
-            if (!_oldThreadName.empty()) {
-                setThreadName(_oldThreadName);
-            }
-        }
-
-        // If the session has ended, then it's unsafe to do anything but call the cleanup hook.
-        if (_ssm->state() == State::Ended) {
-            // The cleanup hook gets moved out of _ssm->_cleanupHook so that it can only be called
-            // once.
-            auto cleanupHook = std::move(_ssm->_cleanupHook);
-            if (cleanupHook)
-                cleanupHook();
-
-            // It's very important that the Guard returns here and that the SSM's state does not
-            // get modified in any way after the cleanup hook is called.
-            return;
-        }
-
-        _haveTakenOwnership = false;
-        // If owned != Ownership::kOwned here then it can only equal Ownership::kStatic and we
-        // should just return
-        if (owned == Ownership::kOwned) {
-            _ssm->_owned.store(Ownership::kUnowned);
-        }
+    /*
+     * Gets the transport::ServiceExecutor associated with this connection.
+     */
+    ServiceExecutor* executor() {
+        return ServiceExecutorContext::get(_clientStrand->getClientPointer())->getServiceExecutor();
    }

 private:
-    ServiceStateMachine* _ssm;
-    bool _haveTakenOwnership = false;
-    std::string _oldThreadName;
+    AtomicWord<State> _state{State::Created};
+
+    ServiceContext* const _serviceContext;
+    ServiceEntryPoint* const _sep;
+
+    transport::SessionHandle _sessionHandle;
+    ClientStrandPtr _clientStrand;
+    std::function<void()> _cleanupHook;
+
+    bool _inExhaust = false;
+    boost::optional<MessageCompressorId> _compressorId;
+    Message _inMessage;
+    Message _outMessage;
 };

-ServiceStateMachine::ServiceStateMachine(ServiceContext::UniqueClient client)
-    : _state{State::Created},
-      _serviceContext{client->getServiceContext()},
-      _sep{_serviceContext->getServiceEntryPoint()},
-      _dbClient{std::move(client)},
-      _dbClientPtr{_dbClient.get()} {}
-
-const transport::SessionHandle& ServiceStateMachine::_session() const {
-    return _dbClientPtr->session();
-}
-
-ServiceExecutor* ServiceStateMachine::_executor() {
-    return ServiceExecutorContext::get(_dbClientPtr)->getServiceExecutor();
-}
-
-Future<void> ServiceStateMachine::_sourceMessage(ThreadGuard guard) {
+Future<void> ServiceStateMachine::Impl::sourceMessage() {
    invariant(_inMessage.empty());
    invariant(_state.load() == State::Source);
    _state.store(State::SourceWait);
-    guard.release();

    auto sourceMsgImpl = [&] {
-        const auto& transportMode = _executor()->transportMode();
+        const auto& transportMode = executor()->transportMode();
        if (transportMode == transport::Mode::kSynchronous) {
            MONGO_IDLE_THREAD_BLOCK;
-            return Future<Message>::makeReady(_session()->sourceMessage());
+            return Future<Message>::makeReady(session()->sourceMessage());
        } else {
            invariant(transportMode == transport::Mode::kAsynchronous);
-            return _session()->asyncSourceMessage();
+            return session()->asyncSourceMessage();
        }
    };

@@ -328,52 +328,47 @@ Future<void> ServiceStateMachine::_sourceMessage(ThreadGuard guard) {
            _inMessage = std::move(msg.getValue());
            invariant(!_inMessage.empty());
        }
-        _sourceCallback(msg.getStatus());
+        sourceCallback(msg.getStatus());
        return Status::OK();
    });
 }

-Future<void> ServiceStateMachine::_sinkMessage(ThreadGuard guard) {
+Future<void> ServiceStateMachine::Impl::sinkMessage() {
    // Sink our response to the client
    invariant(_state.load() == State::Process);
    _state.store(State::SinkWait);
-    guard.release();
    auto toSink = std::exchange(_outMessage, {});

    auto sinkMsgImpl = [&] {
-        const auto& transportMode = _executor()->transportMode();
+        const auto& transportMode = executor()->transportMode();
        if (transportMode == transport::Mode::kSynchronous) {
            // We don't consider ourselves idle while sending the reply since we are still doing
            // work on behalf of the client. Contrast that with sourceMessage() where we are waiting
            // for the client to send us more work to do.
-            return Future<void>::makeReady(_session()->sinkMessage(std::move(toSink)));
+            return Future<void>::makeReady(session()->sinkMessage(std::move(toSink)));
        } else {
            invariant(transportMode == transport::Mode::kAsynchronous);
-            return _session()->asyncSinkMessage(std::move(toSink));
+            return session()->asyncSinkMessage(std::move(toSink));
        }
    };

    return sinkMsgImpl().onCompletion([this](Status status) {
-        _sinkCallback(std::move(status));
+        sinkCallback(std::move(status));
        return Status::OK();
    });
 }

-void ServiceStateMachine::_sourceCallback(Status status) {
-    // The first thing to do is create a ThreadGuard which will take ownership of the SSM in this
-    // thread.
-    ThreadGuard guard(this);
+void ServiceStateMachine::Impl::sourceCallback(Status status) {
+    invariant(state() == State::SourceWait);

-    // Make sure we just called sourceMessage();
-    dassert(state() == State::SourceWait);
-    auto remote = _session()->remote();
+    auto remote = session()->remote();

    if (status.isOK()) {
        _state.store(State::Process);

-        // If the sourceMessage succeeded then we can move to on to process the message. We
-        // simply return from here and the future chain in _runOnce() will continue to the
-        // next state normally.
+        // If the sourceMessage succeeded then we can move on to process the message. We simply
+        // return from here and the future chain in runOnce() will continue to the next state
+        // normally.

        // If any other issues arise, close the session.
    } else if (ErrorCodes::isInterruption(status.code()) ||
@@ -398,18 +393,14 @@ void ServiceStateMachine::_sourceCallback(Status status) {
              "Error receiving request from client. Ending connection from remote",
              "error"_attr = status,
              "remote"_attr = remote,
-              "connectionId"_attr = _session()->id());
+              "connectionId"_attr = session()->id());
        _state.store(State::EndSession);
    }
    uassertStatusOK(status);
 }

-void ServiceStateMachine::_sinkCallback(Status status) {
-    // The first thing to do is create a ThreadGuard which will take ownership of the SSM in this
-    // thread.
-    ThreadGuard guard(this);
-
-    dassert(state() == State::SinkWait);
+void ServiceStateMachine::Impl::sinkCallback(Status status) {
+    invariant(state() == State::SinkWait);

    // If there was an error sinking the message to the client, then we should print an error and
    // end the session.
@@ -420,8 +411,8 @@ void ServiceStateMachine::_sinkCallback(Status status) {
        LOGV2(22989,
              "Error sending response to client. Ending connection from remote",
              "error"_attr = status,
-              "remote"_attr = _session()->remote(),
-              "connectionId"_attr = _session()->id());
+              "remote"_attr = session()->remote(),
+              "connectionId"_attr = session()->id());
        _state.store(State::EndSession);
        uassertStatusOK(status);
    } else if (_inExhaust) {
@@ -431,13 +422,13 @@ void ServiceStateMachine::_sinkCallback(Status status) {
    }
 }

-Future<void> ServiceStateMachine::_processMessage(ThreadGuard guard) {
+Future<void> ServiceStateMachine::Impl::processMessage() {
    invariant(!_inMessage.empty());

    TrafficRecorder::get(_serviceContext)
-        .observe(_session(), _serviceContext->getPreciseClockSource()->now(), _inMessage);
+        .observe(session(), _serviceContext->getPreciseClockSource()->now(), _inMessage);

-    auto& compressorMgr = MessageCompressorManager::forSession(_session());
+    auto& compressorMgr = MessageCompressorManager::forSession(session());

    _compressorId = boost::none;
    if (_inMessage.operation() == dbCompressed) {
@@ -459,17 +450,17 @@ Future<void> ServiceStateMachine::_processMessage(ThreadGuard guard) {
    // The handleRequest is implemented in a subclass for mongod/mongos and actually all the
    // database work for this request.
    return _sep->handleRequest(opCtx.get(), _inMessage)
-        .then([this,
-               &compressorMgr = compressorMgr,
-               opCtx = std::move(opCtx),
-               guard = std::move(guard)](DbResponse dbresponse) mutable -> void {
+        .then([this, &compressorMgr = compressorMgr, opCtx = std::move(opCtx)](
+                  DbResponse dbresponse) mutable -> void {
            // opCtx must be killed and delisted here so that the operation cannot show up in
-            // currentOp results after the response reaches the client. The destruction is postponed
-            // for later to mitigate its performance impact on the critical path of execution.
+            // currentOp results after the response reaches the client. Destruction of the already
+            // killed opCtx is postponed for later (i.e., after completion of the future-chain) to
+            // mitigate its performance impact on the critical path of execution.
+            // Note that destroying futures after execution, rather than postponing the destruction
+            // until completion of the future-chain, would expose the cost of destroying opCtx to
+            // the critical path and result in serious performance implications.
            _serviceContext->killAndDelistOperation(opCtx.get(),
                                                    ErrorCodes::OperationIsKilledAndDelisted);
-            invariant(!_killedOpCtx);
-            _killedOpCtx = std::move(opCtx);

            // Format our response, if we have one
            Message& toSink = dbresponse.response;
@@ -482,7 +473,7 @@ Future<void> ServiceStateMachine::_processMessage(ThreadGuard guard) {
                toSink.header().setResponseToMsgId(_inMessage.header().getId());
                if (OpMsg::isFlagSet(_inMessage, OpMsg::kChecksumPresent)) {
 #ifdef MONGO_CONFIG_SSL
-                    if (!SSLPeerInfo::forSession(_session()).isTLS) {
+                    if (!SSLPeerInfo::forSession(session()).isTLS) {
                        OpMsg::appendChecksum(&toSink);
                    }
 #else
@@ -507,7 +498,7 @@ Future<void> ServiceStateMachine::_processMessage(ThreadGuard guard) {
                }

                TrafficRecorder::get(_serviceContext)
-                    .observe(_session(), _serviceContext->getPreciseClockSource()->now(), toSink);
+                    .observe(session(), _serviceContext->getPreciseClockSource()->now(), toSink);

                _outMessage = std::move(toSink);
            } else {
@@ -519,45 +510,46 @@ Future<void> ServiceStateMachine::_processMessage(ThreadGuard guard) {
        });
 }

-void ServiceStateMachine::start() {
-    _executor()->schedule(
-        GuaranteedExecutor::enforceRunOnce([this, anchor = shared_from_this()](Status status) {
-            // TODO(SERVER-49109) We can't use static ownership in general with
-            // a ServiceExecutorFixed and async commands. ThreadGuard needs to become smarter.
-            ThreadGuard guard(shared_from_this().get());
-            guard.markStaticOwnership();
+void ServiceStateMachine::Impl::start(ServiceExecutorContext seCtx) {
+    {
+        auto client = _clientStrand->getClientPointer();
+        stdx::lock_guard lk(*client);
+        ServiceExecutorContext::set(client, std::move(seCtx));
+    }

-            // If this is the first run of the SSM, then update its state to Source
-            if (state() == State::Created) {
-                _state.store(State::Source);
+    invariant(_state.swap(State::Source) == State::Created);
+
+    auto cb = [this, anchor = shared_from_this()](Status status) {
+        _clientStrand->run([&] {
+            if (ErrorCodes::isCancelationError(status) || ErrorCodes::isNetworkError(status)) {
+                cleanupSession(status);
+                return;
            }
+            invariant(status);

-            _runOnce();
-        }));
+            runOnce();
+        });
+    };
+    executor()->runOnDataAvailable(session(), std::move(cb));
 }

-void ServiceStateMachine::_runOnce() {
+void ServiceStateMachine::Impl::runOnce() {
    makeReadyFutureWith([&]() -> Future<void> {
        if (_inExhaust) {
            return Status::OK();
        } else {
-            return _sourceMessage(ThreadGuard(this));
+            return sourceMessage();
        }
    })
-        .then([this]() { return _processMessage(ThreadGuard(this)); })
+        .then([this]() { return processMessage(); })
        .then([this]() -> Future<void> {
            if (_outMessage.empty()) {
                return Status::OK();
            }

-            return _sinkMessage(ThreadGuard(this));
+            return sinkMessage();
        })
-        .getAsync([this, anchor = shared_from_this()](Status status) {
-            // Destroy the opCtx (already killed) here, to potentially use the delay between
-            // clients' requests to hide the destruction cost.
-            if (MONGO_likely(_killedOpCtx)) {
-                _killedOpCtx.reset();
-            }
+        .getAsync([this](Status status) {
            if (!status.isOK()) {
                _state.store(State::EndSession);
                // The service executor failed to schedule the task. This could for example be that
@@ -569,52 +561,59 @@ void ServiceStateMachine::_runOnce() {
                                      "Terminating session due to error",
                                      "error"_attr = status);
                terminate();
+                cleanupSession(status);

-                ThreadGuard terminateGuard(this);
-                _cleanupSession(std::move(terminateGuard));
                return;
            }

-            _executor()->schedule(GuaranteedExecutor::enforceRunOnce(
-                [this, anchor = shared_from_this()](Status status) { _runOnce(); }));
+            auto cb = [this, anchor = shared_from_this()](Status status) {
+                _clientStrand->run([&] {
+                    if (ErrorCodes::isCancelationError(status) ||
+                        ErrorCodes::isNetworkError(status)) {
+                        cleanupSession(status);
+                        return;
+                    }
+                    invariant(status);
+
+                    runOnce();
+                });
+            };
+
+            // Start our loop again with a new stack.
+            if (_inExhaust) {
+                // If we're in exhaust, we're not expecting more data.
+                executor()->schedule(std::move(cb));
+            } else {
+                executor()->runOnDataAvailable(session(), std::move(cb));
+            }
        });
 }

-void ServiceStateMachine::terminate() {
+void ServiceStateMachine::Impl::terminate() {
    if (state() == State::Ended)
        return;

-    _session()->end();
+    session()->end();
 }

-void ServiceStateMachine::terminateIfTagsDontMatch(transport::Session::TagMask tags) {
+void ServiceStateMachine::Impl::terminateIfTagsDontMatch(transport::Session::TagMask tags) {
    if (state() == State::Ended)
        return;

-    auto sessionTags = _session()->getTags();
+    auto sessionTags = session()->getTags();

    // If terminateIfTagsDontMatch gets called when we still are 'pending' where no tags have been
    // set, then skip the termination check.
    if ((sessionTags & tags) || (sessionTags & transport::Session::kPending)) {
-        LOGV2(22991,
-              "Skip closing connection for connection",
-              "connectionId"_attr = _session()->id());
+        LOGV2(
+            22991, "Skip closing connection for connection", "connectionId"_attr = session()->id());
        return;
    }

    terminate();
 }

-void ServiceStateMachine::setCleanupHook(std::function<void()> hook) {
-    invariant(state() == State::Created);
-    _cleanupHook = std::move(hook);
-}
-
-ServiceStateMachine::State ServiceStateMachine::state() {
-    return _state.load();
-}
-
-void ServiceStateMachine::_terminateAndLogIfError(Status status) {
+void ServiceStateMachine::Impl::terminateAndLogIfError(Status status) {
    if (!status.isOK()) {
        LOGV2_WARNING_OPTIONS(22993,
                              {logv2::LogComponent::kExecutor},
@@ -625,7 +624,7 @@ void ServiceStateMachine::_terminateAndLogIfError(Status status) {
    }
 }

-void ServiceStateMachine::_cleanupExhaustResources() noexcept try {
+void ServiceStateMachine::Impl::cleanupExhaustResources() noexcept try {
    if (!_inExhaust) {
        return;
    }
@@ -635,8 +634,8 @@ void ServiceStateMachine::_cleanupExhaustResources() noexcept try {
        auto cursorId = request.body["getMore"].Long();
        auto opCtx = Client::getCurrent()->makeOperationContext();
        // Fire and forget. This is a best effort attempt to immediately clean up the exhaust
-        // cursor. If the killCursors request fails here for any reasons, it will still be
-        // cleaned up once the cursor times out.
+        // cursor. If the killCursors request fails here for any reason, it will still be cleaned
+        // up once the cursor times out.
        _sep->handleRequest(opCtx.get(), makeKillCursorsMessage(cursorId)).get();
    }
 } catch (const DBException& e) {
@@ -646,24 +645,50 @@ void ServiceStateMachine::_cleanupExhaustResources() noexcept try {
          "error"_attr = e.toStatus());
 }

-void ServiceStateMachine::_cleanupSession(ThreadGuard guard) {
-    // Ensure the delayed destruction of opCtx always happens before doing the cleanup.
-    if (MONGO_likely(_killedOpCtx)) {
-        _killedOpCtx.reset();
-    }
-    invariant(!_killedOpCtx);
+void ServiceStateMachine::Impl::setCleanupHook(std::function<void()> hook) {
+    invariant(state() == State::Created);
+    _cleanupHook = std::move(hook);
+}

-    _cleanupExhaustResources();
+void ServiceStateMachine::Impl::cleanupSession(const Status& status) {
+    LOGV2_INFO(5127900, "Ending session", "error"_attr = status);
+
+    cleanupExhaustResources();
+
+    {
+        auto client = _clientStrand->getClientPointer();
+        stdx::lock_guard lk(*client);
+        transport::ServiceExecutorContext::reset(client);
+    }
+
+    if (auto cleanupHook = std::exchange(_cleanupHook, {})) {
+        cleanupHook();
+    }

    _state.store(State::Ended);

    _inMessage.reset();

    _outMessage.reset();
+}

-    // By ignoring the return value of Client::releaseCurrent() we destroy the session.
-    // _dbClient is now nullptr and _dbClientPtr is invalid and should never be accessed.
-    Client::releaseCurrent();
+ServiceStateMachine::ServiceStateMachine(ServiceContext::UniqueClient client)
+    : _impl{std::make_shared<Impl>(std::move(client))} {}
+
+void ServiceStateMachine::start(ServiceExecutorContext seCtx) {
+    _impl->start(std::move(seCtx));
+}
+
+void ServiceStateMachine::setCleanupHook(std::function<void()> hook) {
+    _impl->setCleanupHook(std::move(hook));
+}
+
+void ServiceStateMachine::terminate() {
+    _impl->terminate();
+}
+
+void ServiceStateMachine::terminateIfTagsDontMatch(transport::Session::TagMask tags) {
+    _impl->terminateIfTagsDontMatch(tags);
 }

 }  // namespace transport
--- a/src/mongo/transport/service_state_machine.h
+++ b/src/mongo/transport/service_state_machine.h
@@ -29,24 +29,13 @@

 #pragma once

-#include <atomic>
 #include <functional>
 #include <memory>

-#include "mongo/base/status.h"
-#include "mongo/config.h"
-#include "mongo/db/client.h"
 #include "mongo/db/service_context.h"
-#include "mongo/platform/atomic_word.h"
-#include "mongo/platform/mutex.h"
-#include "mongo/stdx/thread.h"
-#include "mongo/transport/message_compressor_base.h"
-#include "mongo/transport/service_entry_point.h"
 #include "mongo/transport/service_executor.h"
 #include "mongo/transport/session.h"
 #include "mongo/transport/transport_mode.h"
-#include "mongo/util/future.h"
-#include "mongo/util/net/ssl_manager.h"

 namespace mongo {
 namespace transport {
@@ -57,58 +46,25 @@ namespace transport {
 * ServiceEntryPoint and TransportLayer that ties network and database logic together for a
 * user.
 */
-class ServiceStateMachine : public std::enable_shared_from_this<ServiceStateMachine> {
+class ServiceStateMachine {
    ServiceStateMachine(ServiceStateMachine&) = delete;
    ServiceStateMachine& operator=(ServiceStateMachine&) = delete;

-public:
    ServiceStateMachine(ServiceStateMachine&&) = delete;
    ServiceStateMachine& operator=(ServiceStateMachine&&) = delete;

+public:
+    class Impl;
+
    /*
     * Construct a ServiceStateMachine for a given Client.
     */
    ServiceStateMachine(ServiceContext::UniqueClient client);

-    /*
-     * Any state may transition to EndSession in case of an error, otherwise the valid state
-     * transitions are:
-     * Source -> SourceWait -> Process -> SinkWait -> Source (standard RPC)
-     * Source -> SourceWait -> Process -> SinkWait -> Process -> SinkWait ... (exhaust)
-     * Source -> SourceWait -> Process -> Source (fire-and-forget)
-     */
-    enum class State {
-        Created,     // The session has been created, but no operations have been performed yet
-        Source,      // Request a new Message from the network to handle
-        SourceWait,  // Wait for the new Message to arrive from the network
-        Process,     // Run the Message through the database
-        SinkWait,    // Wait for the database result to be sent by the network
-        EndSession,  // End the session - the ServiceStateMachine will be invalid after this
-        Ended        // The session has ended. It is illegal to call any method besides
-                     // state() if this is the current state.
-    };
-
-    /*
-     * When start() is called with Ownership::kOwned, the SSM will swap the Client/thread name
-     * whenever it runs a stage of the state machine, and then unswap them out when leaving the SSM.
-     *
-     * With Ownership::kStatic, it will assume that the SSM will only ever be run from one thread,
-     * and that thread will not be used for other SSM's. It will swap in the Client/thread name
-     * for the first run and leave them in place.
-     *
-     * kUnowned is used internally to mark that the SSM is inactive.
-     */
-    enum class Ownership { kUnowned, kOwned, kStatic };
-
    /*
     * start() schedules a call to _runOnce() in the future.
     */
-    void start();
-
-    /*
-     * Gets the current state of connection for testing/diagnostic purposes.
-     */
-    State state();
+    void start(ServiceExecutorContext seCtx);

    /*
     * Terminates the associated transport Session, regardless of tags.
@@ -132,127 +88,8 @@ public:
    void setCleanupHook(std::function<void()> hook);

 private:
-    /*
-     * A class that wraps up lifetime management of the _dbClient and _threadName for
-     * each step in _runOnce();
-     */
-    class ThreadGuard;
-    friend class ThreadGuard;
-
-    /*
-     * Terminates the associated transport Session if status indicate error.
-     *
-     * This will not block on the session terminating cleaning itself up, it returns immediately.
-     */
-    void _terminateAndLogIfError(Status status);
-
-    /*
-     * This is a helper function to schedule tasks on the serviceExecutor maintaining a shared_ptr
-     * copy to anchor the lifetime of the SSM while waiting for callbacks to run.
-     *
-     * If scheduling the function fails, the SSM will be terminated and cleaned up immediately
-     */
-    void _scheduleNextWithGuard(ThreadGuard guard,
-                                transport::ServiceExecutor::ScheduleFlags flags,
-                                Ownership ownershipModel = Ownership::kOwned);
-
-    /*
-     * Gets the transport::Session associated with this connection
-     */
-    const transport::SessionHandle& _session() const;
-
-    /*
-     * Gets the transport::ServiceExecutor associated with this connection.
-     */
-    ServiceExecutor* _executor();
-
-    /*
-     * This function actually calls into the database and processes a request. It's broken out
-     * into its own inline function for better readability.
-     */
-    Future<void> _processMessage(ThreadGuard guard);
-
-    /*
-     * These get called by the TransportLayer when requested network I/O has completed.
-     */
-    void _sourceCallback(Status status);
-    void _sinkCallback(Status status);
-
-    /*
-     * Source/Sink message from the TransportLayer. These will invalidate the ThreadGuard just
-     * before waiting on the TL.
-     */
-    Future<void> _sourceMessage(ThreadGuard guard);
-    Future<void> _sinkMessage(ThreadGuard guard);
-
-    /*
-     * Releases all the resources associated with the session and call the cleanupHook.
-     */
-    void _cleanupSession(ThreadGuard guard);
-
-    /*
-     * This is the initial function called at the beginning of a thread's lifecycle in the
-     * TransportLayer.
-     */
-    void _runOnce();
-
-    /*
-     * Releases all the resources associated with the exhaust request.
-     */
-    void _cleanupExhaustResources() noexcept;
-
-    AtomicWord<State> _state{State::Created};
-
-    ServiceContext* const _serviceContext;
-    ServiceEntryPoint* const _sep;
-
-    transport::SessionHandle _sessionHandle;
-    ServiceContext::UniqueClient _dbClient;
-    Client* _dbClientPtr;
-    std::function<void()> _cleanupHook;
-
-    bool _inExhaust = false;
-    boost::optional<MessageCompressorId> _compressorId;
-    Message _inMessage;
-    Message _outMessage;
-
-    // Allows delegating destruction of opCtx to another function to potentially remove its cost
-    // from the critical path. This is currently only used in `_processMessage()`.
-    ServiceContext::UniqueOperationContext _killedOpCtx;
-
-    AtomicWord<Ownership> _owned{Ownership::kUnowned};
-#if MONGO_CONFIG_DEBUG_BUILD
-    AtomicWord<stdx::thread::id> _owningThread;
-#endif
+    std::shared_ptr<Impl> _impl;
 };

-template <typename T>
-T& operator<<(T& stream, const ServiceStateMachine::State& state) {
-    switch (state) {
-        case ServiceStateMachine::State::Created:
-            stream << "created";
-            break;
-        case ServiceStateMachine::State::Source:
-            stream << "source";
-            break;
-        case ServiceStateMachine::State::SourceWait:
-            stream << "sourceWait";
-            break;
-        case ServiceStateMachine::State::Process:
-            stream << "process";
-            break;
-        case ServiceStateMachine::State::SinkWait:
-            stream << "sinkWait";
-            break;
-        case ServiceStateMachine::State::EndSession:
-            stream << "endSession";
-            break;
-        case ServiceStateMachine::State::Ended:
-            stream << "ended";
-            break;
-    }
-    return stream;
-}
-
 }  // namespace transport
 }  // namespace mongo
--- a/src/mongo/transport/session.h
+++ b/src/mongo/transport/session.h
@@ -116,9 +116,10 @@ public:
    virtual Future<Message> asyncSourceMessage(const BatonHandle& handle = nullptr) = 0;

    /**
-     * Asynchronously waits for the availability of incoming data.
+     * Waits for the availability of incoming data.
     */
-    virtual Future<void> waitForData() = 0;
+    virtual Status waitForData() = 0;
+    virtual Future<void> asyncWaitForData() = 0;

    /**
     * Sink (send) a Message to the remote host for this Session.
--- a/src/mongo/transport/session_asio.h
+++ b/src/mongo/transport/session_asio.h
@@ -167,12 +167,16 @@ public:
        return sourceMessageImpl(baton);
    }

-    Future<void> waitForData() override {
-#ifdef MONGO_CONFIG_SSL
-        if (_sslSocket)
-            return asio::async_read(*_sslSocket, asio::null_buffers(), UseFuture{}).ignoreValue();
-#endif
-        return asio::async_read(_socket, asio::null_buffers(), UseFuture{}).ignoreValue();
+    Status waitForData() override {
+        ensureSync();
+        asio::error_code ec;
+        getSocket().wait(asio::ip::tcp::socket::wait_read, ec);
+        return errorCodeToStatus(ec);
+    }
+
+    Future<void> asyncWaitForData() override {
+        ensureAsync();
+        return getSocket().async_wait(asio::ip::tcp::socket::wait_read, UseFuture{});
    }

    Status sinkMessage(Message message) override {
--- a/src/mongo/transport/transport_layer_asio.cpp
+++ b/src/mongo/transport/transport_layer_asio.cpp
@@ -265,7 +265,10 @@ TransportLayerASIO::TransportLayerASIO(const TransportLayerASIO::Options& opts,
      _egressReactor(std::make_shared<ASIOReactor>()),
      _acceptorReactor(std::make_shared<ASIOReactor>()),
      _sep(sep),
-      _listenerOptions(opts) {}
+      _listenerOptions(opts) {
+    invariant(!opts.isIngress() || _sep,
+              "TransportLayers with ingress must have a ServiceEntryPoint.");
+}

 TransportLayerASIO::~TransportLayerASIO() = default;
Author	SHA1	Message	Date
Amirsaman Memaripour	63338e0dd6	SERVER-51690 Futurize and refactor Mongos execCommandClient	2020-11-18 00:38:58 +00:00
Amirsaman Memaripour	13bb35d34d	SERVER-51690 Futurize Mongos runCommand for async command execution	2020-11-17 17:07:44 +00:00
Amirsaman Memaripour	f4373b85f0	SERVER-51690 Futurize clientCommand to support async command execution	2020-11-05 02:21:26 +00:00
Amirsaman Memaripour	51067e0952	SERVER-49108 Convert buildInfo command to async implementation	2020-11-03 16:43:16 +00:00
Amirsaman Memaripour	9262aac9e7	SERVER-51690 Futurize Mongos handleRequest for async command execution	2020-11-02 17:38:29 +00:00
Ben Caimano	5b57abab3e	SERVER-51499 Track service executor statistics more thoroughly	2020-10-23 21:48:26 +00:00
Ben Caimano	907bf93c57	SERVER-49109 Move to the dedicated executor after blocking commands	2020-10-23 21:48:21 +00:00
Ben Caimano	243a506fca	SERVER-49109 ServiceExecutorFixed tracks work and sessions. This commit also adds a server parameter to start on the "borrowed" threading model and introduces an evergreen variant for it.	2020-10-23 21:47:37 +00:00
Amirsaman Memaripour	8cff99b2c4	SERVER-50452 Make destruction of ServiceExecutorFixed deterministic. This commit introduces new fail-points to allow unit-tests deterministically examine creation and return of executor threads.	2020-10-23 21:47:37 +00:00
Ben Caimano	439730c7f4	SERVER-51279 Updating runOnDataAvailable	2020-10-20 22:06:45 +00:00
Amirsaman Memaripour	18f1d00029	SERVER-49107 Futurize migrationConflictHandler and runCommandInvocation	2020-10-19 22:44:45 +00:00
Ben Caimano	fa29e47f37	SERVER-51278 Introduced ClientStrand	2020-10-19 20:25:05 +00:00
Amirsaman Memaripour	b03c93d55b	SERVER-49107 Futurize and refactor command invocation	2020-10-14 17:38:12 +00:00
Amirsaman Memaripour	1d6af89487	SERVER-49107 Futurize and refactor runCommandImpl()	2020-10-14 17:17:30 +00:00
Amirsaman Memaripour	38f350478a	SERVER-49107 Futurize and refactor execCommandDatabase()	2020-09-26 01:43:36 +00:00
Amirsaman Memaripour	4c283d5c34	SERVER-49107 Futurize and refactor receivedCommands()	2020-09-25 18:16:01 +00:00
Ben Caimano	efb3f7689e	SERVER-51080 Fix mongobridge startup ordering	2020-09-22 16:02:52 +00:00
Amirsaman Memaripour	66b412e165	SERVER-49107 Add support for async execution to handleRequest. This patch extends handleRequest to capture context for out-of-line execution and makes ServiceStateMachine own the opCtx used for command execution.	2020-09-17 17:36:35 +00:00
Ben Caimano	5f3f8d9745	SERVER-50947 Implize ServiceStateMachine	2020-09-17 02:08:00 +00:00
Amirsaman Memaripour	dabf0e0842	SERVER-49107 Allow binding clients to executor threads	2020-09-14 22:01:12 +00:00
Ben Caimano	92e7ecf4ba	SERVER-49073 Track connections that are maxConn exempt or threaded	2020-09-14 04:04:35 +00:00
Ben Caimano	46b220b83e	Revert "SERVER-50867 Roll back ServiceStateMachine changes temporarily" This reverts commit `575f370475`.	2020-09-14 04:01:09 +00:00
Ben Caimano	575f370475	SERVER-50867 Roll back ServiceStateMachine changes temporarily This reverts these commits: `b039b24746`: SERVER-48980 `97e16187ff`: SERVER-49072 `0607a6c291`: SERVER-49104	2020-09-11 20:22:25 +00:00