From 749e7c3fc9f501f4b3a94a06b4c56cf5a4373689 Mon Sep 17 00:00:00 2001 From: Philip Stoev Date: Tue, 10 Mar 2026 07:17:06 +0200 Subject: [PATCH] SERVER-119723 Allow a jstest to access the TPC-H dataset (#48158) GitOrigin-RevId: 16528783332a63273179fbb602936be8b57dca18 --- etc/evergreen_yml_components/definitions.yml | 19 +++++++ evergreen/BUILD.bazel | 5 ++ evergreen/OWNERS.yml | 3 ++ evergreen/fetch_mongodb_database_tools.sh | 17 ++++++ evergreen/functions/BUILD.bazel | 5 ++ evergreen/functions/get_mongodb_tools_url.sh | 36 +++++++++++++ evergreen/gen_supplementary_data.sh | 33 +----------- jstests/libs/OWNERS.yml | 3 ++ jstests/libs/mongodb_database_tools.js | 56 ++++++++++++++++++++ 9 files changed, 146 insertions(+), 31 deletions(-) create mode 100755 evergreen/fetch_mongodb_database_tools.sh create mode 100644 evergreen/functions/get_mongodb_tools_url.sh create mode 100644 jstests/libs/mongodb_database_tools.js diff --git a/etc/evergreen_yml_components/definitions.yml b/etc/evergreen_yml_components/definitions.yml index 0bb57f50675..1ea169a7fcb 100644 --- a/etc/evergreen_yml_components/definitions.yml +++ b/etc/evergreen_yml_components/definitions.yml @@ -3689,3 +3689,22 @@ functions: "save tracing data": - *tar_tracing_data - *archive_tracing_data + + "fetch tpch dataset": + - *f_expansions_write + - command: s3.get + params: + role_arn: arn:aws:iam::579766882180:role/evergreen.mongo-db-master + bucket: query-benchmark-data + region: us-east-1 + remote_file: tpc-h/tpch-${scale}-normalized.archive.gz + local_file: tpc-h/tpch-${scale}-normalized.archive.gz + require_checksum_sha256: ${checksum} + + "fetch mongodb database tools": + - *f_expansions_write + - command: subprocess.exec + params: + binary: bash + args: + - "./src/evergreen/fetch_mongodb_database_tools.sh" diff --git a/evergreen/BUILD.bazel b/evergreen/BUILD.bazel index 08892e35c9d..c893e5b0460 100644 --- a/evergreen/BUILD.bazel +++ b/evergreen/BUILD.bazel @@ -159,6 +159,11 @@ sh_binary( srcs = 
#
# Download the MongoDB Database Tools so that they are available for use in jstests.
#
# The archive is unpacked into a "mongodb_database_tools" directory created in
# the directory this script is invoked from.
#

# -o pipefail: without it the exit status of the download pipeline below is
# only tar's, so a failed curl could go unnoticed.
set -exo pipefail

DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
source "$DIR/functions/get_mongodb_tools_url.sh"

mkdir -p mongodb_database_tools
pushd mongodb_database_tools

database_tools_url="$(get_mongodb_tools_url 100.14.1)" || exit 1
# Place the tools under mongodb_database_tools/bin in the root evergreen directory.
# --fail turns HTTP errors (404, 5xx) into a non-zero exit instead of piping an
# error page into tar; the URL is quoted to prevent word splitting and globbing.
curl --fail --location "${database_tools_url}" | tar xvz --strip-components=1

popd
# Print the download URL of the MongoDB Database Tools tarball matching the
# current host.
#
# Arguments:
#   $1 - Database Tools version to download, e.g. "100.14.1".
# Outputs:
#   On success, writes the URL to stdout.
#   On failure, writes a diagnostic to stderr. Callers capture stdout with
#   $(...), so anything printed there on failure would be swallowed into the
#   URL variable instead of being shown.
# Returns:
#   0 on success; 1 if the version is missing, /etc/os-release is absent, or
#   the host is not Amazon Linux 2 / 2023 on x86_64 or aarch64.
get_mongodb_tools_url() {
    local mongodb_tools_version="$1"
    local arch
    local database_tools_url

    if [ -z "$mongodb_tools_version" ]; then
        echo "Usage: get_mongodb_tools_url <version>" >&2
        return 1
    fi

    # Assigned separately from "local" so a failing uname is not masked by
    # the exit status of the "local" builtin.
    arch="$(uname -m)" || return 1

    if [ -f /etc/os-release ]; then
        # Sourcing defines ID and VERSION_ID (among others) in the current shell.
        . /etc/os-release
        if [ "$ID" = "amzn" ]; then
            case $arch in
            "x86_64" | "aarch64")
                case $VERSION_ID in
                "2" | "2023")
                    database_tools_url="https://fastdl.mongodb.org/tools/db/mongodb-database-tools-amazon${VERSION_ID}-${arch}-${mongodb_tools_version}.tgz"
                    ;;
                *)
                    echo "Unsupported Amazon Linux version: $VERSION_ID" >&2
                    return 1
                    ;;
                esac
                ;;
            *)
                echo "Unsupported architecture: $arch" >&2
                return 1
                ;;
            esac
        else
            echo "Unsupported Linux distribution: $ID" >&2
            return 1
        fi
    else
        echo "Unable to determine Linux distribution" >&2
        return 1
    fi

    echo "$database_tools_url"
}
/**
 * Allows the execution of the MongoDB Database Tools from within a jstest.
 *
 * The "fetch mongodb database tools" evergreen function makes those tools available
 * in the environment.
 *
 * @class
 */
export class Mongorestore {
    /**
     * @param {object} [conn=db.getMongo()] Connection whose `host` property identifies the
     *     server to restore into. Defaults to the connection behind the global `db`.
     */
    constructor(conn = db.getMongo()) {
        this.uri = `mongodb://${conn.host}`;
    }

    /**
     * Runs mongorestore against `this.uri` and asserts that it exits with status 0.
     *
     * @param {object} options
     * @param {string} options.archive Path of the archive to restore (required).
     * @param {string} [options.nsFrom] Passed as `--nsFrom`; must be given together with `nsTo`.
     * @param {string} [options.nsTo] Passed as `--nsTo`; must be given together with `nsFrom`.
     * @param {boolean} [options.drop=true] Pass `--drop`.
     * @param {boolean} [options.maintainInsertionOrder=true] Pass `--maintainInsertionOrder`.
     * @param {boolean} [options.gzip=true] Pass `--gzip`.
     * @throws {Error} If `archive` is missing or empty, if only one of `nsFrom`/`nsTo` is
     *     supplied, or if mongorestore exits with a non-zero status.
     */
    execute({
        archive,
        nsFrom = undefined,
        nsTo = undefined,
        drop = true,
        maintainInsertionOrder = true,
        gzip = true,
    } = {}) {
        // Reject null and "" as well as undefined: they would otherwise be rendered as
        // "--archive=null" or a bare "--archive=".
        if (!archive) {
            throw new Error("Archive must be provided to Mongorestore.execute()");
        }

        // Fail early with a clear message; mongorestore needs the two options as a pair.
        if (Boolean(nsFrom) !== Boolean(nsTo)) {
            throw new Error("nsFrom and nsTo must be provided together to Mongorestore.execute()");
        }

        // In Evergreen the fetched binary is addressed by relative path; elsewhere the bare
        // program name is used.
        const program = TestData.inEvergreen
            ? "../mongodb_database_tools/bin/mongorestore"
            : "mongorestore";

        const args = [program, "--uri", this.uri];

        if (nsFrom) {
            args.push(`--nsFrom=${nsFrom}`);
        }

        if (nsTo) {
            args.push(`--nsTo=${nsTo}`);
        }

        if (maintainInsertionOrder) {
            args.push("--maintainInsertionOrder");
        }

        if (gzip) {
            args.push("--gzip");
        }

        if (drop) {
            args.push("--drop");
        }

        args.push(`--archive=${archive}`);

        const exitCode = runNonMongoProgram(...args);
        assert.eq(exitCode, 0, `mongorestore failed with exit code ${exitCode}`);
    }
}