From cd6a9b34b55ea52aaee4e4b14aa4b8ceabc70587 Mon Sep 17 00:00:00 2001 From: Daniel Moody Date: Tue, 28 Nov 2023 22:20:17 +0000 Subject: [PATCH] SERVER-83258 add python toolchain to bazel build --- .bazelrc | 4 + BUILD.bazel | 3 + WORKSPACE.bazel | 57 +++++- bazel/BUILD.bazel | 16 ++ bazel/hello_world.py | 17 ++ bazel/mongo_src_rules.bzl | 80 +++++++- bazel/platforms/BUILD.bazel | 1 + bazel/toolchains/python_toolchain.BUILD | 35 ++++ bazel/toolchains/python_toolchain.bzl | 199 +++++++++++++++++++ etc/evergreen_yml_components/definitions.yml | 30 ++- poetry.lock | 2 +- pyproject.toml | 4 +- 12 files changed, 441 insertions(+), 7 deletions(-) create mode 100644 BUILD.bazel create mode 100644 bazel/hello_world.py create mode 100644 bazel/toolchains/python_toolchain.BUILD create mode 100644 bazel/toolchains/python_toolchain.bzl diff --git a/.bazelrc b/.bazelrc index c5e4844f563..ab767a9305f 100644 --- a/.bazelrc +++ b/.bazelrc @@ -6,6 +6,10 @@ build --enable_platform_specific_config=true # This makes builds more hermetic by preventing environment variables from leaking into the execution of rules build --incompatible_strict_action_env=true +# allows us to use python rules. This is needed because of the bug: +# https://github.com/bazelbuild/bazel/issues/4327 +build --nobuild_runfile_links + # This supports the "crosstool" feature (aka building from our toolchain). # Currently the only platform with a custom toolchain config is linux, use the default # toolchain_suite elsewhere. 
diff --git a/BUILD.bazel b/BUILD.bazel new file mode 100644 index 00000000000..6b69cf8205a --- /dev/null +++ b/BUILD.bazel @@ -0,0 +1,3 @@ +package(default_visibility = ["//visibility:public"]) + +exports_files(["pyproject.toml", "poetry.lock"]) diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index 03e7f496940..d46e96241af 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -13,7 +13,60 @@ http_archive( "https://mciuploads.s3.amazonaws.com/toolchain-builder/ubuntu2204-arm64/c8946f1ee23987ed32481a2f571d0ee38f86667b/bazel_v4_toolchain_builder_ubuntu2204_arm64_c8946f1ee23987ed32481a2f571d0ee38f86667b_23_10_25_04_25_33.tar.gz", ], ) +register_toolchains("@mongo_toolchain//:mongo_toolchain") -register_toolchains( - "@mongo_toolchain//:mongo_toolchain", +load("//bazel/toolchains:python_toolchain.bzl", "setup_mongo_python_toolchains") +[register_toolchains(toolchain) for toolchain in setup_mongo_python_toolchains()] + +http_archive( + name = "rules_python", + sha256 = "8c8fe44ef0a9afc256d1e75ad5f448bb59b81aba149b8958f02f7b3a98f5d9b4", + strip_prefix = "rules_python-0.13.0", + url = "https://github.com/bazelbuild/rules_python/archive/refs/tags/0.13.0.tar.gz", +) + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +http_archive( + name = "aspect_bazel_lib", + sha256 = "e3151d87910f69cf1fc88755392d7c878034a69d6499b287bcfc00b1cf9bb415", + strip_prefix = "bazel-lib-1.32.1", + url = "https://github.com/aspect-build/bazel-lib/releases/download/v1.32.1/bazel-lib-v1.32.1.tar.gz", +) + +# aspect rules rely on skylib, so this function downloads that dependency +load("@aspect_bazel_lib//lib:repositories.bzl", "aspect_bazel_lib_dependencies") +aspect_bazel_lib_dependencies() + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +http_archive( + name = "platforms", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/platforms/releases/download/0.0.8/platforms-0.0.8.tar.gz", + 
"https://github.com/bazelbuild/platforms/releases/download/0.0.8/platforms-0.0.8.tar.gz", + ], + sha256 = "8150406605389ececb6da07cbcb509d5637a3ab9a24bc69b1101531367d89d74", +) + +# Poetry rules for managing Python dependencies + +http_archive( + name = "rules_poetry", + strip_prefix = "rules_poetry-04f2b93c83587ab377bf5a724eaafd63b4ff5930", + urls = ["https://github.com/mongodb-forks/rules_poetry/archive/04f2b93c83587ab377bf5a724eaafd63b4ff5930.tar.gz"], +) + +load("@rules_poetry//rules_poetry:defs.bzl", "poetry_deps") + +poetry_deps() + +load("@rules_poetry//rules_poetry:poetry.bzl", "poetry") + +poetry( + name = "poetry", + lockfile = "//:poetry.lock", + pyproject = "//:pyproject.toml", + python_interpreter_target_default="@py_host//:bin/python3", + python_interpreter_target_mac="@py_host//:bin/python3", + python_interpreter_target_win="@py_host//:python.exe", ) diff --git a/bazel/BUILD.bazel b/bazel/BUILD.bazel index e69de29bb2d..d949ea2247f 100644 --- a/bazel/BUILD.bazel +++ b/bazel/BUILD.bazel @@ -0,0 +1,16 @@ +# TODO SERVER-80633 remove this when we have rules which use the toolchain +load("@poetry//:dependencies.bzl", "dependency") +load("//bazel:mongo_src_rules.bzl", "run_python_buildscript") + +run_python_buildscript( + name = "py_hello_world", # label for this target + script = "hello_world.py", # entry point script to execute + outputs = [":hello_world.txt"], # generated outputs + args = [ # command-line args passed to the script + "--output-file", + "$(execpath :hello_world.txt)", + "--hello-target", + "World" + ], + py_deps = [dependency("networkx")], # pip dependencies +) diff --git a/bazel/hello_world.py b/bazel/hello_world.py new file mode 100644 index 00000000000..881d508aa8d --- /dev/null +++ b/bazel/hello_world.py @@ -0,0 +1,17 @@ +# TODO SERVER-80633 remove this when we have rules which use the toolchain +import networkx +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--output-file") +parser.add_argument("--hello-target") +args = parser.parse_args() 
+ +g = networkx.Graph() + +important_message = f"Hello {args.hello_target}!, I made a {type(g)}" + +print(important_message) + +with open(args.output_file, 'w') as f: + f.write(important_message) diff --git a/bazel/mongo_src_rules.bzl b/bazel/mongo_src_rules.bzl index 1ea121c33ca..12e89aad190 100644 --- a/bazel/mongo_src_rules.bzl +++ b/bazel/mongo_src_rules.bzl @@ -1,5 +1,5 @@ # Common mongo-specific bazel build rules intended to be used in individual BUILD files in the "src/" subtree. - +load("@aspect_bazel_lib//lib:expand_make_vars.bzl", "expand_locations") # === Windows-specific compilation settings === # /RTC1 Enable Stack Frame Run-Time Error Checking; Reports when a variable is used without having been initialized (implies /Od: no optimizations) @@ -169,3 +169,81 @@ def mongo_cc_binary( }), includes = [], ) + + +# This is an example of running a python script for a build where it's expected some output +# of the script will then be the input to another part of the build, source code generation +# for example. Technically we could use py_binary + genrule (or even just genrule), but +# the interface is so generic that it becomes complex to create such genrules, and you +# end up writing a bunch of implementation logic into the build file. +# This rule is in itself generic as well and may be subject to removal in the future +# depending on use cases and/or other rules that are implemented. +def run_python_buildscript_impl(ctx): + + # will use this to get a path to the interpreter + python = ctx.toolchains["@bazel_tools//tools/python:toolchain_type"].py3_runtime + + # collect deps from any extra python files. + src_depsets = [src.files for src in ctx.attr.srcs] + + # collect deps from python modules and setup the corresponding + # path so all modules can be found by the toolchain. 
+ python_path = [] + for py_dep in ctx.attr.py_deps: + for dep in py_dep[PyInfo].transitive_sources.to_list(): + if dep.path not in python_path: + python_path.append(dep.path) + py_depsets = [py_dep[PyInfo].transitive_sources for py_dep in ctx.attr.py_deps] + + # aggregate the deps to a single var + inputs = depset(transitive=[ + ctx.attr.script.files, + python.files] + src_depsets + py_depsets) + + # resolve any bazel location expansion that may be present in the args + args_resolved = [] + for arg in ctx.attr.args: + args_resolved.append(expand_locations(ctx, arg)) + + # Run it! PYTHONPATH is joined with the host path separator (not a hard-coded ':') so multiple entries also work on Windows. + ctx.actions.run( + executable = python.interpreter.path, + outputs = ctx.outputs.outputs, + inputs = inputs, + arguments = [ctx.attr.script.files.to_list()[0].path] + args_resolved, + mnemonic = "PythonScriptRunner", + env={"PYTHONPATH":ctx.configuration.host_path_separator.join(python_path)} + ) + + return [ + DefaultInfo( + files = depset(ctx.outputs.outputs), + ), + ] + +run_python_buildscript = rule( + run_python_buildscript_impl, + attrs = { + "script": attr.label( + doc = "The script to execute.", + allow_single_file=True, + ), + "args" : attr.string_list( + doc = "Args to pass to the script." 
+ ), + "srcs" : attr.label_list( + doc = "Supporting scripts which may be imported.", + allow_files=True, + ), + "outputs" : attr.output_list( + doc = "Output files that will be generated by this script.", + ), + "py_deps" : attr.label_list( + doc = "Python modules that should be imported.", + providers = [PyInfo], + ), + }, + doc = "Run a python script that may import modules.", + toolchains = ["@bazel_tools//tools/python:toolchain_type"], + fragments = ["py"] +) \ No newline at end of file diff --git a/bazel/platforms/BUILD.bazel b/bazel/platforms/BUILD.bazel index 9596d06b862..41d0d6a034c 100644 --- a/bazel/platforms/BUILD.bazel +++ b/bazel/platforms/BUILD.bazel @@ -14,6 +14,7 @@ package(default_visibility = ["//visibility:public"]) # debian gcc based image contains the base our toolchain needs (glibc version and build-essentials) # https://hub.docker.com/layers/library/gcc/12.3-bookworm/images/sha256-6a3a5694d10299dbfb8747b98621abf4593bb54a5396999caa013cba0e17dd4f?context=explore "container-image": "docker://docker.io/library/gcc@sha256:6a3a5694d10299dbfb8747b98621abf4593bb54a5396999caa013cba0e17dd4f", + "dockerNetwork": "standard" } ) for compiler in ["clang", "gcc"] diff --git a/bazel/toolchains/python_toolchain.BUILD b/bazel/toolchains/python_toolchain.BUILD new file mode 100644 index 00000000000..af9aea235a5 --- /dev/null +++ b/bazel/toolchains/python_toolchain.BUILD @@ -0,0 +1,35 @@ +load("@bazel_tools//tools/python:toolchain.bzl", "py_runtime_pair") + +filegroup( + name = "files", + srcs = glob(["**/*"]), + visibility = ["//visibility:public"], +) + +filegroup( + name = "interpreter", + srcs = ["{interpreter_path}"], + visibility = ["//visibility:public"], +) + +py_runtime( + name = "py_runtime", + files = [":files"], + interpreter = ":interpreter", + python_version = "PY3", + visibility = ["//visibility:public"], +) + +py_runtime_pair( + name = "py_runtime_pair", + py2_runtime = None, + py3_runtime = ":py_runtime", +) + +toolchain( + name = 
"python_toolchain", + exec_compatible_with = [{constraints}], + target_compatible_with = [{constraints}], + toolchain = ":py_runtime_pair", + toolchain_type = "@bazel_tools//tools/python:toolchain_type", +) \ No newline at end of file diff --git a/bazel/toolchains/python_toolchain.bzl b/bazel/toolchains/python_toolchain.bzl new file mode 100644 index 00000000000..e11ec227569 --- /dev/null +++ b/bazel/toolchains/python_toolchain.bzl @@ -0,0 +1,199 @@ +"""Repository rules that download prebuilt CPython distributions and expose them as Bazel Python toolchains.""" + +_OS_MAP = { + "macos": "@platforms//os:osx", + "linux": "@platforms//os:linux", + "windows": "@platforms//os:windows", +} + +_ARCH_MAP = { + "amd64": "@platforms//cpu:x86_64", + "aarch64": "@platforms//cpu:arm64", + "x86_64": "@platforms//cpu:x86_64", +} + +URLS_MAP = { + "linux_aarch64":{ + "sha": "c7573fdb00239f86b22ea0e8e926ca881d24fde5e5890851339911d76110bc35", + "url": "https://github.com/indygreg/python-build-standalone/releases/download/20230507/cpython-3.10.11+20230507-aarch64-unknown-linux-gnu-install_only.tar.gz", + "interpreter_path": "bin/python3", + }, + "linux_amd64":{ + "sha": "c5bcaac91bc80bfc29cf510669ecad12d506035ecb3ad85ef213416d54aecd79", + "url": "https://github.com/indygreg/python-build-standalone/releases/download/20230507/cpython-3.10.11+20230507-x86_64-unknown-linux-gnu-install_only.tar.gz", + "interpreter_path": "bin/python3", + }, + "windows_amd64":{ + "sha": "97ebca93a928802f421387dcc6ec5403a3e513f43c2df35b7c3e3cca844d79d0", + "url": "https://github.com/indygreg/python-build-standalone/releases/download/20230507/cpython-3.10.11+20230507-x86_64-pc-windows-msvc-static-install_only.tar.gz", + "interpreter_path": "python.exe", + }, + "macos_aarch64":{ + "sha": "8348bc3c2311f94ec63751fb71bd0108174be1c4def002773cf519ee1506f96f", + "url": "https://github.com/indygreg/python-build-standalone/releases/download/20230507/cpython-3.10.11+20230507-aarch64-apple-darwin-install_only.tar.gz", + "interpreter_path": "bin/python3", + }, + "macos_x86_64":{ + "sha": 
"bd3fc6e4da6f4033ebf19d66704e73b0804c22641ddae10bbe347c48f82374ad", + "url": "https://github.com/indygreg/python-build-standalone/releases/download/20230507/cpython-3.10.11+20230507-x86_64-apple-darwin-install_only.tar.gz", + "interpreter_path": "bin/python3", + } + + } + +def _py_download(ctx): + """ + Downloads and extracts a Python distribution, then generates its BUILD file from the template. + + Args: + ctx: Repository context; os/arch fall back to the detected host and urls/sha256 to URLS_MAP when unset. + """ + + + if ctx.attr.os: + os = ctx.attr.os + else: + if "win" in ctx.os.name: + os = "windows" + elif "mac" in ctx.os.name: + os = "macos" + else: + os = "linux" + + if ctx.attr.arch: + arch = ctx.attr.arch + else: + arch = ctx.os.arch + + if ctx.attr.urls: + urls = ctx.attr.urls + sha = ctx.attr.sha256 + interpreter_path = ctx.attr.interpreter_path + else: + platform_info = URLS_MAP["{os}_{arch}".format(os=os, arch=arch)] + urls = platform_info['url'] + sha = platform_info['sha'] + interpreter_path = platform_info['interpreter_path'] + + ctx.report_progress("downloading python") + ctx.download_and_extract( + url = urls, + sha256 = sha, + stripPrefix = "python", + ) + + ctx.report_progress("generating build file") + os_constraint = _OS_MAP[os] + arch_constraint = _ARCH_MAP[arch] + + constraints = [os_constraint, arch_constraint] + + # So Starlark doesn't throw an indentation error when this gets injected. + constraints_str = ",\n ".join(['"%s"' % c for c in constraints]) + + # Inject our string substitutions into the BUILD file template, and drop said BUILD file in the WORKSPACE root of the repository. 
+ substitutions = { + "{constraints}": constraints_str, + "{interpreter_path}": interpreter_path, + } + + ctx.template( + "BUILD.bazel", + ctx.attr.build_tpl, + substitutions = substitutions, + ) + + return None + +py_download = repository_rule( + implementation = _py_download, + attrs = { + "urls": attr.string_list( + doc = "String list of mirror URLs where the Python distribution can be downloaded.", + ), + "sha256": attr.string( + doc = "Expected SHA-256 sum of the archive.", + ), + "os": attr.string( + values = ["macos", "linux", "windows"], + doc = "Host operating system.", + ), + "arch": attr.string( + values = ["amd64", "aarch64", "x86_64"], + doc = "Host architecture.", + ), + "interpreter_path": attr.string( + default = "bin/python3", + doc = "Path you'd expect the python interpreter binary to live.", + ), + "build_tpl": attr.label( + default = "//bazel/toolchains:python_toolchain.BUILD", + doc = "Label denoting the BUILD file template that get's installed in the repo.", + ), + }, +) + +def setup_mongo_python_toolchains(): + + # This will autoselect a toolchain that matches the host environment + # this toolchain is intended to be used only for local repository executions, + # and will not be registered as a bazel toolchain by omitting from the return + # value below. 
+ py_download( + name = "py_host", + ) + + py_download( + name = "py_linux_arm64", + arch = "aarch64", + os = "linux", + build_tpl = "//bazel/toolchains:python_toolchain.BUILD", + sha256 = "c7573fdb00239f86b22ea0e8e926ca881d24fde5e5890851339911d76110bc35", + urls = ["https://github.com/indygreg/python-build-standalone/releases/download/20230507/cpython-3.10.11+20230507-aarch64-unknown-linux-gnu-install_only.tar.gz"], + ) + + py_download( + name = "py_linux_x86_64", + arch = "amd64", + os = "linux", + build_tpl = "//bazel/toolchains:python_toolchain.BUILD", + sha256 = "c5bcaac91bc80bfc29cf510669ecad12d506035ecb3ad85ef213416d54aecd79", + urls = ["https://github.com/indygreg/python-build-standalone/releases/download/20230507/cpython-3.10.11+20230507-x86_64-unknown-linux-gnu-install_only.tar.gz"], + ) + + py_download( + name = "py_windows_x86_64", + arch = "amd64", + os = "windows", + build_tpl = "//bazel/toolchains:python_toolchain.BUILD", + interpreter_path = "python.exe", + sha256 = "97ebca93a928802f421387dcc6ec5403a3e513f43c2df35b7c3e3cca844d79d0", + urls = ["https://github.com/indygreg/python-build-standalone/releases/download/20230507/cpython-3.10.11+20230507-x86_64-pc-windows-msvc-static-install_only.tar.gz"], + ) + + py_download( + name = "py_macos_arm64", + arch = "aarch64", + os = "macos", + build_tpl = "//bazel/toolchains:python_toolchain.BUILD", + sha256 = "8348bc3c2311f94ec63751fb71bd0108174be1c4def002773cf519ee1506f96f", + urls = ["https://github.com/indygreg/python-build-standalone/releases/download/20230507/cpython-3.10.11+20230507-aarch64-apple-darwin-install_only.tar.gz"], + ) + + py_download( + name = "py_macos_x86_64", + arch = "amd64", + os = "macos", + build_tpl = "//bazel/toolchains:python_toolchain.BUILD", + sha256 ="bd3fc6e4da6f4033ebf19d66704e73b0804c22641ddae10bbe347c48f82374ad", + urls = ["https://github.com/indygreg/python-build-standalone/releases/download/20230507/cpython-3.10.11+20230507-x86_64-apple-darwin-install_only.tar.gz"], + ) + + 
return ( + "@py_linux_arm64//:python_toolchain", + "@py_linux_x86_64//:python_toolchain", + "@py_windows_x86_64//:python_toolchain", + "@py_macos_arm64//:python_toolchain", + "@py_macos_x86_64//:python_toolchain", + ) + \ No newline at end of file diff --git a/etc/evergreen_yml_components/definitions.yml b/etc/evergreen_yml_components/definitions.yml index 5f5d9d00555..826bbe8fb7c 100644 --- a/etc/evergreen_yml_components/definitions.yml +++ b/etc/evergreen_yml_components/definitions.yml @@ -147,7 +147,7 @@ variables: - &compile_bazel_task_group_template name: compile_bazel_task_group_template - max_hosts: 1 + max_hosts: -1 tasks: [] setup_group_can_fail_task: true setup_group: @@ -1378,6 +1378,17 @@ functions: args: - "src/evergreen/bazel_compile.sh" + "bazel compile (msvc)": + - *get_version_expansions + - *apply_version_expansions + - *f_expansions_write + - command: subprocess.exec + type: test + params: + binary: bash + args: + - "src/evergreen/bazel_compile.sh" + "bazel run": - *get_version_expansions - *apply_version_expansions @@ -2690,6 +2701,19 @@ tasks: targets: src/mongo/db/commands:fsync_locked compiler: clang +- name: bazel_py_hello_world + tags: [] + depends_on: + - name: version_expansions_gen + variant: generate-tasks-for-version + commands: + # TODO SERVER-81038: Remove "fetch bazel" once bazelisk is self-hosted. + - func: "fetch bazel" + - func: "bazel compile (gcc)" + vars: + targets: //bazel:py_hello_world + compiler: gcc + - name: compile_bazel_dist_test_windows tags: [] depends_on: @@ -2740,6 +2764,7 @@ tasks: vars: output: build/fast/mongo/db/commands/libfsync_locked.a + # Validates that the bazel mongo toolchain can be used to compile targets that # contain C and asssembly source code files. # TODO(SERVER-82195): simplify this to avoid duplication of parameters in other tests. 
@@ -9367,11 +9392,13 @@ task_groups: - compile_bazel_program - compile_ninja_bazel - run_bazel_program + - bazel_py_hello_world - <<: *compile_bazel_task_group_template name: compile_bazel_windows_TG tasks: - compile_bazel_dist_test_windows + - bazel_py_hello_world - run_bazel_program_windows - compile_ninja_bazel @@ -9379,6 +9406,7 @@ task_groups: name: compile_bazel_macos_TG tasks: - compile_bazel_dist_test_macos + - bazel_py_hello_world - <<: *compile_task_group_template name: compile_upload_benchmarks_TG diff --git a/poetry.lock b/poetry.lock index dc3184ecde0..75ca8ebada0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4264,4 +4264,4 @@ libdeps = ["cxxfilt", "eventlet", "flask", "flask-cors", "gevent", "lxml", "prog [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "6dc5eb9bb96c4d40d1bcd6e492002ddd78fb08036af56f05534c2743b191a130" +content-hash = "1392e467e860501cdd4e6828ae72fb52c58db41a091d0183a0032b18a896019f" diff --git a/pyproject.toml b/pyproject.toml index e58e5be7686..f9870457704 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,8 +105,8 @@ tqdm = "*" colorama = "^0.4.6" [tool.poetry.group.platform.dependencies] -pypiwin32 = { version = ">=223", markers = "platform_machine == 'win32'" } -pywin32 = { version = ">=225", markers = "platform_machine == 'win32'" } +pypiwin32 = { version = ">=223", markers = "sys_platform == 'win32'" } +pywin32 = { version = ">=225", markers = "sys_platform == 'win32'" } cryptography = [ { version = "2.3", markers = "platform_machine == 's390x' or platform_machine == 'ppc64le'" }, # Needed for oauthlib to use RSAAlgorithm # Version locked - see SERVER-36618 { version = "36.0.2", markers = "platform_machine != 's390x' and platform_machine != 'ppc64le'" },