SERVER-108845 switch back to rapidyaml (#39670)

GitOrigin-RevId: 90eb45aaa0f42fe62b7ae8180e82a9c6e17f9ce5
This commit is contained in:
Daniel Moody
2025-08-06 13:32:52 -05:00
committed by MongoDB Bot
parent 042618284a
commit b92a12c682
6 changed files with 96 additions and 6 deletions

View File

@@ -5,6 +5,7 @@ py_library(
srcs = [ srcs = [
"__init__.py", "__init__.py",
"evergreen.py", "evergreen.py",
"yaml_load.py",
], ],
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = [ deps = [

View File

@@ -15,7 +15,8 @@ import sys
from typing import Any, Dict, List, Optional, Set from typing import Any, Dict, List, Optional, Set
import structlog import structlog
import yaml
from buildscripts.ciconfig.yaml_load import yaml_load
ENTERPRISE_MODULE_NAME = "enterprise" ENTERPRISE_MODULE_NAME = "enterprise"
ASAN_SIGNATURE = "detect_leaks=1" ASAN_SIGNATURE = "detect_leaks=1"
@@ -80,10 +81,11 @@ def parse_evergreen_file(path, evergreen_binary="evergreen"):
path, result.stdout, result.stderr path, result.stdout, result.stderr
) )
) )
config = yaml.safe_load(result.stdout) config: dict = yaml_load(result.stdout)
else: else:
with open(path, "r", encoding="utf8") as fstream: with open(path, "r", encoding="utf8") as fstream:
config = yaml.safe_load(fstream) data = fstream.read()
config: dict = yaml_load(data)
return EvergreenProjectConfig(config) return EvergreenProjectConfig(config)

View File

@@ -0,0 +1,46 @@
from typing import Any
# PyYAML is very easy to use, but it is very slow. This is a problem for us since the main evergreen.yml file is quite large.
# PyYAML was taking over 10s just to load the file, which needed to be done in every single task and so was a significant bottleneck.
# We use the rapidyaml library instead, which is much more low-level but much faster (sub-1s to load the same file). This is not a
# full drop-in replacement for PyYAML and does not fully satisfy the YAML spec, but it is sufficient for our needs.
try:
    import ryml

    # Scalar spellings that map straight onto Python singletons.
    _LITERAL_SCALARS = {"true": True, "false": False, "null": None, "~": None}

    def ryml_to_dict(tree: ryml.Tree, index: int = 0) -> Any:
        """Convert the ryml node at ``index``, and everything below it, to Python objects.

        Map nodes become dicts keyed by the UTF-8 decoded key, sequence nodes
        become lists, and any other node is treated as a scalar. A scalar's
        decoded text is returned as a bool, ``None``, int or float when it
        matches or parses as one, and as a plain string otherwise. Only the
        decoded text is inspected when choosing the scalar's type.
        """
        if tree.is_map(index):
            mapping = {}
            for child in ryml.children(tree, index):
                name = str(tree.key(child), "utf8")
                mapping[name] = ryml_to_dict(tree, child)
            return mapping

        if tree.is_seq(index):
            items = []
            for child in ryml.children(tree, index):
                items.append(ryml_to_dict(tree, child))
            return items

        text = tree.val(index).tobytes().decode("utf8")
        if text in _LITERAL_SCALARS:
            return _LITERAL_SCALARS[text]

        # Try int before float; text that is not numeric stays a string.
        for convert in (int, float):
            try:
                return convert(text)
            except ValueError:
                continue
        return text

    def yaml_load(data: str) -> dict:
        """Parse ``data`` with ryml and return the result as plain Python objects."""
        parsed_tree = ryml.parse_in_arena(data)
        return ryml_to_dict(parsed_tree)

except ImportError:
    # ryml could not be imported; expose PyYAML's safe_load under the same name.
    from yaml import safe_load as yaml_load  # noqa

View File

@@ -808,7 +808,7 @@ class TestEvergreenYML(unittest.TestCase):
generate_func = task.find_func_command("generate resmoke tasks") generate_func = task.find_func_command("generate resmoke tasks")
if ( if (
generate_func is None generate_func is None
or get_dict_value(generate_func, ["vars", "is_jstestfuzz"]) != "true" or get_dict_value(generate_func, ["vars", "is_jstestfuzz"]) is not True
): ):
continue continue

40
poetry.lock generated
View File

@@ -714,6 +714,22 @@ wrapt = ">=1.10,<2"
[package.extras] [package.extras]
dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "setuptools", "tox"] dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "setuptools", "tox"]
[[package]]
name = "deprecation"
version = "2.1.0"
description = "A library to handle automated deprecations"
optional = false
python-versions = "*"
groups = ["powercycle-incompatible"]
markers = "(platform_machine != \"s390x\" and platform_machine != \"ppc64le\" or platform_machine == \"s390x\" or platform_machine == \"ppc64le\") and platform_system != \"Windows\""
files = [
{file = "deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a"},
{file = "deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff"},
]
[package.dependencies]
packaging = "*"
[[package]] [[package]]
name = "distlib" name = "distlib"
version = "0.3.9" version = "0.3.9"
@@ -2563,7 +2579,7 @@ version = "24.2"
description = "Core utilities for Python packages" description = "Core utilities for Python packages"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["compile", "export", "idl", "testing"] groups = ["compile", "export", "idl", "powercycle-incompatible", "testing"]
markers = "platform_machine != \"s390x\" and platform_machine != \"ppc64le\" or platform_machine == \"s390x\" or platform_machine == \"ppc64le\"" markers = "platform_machine != \"s390x\" and platform_machine != \"ppc64le\" or platform_machine == \"s390x\" or platform_machine == \"ppc64le\""
files = [ files = [
{file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
@@ -3601,6 +3617,26 @@ files = [
[package.extras] [package.extras]
all = ["numpy"] all = ["numpy"]
[[package]]
name = "rapidyaml"
version = "0.0.post1671"
description = "Rapid YAML - a library to parse and emit YAML, and do it fast"
optional = false
python-versions = ">=3.6"
groups = ["powercycle-incompatible"]
markers = "(platform_machine != \"s390x\" and platform_machine != \"ppc64le\" or platform_machine == \"s390x\" or platform_machine == \"ppc64le\") and platform_system != \"Windows\""
files = []
develop = false
[package.dependencies]
deprecation = "*"
[package.source]
type = "git"
url = "https://github.com/mongodb-forks/rapidyaml.git"
reference = "a5d485fd44719e1c03e059177fc1f695fc462b66"
resolved_reference = "a5d485fd44719e1c03e059177fc1f695fc462b66"
[[package]] [[package]]
name = "referencing" name = "referencing"
version = "0.36.2" version = "0.36.2"
@@ -5527,4 +5563,4 @@ libdeps = ["cxxfilt", "eventlet", "flask", "flask-cors", "gevent", "lxml", "prog
[metadata] [metadata]
lock-version = "2.1" lock-version = "2.1"
python-versions = ">=3.10,<4.0" python-versions = ">=3.10,<4.0"
content-hash = "5dedf21a2566f81a279d675c3aecc911150080b38f1ae9213ad0773f6c29ff97" content-hash = "50627e8fc8d530805753cda3bea5c4585817f04364ae29fe4f7c23fdf14060fb"

View File

@@ -75,6 +75,11 @@ typing-extensions = "^4.12.2"
typer = "^0.12.3" typer = "^0.12.3"
tenacity = "^9.0.0" tenacity = "^9.0.0"
# specifically rapidyaml is broken on atlas distros with powercycle.
# currently we exclude this when running poetry install in powercycle.
[tool.poetry.group.powercycle-incompatible.dependencies]
rapidyaml = {git = "https://github.com/mongodb-forks/rapidyaml.git@master", rev = "a5d485fd44719e1c03e059177fc1f695fc462b66", markers = "platform_system != 'Windows'"}
[tool.poetry.group.export.dependencies] [tool.poetry.group.export.dependencies]
pipx = "1.6.0" pipx = "1.6.0"
# TODO: Add in pex as we move forward with this # TODO: Add in pex as we move forward with this