SERVER-108845 switch back to rapidyaml (#39670)
GitOrigin-RevId: 90eb45aaa0f42fe62b7ae8180e82a9c6e17f9ce5
This commit is contained in:
committed by
MongoDB Bot
parent
042618284a
commit
b92a12c682
@@ -5,6 +5,7 @@ py_library(
|
|||||||
srcs = [
|
srcs = [
|
||||||
"__init__.py",
|
"__init__.py",
|
||||||
"evergreen.py",
|
"evergreen.py",
|
||||||
|
"yaml_load.py",
|
||||||
],
|
],
|
||||||
visibility = ["//visibility:public"],
|
visibility = ["//visibility:public"],
|
||||||
deps = [
|
deps = [
|
||||||
|
|||||||
@@ -15,7 +15,8 @@ import sys
|
|||||||
from typing import Any, Dict, List, Optional, Set
|
from typing import Any, Dict, List, Optional, Set
|
||||||
|
|
||||||
import structlog
|
import structlog
|
||||||
import yaml
|
|
||||||
|
from buildscripts.ciconfig.yaml_load import yaml_load
|
||||||
|
|
||||||
ENTERPRISE_MODULE_NAME = "enterprise"
|
ENTERPRISE_MODULE_NAME = "enterprise"
|
||||||
ASAN_SIGNATURE = "detect_leaks=1"
|
ASAN_SIGNATURE = "detect_leaks=1"
|
||||||
@@ -80,10 +81,11 @@ def parse_evergreen_file(path, evergreen_binary="evergreen"):
|
|||||||
path, result.stdout, result.stderr
|
path, result.stdout, result.stderr
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
config = yaml.safe_load(result.stdout)
|
config: dict = yaml_load(result.stdout)
|
||||||
else:
|
else:
|
||||||
with open(path, "r", encoding="utf8") as fstream:
|
with open(path, "r", encoding="utf8") as fstream:
|
||||||
config = yaml.safe_load(fstream)
|
data = fstream.read()
|
||||||
|
config: dict = yaml_load(data)
|
||||||
|
|
||||||
return EvergreenProjectConfig(config)
|
return EvergreenProjectConfig(config)
|
||||||
|
|
||||||
|
|||||||
46
buildscripts/ciconfig/yaml_load.py
Normal file
46
buildscripts/ciconfig/yaml_load.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
from typing import Any
|
||||||
|
|
||||||
|
# PyYaml is very easy to use, but it is very slow. This is a problem for us since the main evergreen.yml file is quite large.
|
||||||
|
# PyYaml was taking over 10s to just load the file, which needed to be done every single task and so was a significant bottleneck.
|
||||||
|
# We use the rapidyaml library instead, which is much more low level but much faster (sub 1s to load the same file). This is not a
|
||||||
|
# full drop in replacement for PyYaml and does not fully satisfy the yaml spec, but it is sufficient for our needs.
|
||||||
|
|
||||||
|
|
||||||
|
try:
    import ryml

    def ryml_to_dict(tree: ryml.Tree, index: int = 0) -> Any:
        """Recursively convert the ryml node at ``index`` into plain Python objects.

        Maps become ``dict`` (keys decoded as UTF-8 strings), sequences become
        ``list``, and every other node is treated as a scalar whose text is
        converted as follows:

        * ``true`` / ``false`` -> ``bool``
        * ``null`` / ``~`` -> ``None``
        * text accepted by ``int()`` -> ``int``
        * text that contains a digit and is accepted by ``float()`` -> ``float``
        * anything else -> the decoded ``str``

        NOTE(review): only the scalar text is inspected; quoting is not
        consulted, so quoted scalars presumably convert like plain ones.

        Args:
            tree: Parsed ryml tree to walk.
            index: Id of the node to convert. Defaults to 0, which is where
                ``yaml_load`` starts the walk.

        Returns:
            The Python value equivalent to the node.
        """
        if tree.is_map(index):
            return {
                tree.key(child_index).tobytes().decode("utf8"): ryml_to_dict(tree, child_index)
                for child_index in ryml.children(tree, index)
            }
        if tree.is_seq(index):
            return [ryml_to_dict(tree, child_index) for child_index in ryml.children(tree, index)]

        text = tree.val(index).tobytes().decode("utf8")
        if text == "true":
            return True
        if text == "false":
            return False
        if text in ("null", "~"):
            return None

        try:
            return int(text)
        except ValueError:
            pass

        # float() also accepts "nan", "inf" and "infinity" (any case, optional
        # sign). Those are ordinary strings in YAML (and in the PyYAML fallback
        # below), so only attempt a float conversion when the text actually
        # contains a digit.
        if any(char.isdigit() for char in text):
            try:
                return float(text)
            except ValueError:
                pass

        return text

    def yaml_load(data: str) -> dict:
        """Parse YAML text with rapidyaml and return it as plain Python objects."""
        return ryml_to_dict(ryml.parse_in_arena(data))

except ImportError:
    # rapidyaml is not available; fall back to PyYAML's safe_load, which is
    # called the same way (slower, but behaviorally the reference).
    from yaml import safe_load as yaml_load  # noqa
|
||||||
@@ -808,7 +808,7 @@ class TestEvergreenYML(unittest.TestCase):
|
|||||||
generate_func = task.find_func_command("generate resmoke tasks")
|
generate_func = task.find_func_command("generate resmoke tasks")
|
||||||
if (
|
if (
|
||||||
generate_func is None
|
generate_func is None
|
||||||
or get_dict_value(generate_func, ["vars", "is_jstestfuzz"]) != "true"
|
or get_dict_value(generate_func, ["vars", "is_jstestfuzz"]) is not True
|
||||||
):
|
):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|||||||
40
poetry.lock
generated
40
poetry.lock
generated
@@ -714,6 +714,22 @@ wrapt = ">=1.10,<2"
|
|||||||
[package.extras]
|
[package.extras]
|
||||||
dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "setuptools", "tox"]
|
dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "setuptools", "tox"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "deprecation"
|
||||||
|
version = "2.1.0"
|
||||||
|
description = "A library to handle automated deprecations"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
groups = ["powercycle-incompatible"]
|
||||||
|
markers = "(platform_machine != \"s390x\" and platform_machine != \"ppc64le\" or platform_machine == \"s390x\" or platform_machine == \"ppc64le\") and platform_system != \"Windows\""
|
||||||
|
files = [
|
||||||
|
{file = "deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a"},
|
||||||
|
{file = "deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
packaging = "*"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "distlib"
|
name = "distlib"
|
||||||
version = "0.3.9"
|
version = "0.3.9"
|
||||||
@@ -2563,7 +2579,7 @@ version = "24.2"
|
|||||||
description = "Core utilities for Python packages"
|
description = "Core utilities for Python packages"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.8"
|
python-versions = ">=3.8"
|
||||||
groups = ["compile", "export", "idl", "testing"]
|
groups = ["compile", "export", "idl", "powercycle-incompatible", "testing"]
|
||||||
markers = "platform_machine != \"s390x\" and platform_machine != \"ppc64le\" or platform_machine == \"s390x\" or platform_machine == \"ppc64le\""
|
markers = "platform_machine != \"s390x\" and platform_machine != \"ppc64le\" or platform_machine == \"s390x\" or platform_machine == \"ppc64le\""
|
||||||
files = [
|
files = [
|
||||||
{file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
|
{file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
|
||||||
@@ -3601,6 +3617,26 @@ files = [
|
|||||||
[package.extras]
|
[package.extras]
|
||||||
all = ["numpy"]
|
all = ["numpy"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rapidyaml"
|
||||||
|
version = "0.0.post1671"
|
||||||
|
description = "Rapid YAML - a library to parse and emit YAML, and do it fast"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
groups = ["powercycle-incompatible"]
|
||||||
|
markers = "(platform_machine != \"s390x\" and platform_machine != \"ppc64le\" or platform_machine == \"s390x\" or platform_machine == \"ppc64le\") and platform_system != \"Windows\""
|
||||||
|
files = []
|
||||||
|
develop = false
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
deprecation = "*"
|
||||||
|
|
||||||
|
[package.source]
|
||||||
|
type = "git"
|
||||||
|
url = "https://github.com/mongodb-forks/rapidyaml.git"
|
||||||
|
reference = "a5d485fd44719e1c03e059177fc1f695fc462b66"
|
||||||
|
resolved_reference = "a5d485fd44719e1c03e059177fc1f695fc462b66"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "referencing"
|
name = "referencing"
|
||||||
version = "0.36.2"
|
version = "0.36.2"
|
||||||
@@ -5527,4 +5563,4 @@ libdeps = ["cxxfilt", "eventlet", "flask", "flask-cors", "gevent", "lxml", "prog
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.1"
|
lock-version = "2.1"
|
||||||
python-versions = ">=3.10,<4.0"
|
python-versions = ">=3.10,<4.0"
|
||||||
content-hash = "5dedf21a2566f81a279d675c3aecc911150080b38f1ae9213ad0773f6c29ff97"
|
content-hash = "50627e8fc8d530805753cda3bea5c4585817f04364ae29fe4f7c23fdf14060fb"
|
||||||
|
|||||||
@@ -75,6 +75,11 @@ typing-extensions = "^4.12.2"
|
|||||||
typer = "^0.12.3"
|
typer = "^0.12.3"
|
||||||
tenacity = "^9.0.0"
|
tenacity = "^9.0.0"
|
||||||
|
|
||||||
|
# specifically rapidyaml is broken on atlas distros with powercycle.
|
||||||
|
# Currently we exclude this when running poetry install in powercycle.
|
||||||
|
[tool.poetry.group.powercycle-incompatible.dependencies]
|
||||||
|
rapidyaml = {git = "https://github.com/mongodb-forks/rapidyaml.git@master", rev = "a5d485fd44719e1c03e059177fc1f695fc462b66", markers = "platform_system != 'Windows'"}
|
||||||
|
|
||||||
[tool.poetry.group.export.dependencies]
|
[tool.poetry.group.export.dependencies]
|
||||||
pipx = "1.6.0"
|
pipx = "1.6.0"
|
||||||
# TODO: Add in pex as we move forward with this
|
# TODO: Add in pex as we move forward with this
|
||||||
|
|||||||
Reference in New Issue
Block a user