SERVER-102823 handle multiple clang tidy configs (#40036)

GitOrigin-RevId: a65882f2162af611a89a37d36ae0df5356dbbe42
This commit is contained in:
Daniel Moody
2025-08-13 16:41:45 -05:00
committed by MongoDB Bot
parent 886b418b06
commit 651c1f3030
12 changed files with 421 additions and 21 deletions

View File

@@ -112,7 +112,9 @@ common --modify_execution_info=^(TestRunner|CppLink|CppArchive|SolibSymlink|Extr
# Global clang tidy flags to avoid invalidating analysis cache between clang-tidy / regular runs
common --@bazel_clang_tidy//:clang_tidy_excludes=".pb.cc,icu_init.cpp"
common --@bazel_clang_tidy//:clang_tidy_additional_configs=//:clang_tidy_config_files
common --@bazel_clang_tidy//:clang_tidy_config=//:clang_tidy_config_strict
common --@bazel_clang_tidy//:clang_tidy_merge_tool=//bazel:merge_tidy_configs
common --@bazel_clang_tidy//:clang_tidy_executable=//:clang_tidy
common --@bazel_clang_tidy//:clang_tidy_additional_deps=//:toolchain_files
common --@bazel_clang_tidy//:clang_tidy_plugin_deps=//src/mongo/tools/mongo_tidy_checks:mongo_tidy_checks

2
.gitignore vendored
View File

@@ -260,7 +260,7 @@ all_feature_flags.txt
# generated by clang-tidy buildscripts
clang_tidy_fixes
.clang-tidy
/.clang-tidy
#SCons runtime configuration
scons_env.env

View File

@@ -1,16 +1,16 @@
load("@npm//:defs.bzl", "npm_link_all_packages")
load("@aspect_rules_js//npm:defs.bzl", "npm_link_package")
load("//bazel/install_rules:install_rules.bzl", "TEST_TAGS", "extensions_with_config", "mongo_install")
load("//bazel/install_rules:bolt.bzl", "bolt_instrument", "bolt_optimize")
load("//bazel:mongo_src_rules.bzl", "mongo_cc_binary")
load("//bazel/toolchains/cc/mongo_linux:mongo_toolchain.bzl", "setup_mongo_toolchain_aliases")
load("//bazel/toolchains/cc/mongo_linux:mongo_gdb.bzl", "setup_gdb_toolchain_aliases")
load("//bazel/config:render_template.bzl", "render_template")
load("@npm//:eslint/package_json.bzl", eslint_bin = "bin")
load("//bazel:mongo_js_rules.bzl", "mongo_js_library")
load("@npm//:prettier/package_json.bzl", prettier = "bin")
load("@aspect_bazel_lib//lib:copy_to_directory.bzl", "copy_to_directory")
load("@aspect_rules_js//npm:defs.bzl", "npm_link_package")
load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
load("@npm//:defs.bzl", "npm_link_all_packages")
load("@npm//:eslint/package_json.bzl", eslint_bin = "bin")
load("@npm//:prettier/package_json.bzl", prettier = "bin")
load("//bazel:mongo_js_rules.bzl", "mongo_js_library")
load("//bazel:mongo_src_rules.bzl", "mongo_cc_binary")
load("//bazel/config:render_template.bzl", "render_template")
load("//bazel/install_rules:bolt.bzl", "bolt_instrument", "bolt_optimize")
load("//bazel/install_rules:install_rules.bzl", "TEST_TAGS", "extensions_with_config", "mongo_install")
load("//bazel/toolchains/cc/mongo_linux:mongo_gdb.bzl", "setup_gdb_toolchain_aliases")
load("//bazel/toolchains/cc/mongo_linux:mongo_toolchain.bzl", "setup_mongo_toolchain_aliases")
package(
default_visibility = ["//visibility:public"],
@@ -540,3 +540,8 @@ bolt_optimize(
"//conditions:default": ["@platforms//:incompatible"],
}),
)
filegroup(
name = "clang_tidy_config_files",
srcs = [],
)

View File

@@ -15,12 +15,12 @@ http_file = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_
http_archive(
name = "bazel_clang_tidy",
integrity = "sha256-A+TGGfuHdS7vWT20eyAEiA3u4YXFtuzsJDcpTEmDMS0=",
strip_prefix = "bazel_clang_tidy-10c4bf70a8946789e3932f30e29dfba2dfb78e67",
integrity = "sha256-4jSWCC9GXwk4VeqLNZl6heYZUsUqvC8EP1OO+JEVDSA=",
strip_prefix = "bazel_clang_tidy-908bf1af4e5b645a7a897b43552c5e482195ee25",
urls = [
# Implements retry by relisting each url multiple times to be used as a failover.
# TODO(SERVER-86719): Re-implement http_archive to allow sleeping between retries
"https://github.com/mongodb-forks/bazel_clang_tidy/archive/10c4bf70a8946789e3932f30e29dfba2dfb78e67.tar.gz",
"https://github.com/mongodb-forks/bazel_clang_tidy/archive/908bf1af4e5b645a7a897b43552c5e482195ee25.tar.gz",
] * 5,
)

View File

@@ -1,3 +1,5 @@
load("@poetry//:dependencies.bzl", "dependency")
package(default_visibility = ["//visibility:public"])
# Expose script for external usage through bazel.
@@ -16,3 +18,18 @@ py_binary(
name = "bazelisk",
srcs = ["bazelisk.py"],
)
py_binary(
name = "merge_tidy_configs",
srcs = [
"merge_tidy_configs.py",
],
main = "merge_tidy_configs.py",
visibility = ["//visibility:public"],
deps = [
dependency(
"pyyaml",
group = "core",
),
],
)

View File

@@ -23,7 +23,7 @@ py_binary(
main = "rules_lint_format_wrapper.py",
visibility = ["//visibility:public"],
deps = [
"//buildscripts:unittest_grouper",
"//buildscripts:bazel_custom_formatter",
],
)

View File

@@ -4,7 +4,7 @@ import pathlib
import subprocess
from typing import List, Union
from buildscripts.unittest_grouper import validate_bazel_groups
from buildscripts.bazel_custom_formatter import validate_bazel_groups, validate_clang_tidy_configs
def _git_distance(args: list) -> int:
@@ -204,6 +204,7 @@ def main() -> int:
return any(file.endswith(".bazel") or "BUILD" in file for file in files)
if files_to_format_contains_bazel_file(files_to_format):
validate_clang_tidy_configs(generate_report=True, fix=not args.check)
validate_bazel_groups(generate_report=True, fix=not args.check)
if files_to_format != "all":

186
bazel/merge_tidy_configs.py Normal file
View File

@@ -0,0 +1,186 @@
#!/usr/bin/env python3
"""
Merge clang-tidy config files:
- Baseline + zero or more additional config files.
- Checks: concatenated left→right (later entries can disable earlier via -pattern).
- CheckOptions: merged by key (later configs override).
- Other keys: deep-merge (dicts) or last-wins (scalars).
Requires: PyYAML
"""
from __future__ import annotations
import argparse
import pathlib
from typing import Any, Dict, List
import yaml
# --------------------------- YAML helpers ---------------------------
def load_yaml(file_path: str | pathlib.Path) -> Dict[str, Any]:
path = pathlib.Path(file_path)
if not path.exists():
raise SystemExit(f"Error: Config file '{file_path}' not found.")
with open(path, "r", encoding="utf-8") as f:
return yaml.safe_load(f) or {}
def split_checks_to_list(value: Any) -> List[str]:
"""Normalize a Checks value (string or list) into a flat list of tokens."""
if value is None:
return []
parts: List[str] = []
if isinstance(value, str):
parts = [s.strip() for s in value.split(",")]
elif isinstance(value, list):
for item in value:
parts.extend([s.strip() for s in str(item).split(",")])
return [s for s in parts if s]
def merge_checks_into_config(target_config: Dict[str, Any],
incoming_config: Dict[str, Any]) -> None:
"""Append incoming Checks onto target Checks (string-concatenated)."""
accumulated = split_checks_to_list(target_config.get("Checks"))
additions = split_checks_to_list(incoming_config.get("Checks"))
if additions:
target_config["Checks"] = ",".join(accumulated + additions)
def check_options_list_to_map(value: Any) -> Dict[str, Any]:
"""Transform CheckOptions list[{key,value}] into a dict[key]=value."""
out: Dict[str, Any] = {}
if isinstance(value, list):
for item in value:
if isinstance(item, dict) and "key" in item:
out[item["key"]] = item.get("value")
return out
def merge_check_options_into_config(target_config: Dict[str, Any],
incoming_config: Dict[str, Any]) -> None:
"""
Merge CheckOptions so later configs override earlier by 'key'.
Stores back as list[{key,value}] sorted by key for determinism.
"""
base = check_options_list_to_map(target_config.get("CheckOptions"))
override = check_options_list_to_map(incoming_config.get("CheckOptions"))
if override:
base.update(override) # later wins
target_config["CheckOptions"] = [
{"key": k, "value": v} for k, v in sorted(base.items())
]
def deep_merge_dicts(base: Any, override: Any) -> Any:
"""Generic deep merge for everything except Checks/CheckOptions."""
if isinstance(base, dict) and isinstance(override, dict):
result = dict(base)
for key, o_val in override.items():
if key in ("Checks", "CheckOptions"):
# handled by specialized mergers
continue
b_val = result.get(key)
if isinstance(b_val, dict) and isinstance(o_val, dict):
result[key] = deep_merge_dicts(b_val, o_val)
else:
result[key] = o_val
return result
return override
# --------------------------- path helpers ---------------------------
def is_ancestor_directory(ancestor: pathlib.Path, descendant: pathlib.Path) -> bool:
"""
True if 'ancestor' is the same as or a parent of 'descendant'.
Resolution ensures symlinks and relative parts are normalized.
"""
try:
ancestor = ancestor.resolve()
descendant = descendant.resolve()
except FileNotFoundError:
# If either path doesn't exist yet, still resolve purely lexicaly
ancestor = ancestor.absolute()
descendant = descendant.absolute()
return ancestor == descendant or ancestor in descendant.parents
def filter_and_sort_config_paths(
config_paths: list[str | pathlib.Path],
scope_directory: str | None
) -> list[pathlib.Path]:
"""
Keep only config files whose parent directory is an ancestor
of the provided scope directory.
Sort shallow → deep so deeper configs apply later and override earlier ones.
If scope_directory is None, keep paths in the order given.
"""
config_paths = [pathlib.Path(p) for p in config_paths]
if not scope_directory:
return config_paths
workspace_root = pathlib.Path.cwd().resolve()
scope_abs = (workspace_root / scope_directory).resolve()
selected: list[tuple[int, pathlib.Path]] = []
for cfg in config_paths:
parent_dir = cfg.parent
if is_ancestor_directory(parent_dir, scope_abs):
# Depth is number of path components from root
selected.append((len(parent_dir.parts), cfg.resolve()))
# Sort by depth ascending so root-most files merge first
selected.sort(key=lambda t: t[0])
return [cfg for _, cfg in selected]
# --------------------------- main ---------------------------
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--baseline", required=True, help="Baseline clang-tidy YAML.")
parser.add_argument(
"--config-file",
dest="config_files",
action="append",
default=[],
help="Additional clang-tidy config file(s). May be repeated.",
)
parser.add_argument(
"--scope-dir",
help="Repo-relative directory; only config files in its ancestor dirs are merged.",
)
parser.add_argument("--out", required=True, help="Output merged YAML path.")
args = parser.parse_args()
merged_config: Dict[str, Any] = load_yaml(args.baseline)
config_paths: List[pathlib.Path] = filter_and_sort_config_paths(
args.config_files, args.scope_dir
)
for config_path in config_paths:
incoming_config = load_yaml(config_path)
# clang-tidy special merges first:
merge_checks_into_config(merged_config, incoming_config)
merge_check_options_into_config(merged_config, incoming_config)
# then generic merge:
merged_config = deep_merge_dicts(merged_config, incoming_config)
merged_config["Checks"] = ",".join(split_checks_to_list(merged_config.get("Checks")))
output_path = pathlib.Path(args.out)
output_path.parent.mkdir(parents=True, exist_ok=True)
with open(output_path, "w", encoding="utf-8") as f:
yaml.safe_dump(merged_config, f, sort_keys=True)
if __name__ == "__main__":
main()

View File

@@ -336,6 +336,17 @@ remap_linker_inputs_ownership = rule(
fragments = ["cpp"],
)
def tidy_config_filegroup():
if native.existing_rule("clang_tidy_config") == None:
native.filegroup(
name = "clang_tidy_config",
srcs = native.glob(
[".clang-tidy"],
allow_empty = True,
),
visibility = ["//visibility:public"],
)
def mongo_cc_library(
name,
srcs = [],
@@ -498,6 +509,8 @@ def mongo_cc_library(
header_deps = header_deps,
)
tidy_config_filegroup()
# Create a cc_library entry to generate a shared archive of the target.
cc_library(
name = name + SHARED_ARCHIVE_SUFFIX,

View File

@@ -79,9 +79,9 @@ py_library(
)
py_library(
name = "unittest_grouper",
name = "bazel_custom_formatter",
srcs = [
"unittest_grouper.py",
"bazel_custom_formatter.py",
],
visibility = ["//visibility:public"],
deps = [

View File

@@ -8,15 +8,16 @@ import subprocess
import sys
import time
import urllib.request
from collections import deque
from pathlib import Path
from typing import Dict, List
from retry import retry
from buildscripts.simple_report import make_report, put_report, try_combine_reports
sys.path.append(".")
from buildscripts.install_bazel import install_bazel
from buildscripts.simple_report import make_report, put_report, try_combine_reports
RELEASE_URL = "https://github.com/bazelbuild/buildtools/releases/download/v7.3.1/"
@@ -146,6 +147,72 @@ def find_multiple_groups(test, groups):
return tagged_groups
def iter_clang_tidy_files(root: str | Path) -> list[Path]:
"""Return a list of repo-relative Paths to '.clang-tidy' files.
- Uses os.scandir for speed
- Does NOT follow symlinks
"""
root = Path(root).resolve()
results: list[Path] = []
stack = deque([root])
while stack:
current = stack.pop()
try:
with os.scandir(current) as it:
for entry in it:
name = entry.name
if entry.is_dir(follow_symlinks=False):
stack.append(Path(entry.path))
elif entry.is_file(follow_symlinks=False) and name == ".clang-tidy":
# repo-relative path
results.append(Path(entry.path).resolve().relative_to(root))
except PermissionError:
continue
return results
def validate_clang_tidy_configs(generate_report, fix):
buildozer = download_buildozer()
mongo_dir = "src/mongo"
tidy_files = iter_clang_tidy_files("src/mongo")
p = subprocess.run(
[buildozer, "print label srcs", "//:clang_tidy_config_files"],
capture_output=True,
text=True,
)
tidy_targets = None
for line in p.stdout.splitlines():
if line.startswith("//") and line.endswith("]"):
tokens = line.split("[")
tidy_targets = tokens[1][:-1].split(" ")
break
if tidy_targets is None:
print(p.stderr)
raise Exception(f"could not parse tidy config targets from '{p.stdout}'")
if tidy_targets == ['']:
tidy_targets = []
all_targets = []
for tidy_file in tidy_files:
tidy_file_target = "//" + os.path.dirname(os.path.join(mongo_dir, tidy_file)) + ":clang_tidy_config"
all_targets.append(tidy_file_target)
if all_targets != tidy_targets:
msg = f"Incorrect clang tidy config targets: {all_targets} != {tidy_targets}"
print(msg)
if generate_report:
report = make_report("//:clang_tidy_config_files", msg, 1)
try_combine_reports(report)
put_report(report)
if fix:
subprocess.run([buildozer, f"set srcs {' '.join(all_targets)}", "//:clang_tidy_config_files"])
def validate_bazel_groups(generate_report, fix):
buildozer = download_buildozer()
@@ -266,6 +333,7 @@ def main():
parser.add_argument("--generate-report", default=False, action="store_true")
parser.add_argument("--fix", default=False, action="store_true")
args = parser.parse_args()
validate_clang_tidy_configs(args.generate_report, args.fix)
validate_bazel_groups(args.generate_report, args.fix)

View File

@@ -0,0 +1,108 @@
import os
import pathlib
import tempfile
import unittest
import yaml
from bazel.merge_tidy_configs import (
deep_merge_dicts,
filter_and_sort_config_paths,
is_ancestor_directory,
load_yaml,
merge_check_options_into_config,
merge_checks_into_config,
split_checks_to_list,
)
class TestClangTidyMergeHelpers(unittest.TestCase):
def test_split_checks_to_list_from_str(self):
self.assertEqual(
split_checks_to_list("foo, bar ,baz"),
["foo", "bar", "baz"]
)
def test_split_checks_to_list_from_list(self):
self.assertEqual(
split_checks_to_list(["a, b", "c"]),
["a", "b", "c"]
)
def test_merge_checks_into_config(self):
base = {"Checks": "a,b"}
incoming = {"Checks": "c,d"}
merge_checks_into_config(base, incoming)
self.assertEqual(base["Checks"], "a,b,c,d")
def test_merge_check_options_into_config(self):
base = {"CheckOptions": [{"key": "A", "value": "1"}]}
incoming = {"CheckOptions": [{"key": "B", "value": "2"}]}
merge_check_options_into_config(base, incoming)
self.assertEqual(
base["CheckOptions"],
[{"key": "A", "value": "1"}, {"key": "B", "value": "2"}]
)
def test_merge_check_options_override(self):
base = {"CheckOptions": [{"key": "A", "value": "1"}]}
incoming = {"CheckOptions": [{"key": "A", "value": "2"}]}
merge_check_options_into_config(base, incoming)
self.assertEqual(base["CheckOptions"], [{"key": "A", "value": "2"}])
def test_deep_merge_dicts(self):
base = {"Outer": {"Inner": 1}, "Keep": True}
override = {"Outer": {"Added": 2}, "New": False}
merged = deep_merge_dicts(base, override)
self.assertEqual(
merged,
{"Outer": {"Inner": 1, "Added": 2}, "Keep": True, "New": False}
)
def test_is_ancestor_directory_true(self):
tmpdir = pathlib.Path(tempfile.mkdtemp())
child = tmpdir / "subdir"
child.mkdir()
self.assertTrue(is_ancestor_directory(tmpdir, child))
def test_is_ancestor_directory_false(self):
tmp1 = pathlib.Path(tempfile.mkdtemp())
tmp2 = pathlib.Path(tempfile.mkdtemp())
self.assertFalse(is_ancestor_directory(tmp1, tmp2))
def test_filter_and_sort_config_paths_no_scope(self):
files = ["/tmp/file1", "/tmp/file2"]
res = filter_and_sort_config_paths(files, None)
self.assertEqual([pathlib.Path("/tmp/file1"), pathlib.Path("/tmp/file2")], res)
def test_filter_and_sort_config_paths_with_scope(self):
tmpdir = pathlib.Path(tempfile.mkdtemp())
(tmpdir / "a").mkdir()
cfg_root = tmpdir / "root.yaml"
cfg_child = tmpdir / "a" / "child.yaml"
cfg_root.write_text("Checks: a")
cfg_child.write_text("Checks: b")
old_cwd = pathlib.Path.cwd()
try:
# Simulate repo root being tmpdir
os.chdir(tmpdir)
res = filter_and_sort_config_paths([cfg_root, cfg_child], "a")
finally:
os.chdir(old_cwd)
self.assertEqual([p.name for p in res], ["root.yaml", "child.yaml"])
def test_load_yaml_empty_file(self):
tmpfile = pathlib.Path(tempfile.mktemp())
tmpfile.write_text("")
self.assertEqual(load_yaml(tmpfile), {})
def test_load_yaml_valid_yaml(self):
tmpfile = pathlib.Path(tempfile.mktemp())
yaml.safe_dump({"a": 1}, open(tmpfile, "w"))
self.assertEqual(load_yaml(tmpfile), {"a": 1})
if __name__ == "__main__":
unittest.main()