2024-04-03 16:12:53 -07:00
import argparse
import os
import pathlib
import subprocess
2026-02-19 15:57:12 -05:00
from typing import Union
2024-04-03 16:12:53 -07:00
2026-01-27 10:59:18 -06:00
from git import Repo
from utils . evergreen_git import get_mongodb_remote
2025-10-14 15:10:55 -05:00
from buildscripts . bazel_custom_formatter import (
validate_bazel_groups ,
validate_clang_tidy_configs ,
validate_idl_naming ,
validate_private_headers ,
)
2024-04-03 16:12:53 -07:00
2024-12-31 14:52:28 -08:00
2025-05-29 06:06:47 -07:00
def _git_distance(args: list) -> int:
    """Return the count printed by ``git rev-list --count`` for *args*.

    Args:
        args: Extra command-line arguments appended to the git invocation,
            for example a single revision range such as ``["a..b"]``.

    Returns:
        The integer parsed from git's stdout.

    Raises:
        subprocess.CalledProcessError: git exited with a non-zero status. The
            command line plus the captured stderr and stdout are printed
            before the exception is re-raised.
    """
    rev_list_cmd = ["git", "rev-list", "--count"] + args

    try:
        completed = subprocess.run(rev_list_cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as err:
        # Surface everything git reported before letting the failure propagate.
        print(f"Error running git command: {' '.join(rev_list_cmd)}")
        print(f"stderr: {err.stderr.strip()}")
        print(f"stdout: {err.stdout.strip()}")
        raise
    else:
        return int(completed.stdout.strip())
def _get_merge_base(args: list) -> str:
    """Run ``git merge-base`` with *args* and return its stripped stdout.

    Raises:
        subprocess.CalledProcessError: git exited with a non-zero status.
    """
    completed = subprocess.run(
        ["git", "merge-base"] + args,
        capture_output=True,
        text=True,
        check=True,
    )
    merge_base = completed.stdout.strip()
    return merge_base
def _git_diff(args: list) -> str:
    """Run ``git diff`` with *args* and return its output.

    The captured stdout is stripped and then terminated with exactly one
    ``os.linesep`` so that several results can be concatenated without the
    last line of one running into the first line of the next.

    Raises:
        subprocess.CalledProcessError: git exited with a non-zero status.
    """
    completed = subprocess.run(
        ["git", "diff"] + args,
        capture_output=True,
        text=True,
        check=True,
    )
    trimmed = completed.stdout.strip()
    return trimmed + os.linesep
2025-06-18 13:05:12 -07:00
def _git_unstaged_files() -> str:
    """Return the output of ``git ls-files --others --exclude-standard``.

    Despite the function name, the flags used ask git for untracked files
    that are not excluded by the standard ignore rules. The stripped stdout
    is terminated with a single ``os.linesep``.

    Raises:
        subprocess.CalledProcessError: git exited with a non-zero status.
    """
    completed = subprocess.run(
        ["git", "ls-files", "--others", "--exclude-standard"],
        capture_output=True,
        text=True,
        check=True,
    )
    trimmed = completed.stdout.strip()
    return trimmed + os.linesep
2026-02-19 15:57:12 -05:00
def _get_files_changed_since_fork_point(origin_branch: str = "origin/master") -> list[str]:
    """Collect the paths git reports as changed relative to *origin_branch*.

    Four listings are gathered, in this order:
      1. ``git diff --name-only <merge-base>..HEAD`` (merge base of HEAD and
         *origin_branch*)
      2. ``git diff --name-only --cached``
      3. ``git diff --name-only``
      4. ``git ls-files --others --exclude-standard``

    Returns:
        The de-duplicated, normalized paths. The list is built from a set, so
        its order is unspecified.
    """
    fork_point = _get_merge_base(["HEAD", origin_branch])
    listings = (
        _git_diff(["--name-only", f"{fork_point}..HEAD"]),
        _git_diff(["--name-only", "--cached"]),
        _git_diff(["--name-only"]),
        _git_unstaged_files(),
    )

    changed = set()
    for listing in listings:
        for entry in listing.splitlines():
            if not entry:
                continue
            changed.add(os.path.normpath(os.path.join(os.curdir, entry.rstrip())))
    return list(changed)
2025-02-20 11:38:12 -08:00
def run_rules_lint(
    rules_lint_format_path: pathlib.Path,
    rules_lint_format_check_path: pathlib.Path,
    check: bool,
    files_to_format: Union[list[str], str] = "all",
) -> bool:
    """Run the rules_lint formatter, or its check script, and report success.

    Args:
        rules_lint_format_path: Executable used when *check* is false.
        rules_lint_format_check_path: Executable used when *check* is true.
        check: Select the check script instead of the formatter.
        files_to_format: Either the string ``"all"`` (no file arguments are
            passed to the executable) or a list of file paths appended to the
            command line.

    Returns:
        True when the executable exits successfully or when there is nothing
        to format; False when it exits with a non-zero status.
    """
    if check:
        command = [str(rules_lint_format_check_path)]
        banner = "Running rules_lint formatter in check mode"
    else:
        command = [str(rules_lint_format_path)]
        banner = "Running rules_lint formatter"

    if files_to_format != "all":
        if not files_to_format:
            # An empty list would leave the command without any path
            # arguments, making it indistinguishable from the "all" invocation.
            # Skip instead, mirroring how run_prettier treats an empty list.
            # NOTE(review): assumes the formatter treats "no arguments" as
            # "format everything" - confirm against rules_lint.
            print("Skipping rules_lint formatter due to having no files to format.")
            return True
        command += files_to_format

    print(banner)

    # The executable is launched from the directory three levels above this file.
    repo_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    try:
        subprocess.run(command, check=True, env=os.environ, cwd=repo_path)
    except subprocess.CalledProcessError:
        return False
    return True
2025-05-29 06:06:47 -07:00
def run_prettier(
    prettier: pathlib.Path, check: bool, files_to_format: Union[list[str], str] = "all"
) -> bool:
    """Invoke prettier over the requested files and report whether it succeeded.

    Args:
        prettier: Path to the prettier executable.
        check: Pass ``--check`` when true, ``--write`` otherwise.
        files_to_format: The string ``"all"`` (prettier is pointed at ``.``)
            or a list of paths. An empty list skips the run entirely.

    Returns:
        True when prettier exits successfully or the run is skipped; False
        when prettier exits with a non-zero status.
    """
    # Output directories and any symlinks at the repository root are excluded
    # explicitly so that bad symlinks cannot fail the run; listing the paths in
    # .prettierignore is not sufficient, see
    # https://github.com/prettier/prettier/issues/11568
    exclusions = {
        "!./build",
        "!./bazel-bin",
        "!./bazel-out",
        "!./bazel-mongo",
        "!./external",
        "!./.compiledb",
        "!./monguard",
    }
    exclusions.update(
        f"!./{entry}" for entry in pathlib.Path(".").iterdir() if entry.is_symlink()
    )

    if files_to_format == "all":
        targets = ["."]
    elif len(files_to_format) == 0:
        print("Skipping prettier due to having no files to format.")
        return True
    else:
        targets = files_to_format

    mode_flag = "--check" if check else "--write"
    command = [
        str(prettier),
        "--cache",
        "--log-level",
        "warn",
        # Changed-files mode can hand prettier extensions it does not parse
        # (for example .py, .sky); ignoring unknown files keeps the run going
        # instead of failing early.
        "--ignore-unknown",
        *targets,
        *exclusions,
        mode_flag,
    ]

    print("Running prettier")
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError:
        print("Found formatting errors. Run 'bazel run //:format' to fix")
        print("*** IF BAZEL IS NOT INSTALLED, RUN THE FOLLOWING: ***\n")
        print("python buildscripts/install_bazel.py")
        if os.path.exists("external"):
            print(
                "\nexternal exists which may be causing issues in the linter, please try running:\n"
            )
            print("\tunlink external")
        return False
    else:
        if check:
            print("No formatting errors")
        return True
2024-04-03 16:12:53 -07:00
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser used by main()."""
    parser = argparse.ArgumentParser(
        prog="Format", description="This script formats code in mongodb"
    )
    parser.add_argument("--check", help="Run in check mode", default=False, action="store_true")
    parser.add_argument(
        "--prettier", help="Set the path to prettier", required=True, type=pathlib.Path
    )
    parser.add_argument(
        "--rules-lint-format",
        help="Set the path to rules_lint's formatter",
        required=True,
        type=pathlib.Path,
    )
    parser.add_argument(
        "--rules-lint-format-check",
        help="Set the path to rules_lint's formatter check script",
        required=True,
        type=pathlib.Path,
    )
    parser.add_argument(
        "--all",
        help="Format all files instead of just formatting files that have changed since the fork point",
        action="store_true",
    )
    parser.add_argument(
        "--origin-branch",
        help="The branch to use as the fork point for changed files (example: origin/master)",
        default="auto",
    )
    parser.add_argument(
        "--file",
        help="The file to format",
        type=pathlib.Path,
    )
    return parser


def _contains_bazel_file(files: Union[list[str], str]) -> bool:
    """Return True when *files* is "all" or any path ends in .bazel or contains "BUILD"."""
    if files == "all":
        return True
    return any(file.endswith(".bazel") or "BUILD" in file for file in files)


def main() -> int:
    """Entry point: choose the files to format, run the validators and formatters.

    File selection:
      * ``--file`` formats just that file.
      * ``--all`` formats everything.
      * Otherwise the files changed since the fork point with the origin
        branch are formatted, unless the branch is more than 100 commits away
        from the origin branch, in which case everything is formatted.

    Returns:
        0 when both prettier and rules_lint succeed, 1 otherwise (including
        when the script is not launched through bazel).
    """
    # If we are running in bazel, default the directory to the workspace
    default_dir = os.environ.get("BUILD_WORKSPACE_DIRECTORY")
    if not default_dir:
        print("This script must be run though bazel. Please run 'bazel run //:format' instead")
        print("*** IF BAZEL IS NOT INSTALLED, RUN THE FOLLOWING: ***\n")
        print("python buildscripts/install_bazel.py")
        return 1

    args = _build_arg_parser().parse_args()

    # Resolve before changing directory so a relative path keeps pointing at
    # the same executable.
    prettier_path: pathlib.Path = args.prettier.resolve()
    os.chdir(default_dir)

    files_to_format: Union[list[str], str] = "all"
    if args.file:
        files_to_format = [str(args.file)]
    elif not args.all:
        # The origin branch is only needed in changed-files mode, so the
        # remote lookup is done here rather than unconditionally; --all and
        # --file then cannot fail because of it.
        origin_branch = args.origin_branch
        if origin_branch == "auto":
            remote = get_mongodb_remote(Repo())
            origin_branch = f"{remote.name}/master"

        max_distance = 100
        distance = _git_distance([f"{origin_branch}..HEAD"])
        if distance > max_distance:
            print(
                f"The number of commits between current branch and origin branch ({origin_branch}) is too large: {distance} commits (> {max_distance} commits)."
            )
            print("WARNING!!! Defaulting to formatting all files, this may take a while.")
            print(
                "Please update your local branch with the latest changes from origin, or use `bazel run format -- --origin-branch other_branch` to select a different origin branch"
            )
            # files_to_format stays "all".
        else:
            files_to_format = _get_files_changed_since_fork_point(origin_branch)

    # The validators run (and fix, unless --check) only when a Bazel file is
    # among the candidates. This is checked before non-files are filtered out.
    if _contains_bazel_file(files_to_format):
        validate_clang_tidy_configs(generate_report=True, fix=not args.check)
        validate_bazel_groups(generate_report=True, fix=not args.check)
        validate_idl_naming(generate_report=True, fix=not args.check)
        validate_private_headers(generate_report=True, fix=not args.check)

    if files_to_format != "all":
        # Keep only paths that exist as regular files.
        files_to_format = [str(file) for file in files_to_format if os.path.isfile(file)]

    # rules_lint is only run when prettier succeeded.
    succeeded = run_prettier(prettier_path, args.check, files_to_format) and run_rules_lint(
        args.rules_lint_format, args.rules_lint_format_check, args.check, files_to_format
    )
    return 0 if succeeded else 1
2024-04-03 16:12:53 -07:00
if __name__ == "__main__":
    # Raise SystemExit directly: the exit() builtin is installed by the site
    # module for interactive use and is not guaranteed to exist in programs.
    raise SystemExit(main())