SERVER-115541: Add guards in commit queue for task-gen thresholds (#45415)

Co-authored-by: trevor <trevor.guidry@mongodb.com>
GitOrigin-RevId: f677d21d21edc1359e27882483a2af479367aa3e
This commit is contained in:
Steve McClure
2025-12-26 12:46:08 -05:00
committed by MongoDB Bot
parent c4063798c7
commit 5f0fd1605c
3 changed files with 151 additions and 36 deletions

View File

@@ -1,30 +0,0 @@
#!/usr/bin/env python3
import os
import sys
# Size limit in bytes: main() prints a warning for files larger than this and an
# informational message otherwise.
FILE_SIZE_THRESHOLD_IN_BYTES = 16 * 1024 * 1024 # 16MB
def main():
    """Report on stdout whether the file named by the first CLI argument is too large.

    The name is resolved against the parent of the current working directory.
    Nothing is raised for a missing or oversized file; the outcome is only printed.
    """
    file_name = sys.argv[1]
    target = os.path.join(os.path.dirname(os.getcwd()), file_name)

    # Guard clause: nothing to measure when the file is absent.
    if not os.path.exists(target):
        print(f"{target} does not exist")
        return

    size = os.path.getsize(target)
    if size <= FILE_SIZE_THRESHOLD_IN_BYTES:
        print(f"{file_name} is {size} bytes, below threshold {FILE_SIZE_THRESHOLD_IN_BYTES} bytes")
        return

    warning = (
        f"WARNING! {file_name} is {size} bytes, exceeding threshold"
        f" {FILE_SIZE_THRESHOLD_IN_BYTES} bytes, file upload may fail due to network issues, or Evergreen"
        f" may reject very large yaml sizes"
    )
    print(warning)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,143 @@
#!/usr/bin/env python3
import json
import os
import sys
# Get relative imports to work when the package is not installed on the PYTHONPATH.
if __name__ == "__main__" and __package__ is None:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from buildscripts.ciconfig.evergreen import parse_evergreen_file
from buildscripts.resmokelib.utils import evergreen_conn
# Size limit in bytes: validate_file_size() prints a warning (it does not fail)
# when the file is larger than this.
FILE_SIZE_THRESHOLD_IN_BYTES = 16 * 1024 * 1024 # 16MB
# Task limit returned by get_task_limit_from_evergreen() when the limit cannot
# be obtained from the Evergreen API.
FALLBACK_TASK_COUNT_THRESHOLD = 50_000 # if we can't reach Evergreen API for any reason
# Headroom added to the computed task count before it is compared against the
# limit: a buffer to avoid hitting the limit exactly and leave room for
# tasks generated by tasks other than version_gen.
TASK_BUFFER = 1500
def validate_file_size(file_path: str) -> None:
    """Check the size of the generated tasks config file against the threshold.

    ``file_path`` is resolved relative to the parent of the current working
    directory. A file larger than ``FILE_SIZE_THRESHOLD_IN_BYTES`` only
    produces a warning on stdout; it is not treated as a failure.

    Raises:
        FileNotFoundError: If the resolved path does not exist.
    """
    resolved = os.path.join(os.path.dirname(os.getcwd()), file_path)
    if not os.path.exists(resolved):
        raise FileNotFoundError(f"{resolved} does not exist")

    size = os.path.getsize(resolved)
    if size <= FILE_SIZE_THRESHOLD_IN_BYTES:
        message = f"File size validation passed: {resolved} is {size} bytes (threshold: {FILE_SIZE_THRESHOLD_IN_BYTES})"
    else:
        message = (
            f"WARNING! {resolved} is {size} bytes, exceeding threshold "
            f"{FILE_SIZE_THRESHOLD_IN_BYTES} bytes. File upload may fail due to network issues, or Evergreen "
            f"may reject very large yaml sizes"
        )
    print(message)
def get_task_limit_from_evergreen() -> int:
    """Fetch the per-version task limit from Evergreen's REST API.

    This is a best-effort lookup: it never raises. If the request fails, the
    response lacks ``max_tasks_per_version``, or the value cannot be converted
    to an integer, a warning is printed and ``FALLBACK_TASK_COUNT_THRESHOLD``
    is returned instead.

    Returns:
        The task limit reported by Evergreen, or the fallback threshold.
    """
    try:
        # Call the admin/task_limits endpoint
        evg_api = evergreen_conn.get_evergreen_api()
        url = evg_api._create_url("/admin/task_limits")
        # Named `payload` (not `json`) so the module-level `json` import is not shadowed.
        payload = evg_api._call_api(url, method="GET").json()
        raw_limit = payload.get("max_tasks_per_version")
        if raw_limit is None:
            print(
                f"WARNING: 'max_tasks_per_version' not found in API response, using fallback: {FALLBACK_TASK_COUNT_THRESHOLD}"
            )
            return FALLBACK_TASK_COUNT_THRESHOLD
        # Honour the `-> int` contract: the caller compares this value against an
        # integer count. A value that cannot be converted raises here and is
        # handled by the fallback branch below.
        task_limit = int(raw_limit)
        print(f"Fetched task limit from Evergreen API: {task_limit}")
        return task_limit
    except Exception as e:
        # Deliberately broad: any failure to obtain the limit must degrade to the
        # fallback rather than abort the validation.
        print(
            f"WARNING: Failed to fetch task limit from Evergreen API ({e}), using fallback: {FALLBACK_TASK_COUNT_THRESHOLD}"
        )
        return FALLBACK_TASK_COUNT_THRESHOLD
def calculate_total_task_count(config: dict) -> int:
    """Count the tasks an Evergreen project config would create.

    For every build variant, each entry in its ``tasks`` list counts as one
    task, unless its name matches a task group, in which case it counts as the
    number of tasks in that group. Each entry in a variant's ``display_tasks``
    list adds one more.

    Keys that are missing or present with a null value (``None``) are treated
    as empty lists.

    Args:
        config: Parsed project configuration with optional ``task_groups`` and
            ``buildvariants`` lists.

    Returns:
        The total number of tasks across all build variants.
    """
    # `x.get(key) or []` (rather than `x.get(key, [])`) also covers keys that
    # exist but hold None, which would otherwise raise TypeError on iteration.
    group_sizes = {
        group["name"]: len(group.get("tasks") or [])
        for group in config.get("task_groups") or []
    }

    total = 0
    for variant in config.get("buildvariants") or []:
        for task in variant.get("tasks") or []:
            # A task-group reference expands to the group's size; a plain task counts once.
            total += group_sizes.get(task["name"], 1)
        total += len(variant.get("display_tasks") or [])
    return total
def calculate_generated_task_count() -> int:
    """Calculate the total number of generated tasks from the Evergreen config file.

    Reads ``generated_resmoke_config/evergreen_config.json`` under the current
    working directory and counts its tasks with ``calculate_total_task_count``.

    Returns:
        The number of tasks described by the generated config.

    Raises:
        Exception: If the file cannot be read, parsed, or counted. The
            underlying error is attached as ``__cause__``.
    """
    # Script runs from src/ directory, so generated config is at ./generated_resmoke_config/evergreen_config.json
    config_path = os.path.join(os.getcwd(), "generated_resmoke_config", "evergreen_config.json")
    try:
        # Explicit encoding so parsing does not depend on the platform's locale default.
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
        return calculate_total_task_count(config)
    except Exception as e:
        # Same exception type and message as before; `from e` chains the root cause explicitly.
        raise Exception(f"Failed to calculate generated task count: {e}") from e
def calculate_nongenerated_task_count() -> int:
    """Count the tasks declared in ``etc/evergreen.yml`` (the non-generated tasks).

    Returns:
        The task count computed by ``calculate_total_task_count`` for that file.
    """
    # NOTE(review): `_conf` is a private attribute of the parsed project object;
    # presumably it holds the raw config mapping - confirm against parse_evergreen_file.
    static_config = parse_evergreen_file("etc/evergreen.yml")._conf
    return calculate_total_task_count(static_config)
def validate_task_count() -> None:
    """Check that the projected task count stays within the task limit.

    The value compared against the limit is the generated task count plus the
    non-generated task count plus ``TASK_BUFFER``. Each intermediate figure is
    printed as it is computed.

    Raises:
        Exception: If the buffered total exceeds the limit returned by
            ``get_task_limit_from_evergreen``.
    """
    generated = calculate_generated_task_count()
    print(f"Generated task count: {generated}")

    nongenerated = calculate_nongenerated_task_count()
    print(f"Non-generated task count: {nongenerated}")

    combined = generated + nongenerated
    print(f"Total task count: {combined}")

    buffered_total = combined + TASK_BUFFER
    print(f"Total task count with buffer ({TASK_BUFFER}): {buffered_total}")

    task_limit = get_task_limit_from_evergreen()
    if buffered_total <= task_limit:
        print(f"Task count validation passed: {buffered_total} tasks (threshold: {task_limit})")
        return

    raise Exception(
        f"Generated configuration contains {buffered_total} tasks, exceeding threshold "
        f"of {task_limit} tasks. This causes Evergreen performance issues or failures. Reduce the number of tasks."
    )
def main():
    """Validate the packed generate-tasks config named on the command line.

    The first command-line argument is the tgz file whose size is checked;
    the task-count validation then runs. Any further arguments are ignored.

    Raises:
        SystemExit: With a usage message (non-zero exit status) when no
            argument is given.
    """
    args = sys.argv[1:]
    if not args:
        # Fail with a readable usage message instead of a bare IndexError traceback.
        prog = sys.argv[0] if sys.argv else "validate_task_gen.py"
        sys.exit(f"usage: {prog} <tgz_file>")
    tgz_file = args[0]  # tgz file
    validate_file_size(tgz_file)
    validate_task_count()
if __name__ == "__main__":
main()

View File

@@ -1418,8 +1418,7 @@ functions:
include:
- "*"
"validate generate tasks config filesize":
&validate_generate_tasks_config_filesize
"validate generate tasks config": &validate_generate_tasks_config
command: subprocess.exec
display_name: "validate generate tasks config filesize"
type: test
@@ -1427,8 +1426,11 @@ functions:
binary: bash
args:
- "src/evergreen/run_python_script.sh"
- "buildscripts/validate_file_size.py"
- "buildscripts/validate_task_gen.py"
- "generate_tasks_config.tgz"
env:
EVERGREEN_API_USER: ${evergreen_api_user}
EVERGREEN_API_KEY: ${evergreen_api_key}
"upload generate tasks config": &upload_generate_tasks_config
command: s3.put
@@ -1471,7 +1473,7 @@ functions:
- *configure_evergreen_api_credentials
- *generate_version_sh
- *pack_generate_tasks_config
- *validate_generate_tasks_config_filesize
- *validate_generate_tasks_config
- *upload_generate_tasks_config
- *generate_resmoke_tasks_config
@@ -1480,7 +1482,7 @@ functions:
- *configure_evergreen_api_credentials
- *generate_version_sh
- *pack_generate_tasks_config
- *validate_generate_tasks_config_filesize
- *validate_generate_tasks_config
"generate version burn in":
- *f_expansions_write
@@ -1493,7 +1495,7 @@ functions:
args:
- "./src/evergreen/generate_version_burn_in.sh"
- *pack_generate_tasks_config
- *validate_generate_tasks_config_filesize
- *validate_generate_tasks_config
- *upload_burn_in_generate_tasks_config
- *generate_resmoke_tasks_config