Files
mongo/buildscripts/monitor_build_status/cli.py
Mikhail Shchatko 9e3fb34c95 SERVER-90908 Gather Evergreen data and analyze for lockdown criteria (#22783)
GitOrigin-RevId: c23b72ce2f2ce343f3bb2ee4aafd2b78bde05e2b
2024-05-31 19:21:27 +00:00

341 lines
13 KiB
Python

from __future__ import annotations
from datetime import datetime, timedelta, timezone
from typing import Iterable, List
import structlog
import typer
from typing_extensions import Annotated
from buildscripts.client.jiraclient import JiraAuth, JiraClient
from buildscripts.monitor_build_status.bfs_report import BFsReport
from buildscripts.monitor_build_status.evergreen_service import (
EvergreenService,
EvgProjectsInfo,
TaskStatusCounts,
)
from buildscripts.monitor_build_status.jira_service import (
BfTemperature,
JiraCustomFieldNames,
JiraService,
TestType,
)
from buildscripts.resmokelib.utils.evergreen_conn import get_evergreen_api
from buildscripts.util.cmdutils import enable_logging
LOGGER = structlog.get_logger(__name__)
JIRA_SERVER = "https://jira.mongodb.org"
DEFAULT_REPO = "10gen/mongo"
DEFAULT_BRANCH = "master"
EVERGREEN_DAYS_LOOKBACK = 1
GLOBAL_CORRECTNESS_HOT_BF_COUNT_LIMIT = 0
GLOBAL_CORRECTNESS_COLD_BF_COUNT_LIMIT = 0
GLOBAL_PERFORMANCE_BF_COUNT_LIMIT = 0
PER_TEAM_CORRECTNESS_HOT_BF_COUNT_LIMIT = 0
PER_TEAM_CORRECTNESS_COLD_BF_COUNT_LIMIT = 0
PER_TEAM_PERFORMANCE_BF_COUNT_LIMIT = 0
EVERGREEN_WATERFALL_FAILURE_RATE_LIMIT = 0.00
EVERGREEN_PATCH_FAILURE_RATE_LIMIT = 0.00
def iterable_to_jql(entries: Iterable[str]) -> str:
return ", ".join(f'"{entry}"' for entry in entries)
JIRA_PROJECTS = {"Build Failures"}
END_STATUSES = {"Closed", "Resolved"}
ACTIVE_BFS_QUERY = (
f"project in ({iterable_to_jql(JIRA_PROJECTS)})"
f" AND status not in ({iterable_to_jql(END_STATUSES)})"
)
class MonitorBuildStatusOrchestrator:
def __init__(
self,
jira_service: JiraService,
evg_service: EvergreenService,
) -> None:
self.jira_service = jira_service
self.evg_service = evg_service
def evaluate_build_redness(self, repo: str, branch: str) -> None:
failures = []
LOGGER.info("Getting Evergreen projects data")
evg_projects_info = self.evg_service.get_evg_project_info(repo, branch)
evg_project_names = evg_projects_info.branch_to_projects_map[branch]
LOGGER.info("Got Evergreen projects data")
bfs_report = self._make_bfs_report(evg_projects_info)
failures.extend(self._check_bf_counts(bfs_report, branch))
# We are looking for versions and patches starting window_end the beginning of yesterday
# to give them time to complete
window_end = datetime.utcnow().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=1)
window_start = window_end - timedelta(days=EVERGREEN_DAYS_LOOKBACK)
waterfall_report = self._make_waterfall_report(
evg_project_names=evg_project_names, window_start=window_start, window_end=window_end
)
failures.extend(
self._check_waterfall_failure_rate(
reports=waterfall_report, window_start=window_start, window_end=window_end
)
)
patch_report = self._make_patch_report(
evg_project_names=evg_project_names, window_start=window_start, window_end=window_end
)
failures.extend(
self._check_patch_failure_rate(
reports=patch_report, window_start=window_start, window_end=window_end
)
)
for failure in failures:
LOGGER.error(failure)
def _make_bfs_report(self, evg_projects_info: EvgProjectsInfo) -> BFsReport:
query = (
f'{ACTIVE_BFS_QUERY} AND "{JiraCustomFieldNames.EVERGREEN_PROJECT}" in'
f" ({iterable_to_jql(evg_projects_info.active_project_names)})"
)
LOGGER.info("Getting active BFs from Jira", query=query)
active_bfs = self.jira_service.fetch_bfs(query)
LOGGER.info("Got active BFs", count=len(active_bfs))
bfs_report = BFsReport.empty()
for bf in active_bfs:
bfs_report.add_bf_data(bf, evg_projects_info)
return bfs_report
@staticmethod
def _check_bf_counts(bfs_report: BFsReport, branch: str) -> List[str]:
failures = []
bf_count_failure_msg = (
"BF count check failed: {scope}: {bf_type}"
" count ({bf_count}) exceeds threshold {bf_limit}"
)
global_correctness_hot_bf_count = bfs_report.get_bf_count(
test_types=[TestType.CORRECTNESS],
bf_temperatures=[BfTemperature.HOT],
)
if global_correctness_hot_bf_count > GLOBAL_CORRECTNESS_HOT_BF_COUNT_LIMIT:
failures.append(
bf_count_failure_msg.format(
scope=f"Branch({branch}): Global",
bf_type="Correctness HOT BF",
bf_count=global_correctness_hot_bf_count,
bf_limit=GLOBAL_CORRECTNESS_HOT_BF_COUNT_LIMIT,
)
)
global_correctness_cold_bf_count = bfs_report.get_bf_count(
test_types=[TestType.CORRECTNESS],
bf_temperatures=[BfTemperature.COLD, BfTemperature.NONE],
)
if global_correctness_cold_bf_count > GLOBAL_CORRECTNESS_COLD_BF_COUNT_LIMIT:
failures.append(
bf_count_failure_msg.format(
scope=f"Branch({branch}): Global",
bf_type="Correctness COLD BF",
bf_count=global_correctness_cold_bf_count,
bf_limit=GLOBAL_CORRECTNESS_COLD_BF_COUNT_LIMIT,
)
)
global_performance_bf_count = bfs_report.get_bf_count(
test_types=[TestType.PERFORMANCE],
bf_temperatures=[BfTemperature.HOT, BfTemperature.COLD, BfTemperature.NONE],
)
if global_performance_bf_count > GLOBAL_PERFORMANCE_BF_COUNT_LIMIT:
failures.append(
bf_count_failure_msg.format(
scope=f"Branch({branch}): Global",
bf_type="Performance BF",
bf_count=global_performance_bf_count,
bf_limit=GLOBAL_PERFORMANCE_BF_COUNT_LIMIT,
)
)
for team in bfs_report.all_assigned_teams:
per_team_correctness_hot_bf_count = bfs_report.get_bf_count(
test_types=[TestType.CORRECTNESS],
bf_temperatures=[BfTemperature.HOT],
assigned_team=team,
)
if per_team_correctness_hot_bf_count > PER_TEAM_CORRECTNESS_HOT_BF_COUNT_LIMIT:
failures.append(
bf_count_failure_msg.format(
scope=f"Branch({branch}): Team({team})",
bf_type="Correctness HOT BF",
bf_count=per_team_correctness_hot_bf_count,
bf_limit=PER_TEAM_CORRECTNESS_HOT_BF_COUNT_LIMIT,
)
)
per_team_correctness_cold_bf_count = bfs_report.get_bf_count(
test_types=[TestType.CORRECTNESS],
bf_temperatures=[BfTemperature.COLD, BfTemperature.NONE],
assigned_team=team,
)
if per_team_correctness_cold_bf_count > PER_TEAM_CORRECTNESS_COLD_BF_COUNT_LIMIT:
failures.append(
bf_count_failure_msg.format(
scope=f"Branch({branch}): Team({team})",
bf_type="Correctness COLD BF",
bf_count=per_team_correctness_cold_bf_count,
bf_limit=PER_TEAM_CORRECTNESS_COLD_BF_COUNT_LIMIT,
)
)
per_team_performance_bf_count = bfs_report.get_bf_count(
test_types=[TestType.PERFORMANCE],
bf_temperatures=[BfTemperature.HOT, BfTemperature.COLD, BfTemperature.NONE],
assigned_team=team,
)
if per_team_performance_bf_count > PER_TEAM_PERFORMANCE_BF_COUNT_LIMIT:
failures.append(
bf_count_failure_msg.format(
scope=f"Branch({branch}): Team({team})",
bf_type="Performance BF",
bf_count=per_team_performance_bf_count,
bf_limit=PER_TEAM_PERFORMANCE_BF_COUNT_LIMIT,
)
)
return failures
def _make_waterfall_report(
self, evg_project_names: List[str], window_start: datetime, window_end: datetime
) -> List[TaskStatusCounts]:
LOGGER.info(
"Getting Evergreen waterfall data",
projects=evg_project_names,
window_start=window_start.isoformat(),
window_end=window_end.isoformat(),
)
waterfall_status = self.evg_service.get_waterfall_status(
evg_project_names=evg_project_names, window_start=window_start, window_end=window_end
)
return self._accumulate_project_statuses(evg_project_names, waterfall_status)
def _make_patch_report(
self, evg_project_names: List[str], window_start: datetime, window_end: datetime
) -> List[TaskStatusCounts]:
LOGGER.info(
"Getting Evergreen patches data",
projects=evg_project_names,
window_start=window_start.isoformat(),
window_end=window_end.isoformat(),
)
waterfall_status = self.evg_service.get_patch_statuses(
evg_project_names=evg_project_names, window_start=window_start, window_end=window_end
)
return self._accumulate_project_statuses(evg_project_names, waterfall_status)
@staticmethod
def _accumulate_project_statuses(
evg_project_names: List[str], build_statuses: List[TaskStatusCounts]
) -> List[TaskStatusCounts]:
project_statuses = []
for evg_project_name in evg_project_names:
project_status = TaskStatusCounts(project=evg_project_name)
for build_status in build_statuses:
if build_status.project == evg_project_name:
project_status += build_status
project_statuses.append(project_status)
return project_statuses
@staticmethod
def _check_waterfall_failure_rate(
reports: List[TaskStatusCounts], window_start: datetime, window_end: datetime
) -> List[str]:
failures = []
for project_status in reports:
failure_rate = project_status.failure_rate()
if failure_rate > EVERGREEN_WATERFALL_FAILURE_RATE_LIMIT:
failures.append(
f"Waterfall redness check failed:"
f" Project({project_status.project}):"
f" Failure rate ({failure_rate})"
f" exceeds threshold {EVERGREEN_WATERFALL_FAILURE_RATE_LIMIT}"
f" between {window_start.isoformat()} and {window_end.isoformat()}"
)
return failures
@staticmethod
def _check_patch_failure_rate(
reports: List[TaskStatusCounts], window_start: datetime, window_end: datetime
) -> List[str]:
failures = []
for project_status in reports:
failure_rate = project_status.failure_rate()
if failure_rate > EVERGREEN_WATERFALL_FAILURE_RATE_LIMIT:
failures.append(
f"Patch redness check failed:"
f" Project({project_status.project}):"
f" Failure rate ({failure_rate})"
f" exceeds threshold {EVERGREEN_WATERFALL_FAILURE_RATE_LIMIT}"
f" between {window_start.isoformat()} and {window_end.isoformat()}"
)
return failures
def main(
github_repo: Annotated[
str, typer.Option(help="Github repository name that Evergreen projects track")
] = DEFAULT_REPO,
branch: Annotated[
str, typer.Option(help="Branch name that Evergreen projects track")
] = DEFAULT_BRANCH,
) -> None:
"""
Analyze Jira BFs count and Evergreen redness data.
For Jira API authentication please use `JIRA_AUTH_PAT` env variable.
More about Jira Personal Access Tokens (PATs) here:
- https://wiki.corp.mongodb.com/pages/viewpage.action?pageId=218995581
For Evergreen API authentication please create `~/.evergreen.yml`.
More about Evergreen auth here:
- https://spruce.mongodb.com/preferences/cli
Example:
JIRA_AUTH_PAT=<auth-token> python buildscripts/monitor_build_status/cli.py --help
"""
enable_logging(verbose=False)
jira_client = JiraClient(JIRA_SERVER, JiraAuth())
evg_api = get_evergreen_api()
jira_service = JiraService(jira_client=jira_client)
evg_service = EvergreenService(evg_api=evg_api)
orchestrator = MonitorBuildStatusOrchestrator(
jira_service=jira_service, evg_service=evg_service
)
orchestrator.evaluate_build_redness(github_repo, branch)
if __name__ == "__main__":
typer.run(main)