414 lines
16 KiB
Python
414 lines
16 KiB
Python
from __future__ import annotations
|
|
|
|
import os
|
|
import sys
|
|
from datetime import datetime, timedelta, timezone
|
|
from enum import Enum
|
|
from statistics import median
|
|
from typing import Dict, Iterable, List, Tuple
|
|
|
|
import structlog
|
|
import typer
|
|
from tabulate import tabulate
|
|
from typing_extensions import Annotated
|
|
|
|
# Get relative imports to work when the package is not installed on the PYTHONPATH.
|
|
if __name__ == "__main__" and __package__ is None:
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
|
|
|
from buildscripts.client.jiraclient import JiraAuth, JiraClient
|
|
from buildscripts.monitor_build_status.bfs_report import BfCategory, BFsReport
|
|
from buildscripts.monitor_build_status.code_lockdown_config import (
|
|
BfThresholds,
|
|
CodeLockdownConfig,
|
|
)
|
|
from buildscripts.monitor_build_status.evergreen_service import (
|
|
EvergreenService,
|
|
EvgProjectsInfo,
|
|
TaskStatusCounts,
|
|
)
|
|
from buildscripts.monitor_build_status.jira_service import (
|
|
JiraCustomFieldNames,
|
|
JiraService,
|
|
)
|
|
from buildscripts.resmokelib.utils.evergreen_conn import get_evergreen_api
|
|
from buildscripts.util.cmdutils import enable_logging
|
|
|
|
LOGGER = structlog.get_logger(__name__)
|
|
|
|
BLOCK_ON_RED_PLAYBOOK_URL = "http://go/blockonred"
|
|
CODE_LOCKDOWN_CONFIG = "etc/code_lockdown.yml"
|
|
|
|
JIRA_SERVER = "https://jira.mongodb.org"
|
|
DEFAULT_REPO = "10gen/mongo"
|
|
DEFAULT_BRANCH = "master"
|
|
SLACK_CHANNEL = "#10gen-mongo-code-lockdown"
|
|
EVERGREEN_LOOKBACK_DAYS = 14
|
|
|
|
|
|
def iterable_to_jql(entries: Iterable[str]) -> str:
|
|
return ", ".join(f'"{entry}"' for entry in entries)
|
|
|
|
|
|
JIRA_PROJECTS = {"Build Failures"}
|
|
END_STATUSES = {"Needs Triage", "Open", "In Progress", "Waiting for Bug Fix"}
|
|
PRIORITIES = {"Blocker - P1", "Critical - P2", "Major - P3", "Minor - P4"}
|
|
EXCLUDE_LABELS = {"exclude-from-master-quota"}
|
|
ACTIVE_BFS_QUERY = (
|
|
f"project in ({iterable_to_jql(JIRA_PROJECTS)})"
|
|
f" AND status in ({iterable_to_jql(END_STATUSES)})"
|
|
f" AND priority in ({iterable_to_jql(PRIORITIES)})"
|
|
f" AND (labels not in ({iterable_to_jql(EXCLUDE_LABELS)}) OR labels is EMPTY)"
|
|
)
|
|
|
|
|
|
class CodeMergeStatus(Enum):
|
|
RED = "RED"
|
|
GREEN = "GREEN"
|
|
|
|
@classmethod
|
|
def from_threshold_percentages(cls, threshold_percentages: List[float]) -> CodeMergeStatus:
|
|
if any(percentage > 100 for percentage in threshold_percentages):
|
|
return cls.RED
|
|
return cls.GREEN
|
|
|
|
|
|
class SummaryMsg(Enum):
|
|
PREFIX = "`[SUMMARY]`"
|
|
|
|
BELOW_THRESHOLDS = "All metrics are within 100% of their thresholds.\nAll merges are allowed."
|
|
THRESHOLD_EXCEEDED = (
|
|
"At least one metric exceeds 100% of its threshold.\n"
|
|
"Approve only changes that fix BFs, Bugs, and Performance Regressions in the following scopes:"
|
|
)
|
|
|
|
PLAYBOOK_REFERENCE = f"Refer to our playbook at <{BLOCK_ON_RED_PLAYBOOK_URL}> for details."
|
|
|
|
|
|
class MonitorBuildStatusOrchestrator:
|
|
def __init__(
|
|
self,
|
|
jira_service: JiraService,
|
|
evg_service: EvergreenService,
|
|
code_lockdown_config: CodeLockdownConfig,
|
|
) -> None:
|
|
self.jira_service = jira_service
|
|
self.evg_service = evg_service
|
|
self.code_lockdown_config = code_lockdown_config
|
|
|
|
def evaluate_build_redness(self, repo: str, branch: str, notify: bool) -> None:
|
|
status_message = f"\n`[STATUS]` '{repo}' repo '{branch}' branch"
|
|
scope_percentages: Dict[str, List[float]] = {}
|
|
|
|
LOGGER.info("Getting Evergreen projects data")
|
|
evg_projects_info = self.evg_service.get_evg_project_info(repo, branch)
|
|
evg_project_names = evg_projects_info.branch_to_projects_map[branch]
|
|
LOGGER.info("Got Evergreen projects data")
|
|
|
|
bfs_report = self._make_bfs_report(evg_projects_info)
|
|
bf_count_status_msg, bf_count_percentages = self._get_bf_counts_status(
|
|
bfs_report, self.code_lockdown_config
|
|
)
|
|
status_message = f"{status_message}\n{bf_count_status_msg}\n"
|
|
scope_percentages.update(bf_count_percentages)
|
|
|
|
# We are looking for Evergreen versions that started before the beginning of yesterday
|
|
# to give them time to complete
|
|
window_end = datetime.utcnow().replace(
|
|
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
|
|
) - timedelta(days=1)
|
|
window_start = window_end - timedelta(days=EVERGREEN_LOOKBACK_DAYS)
|
|
|
|
waterfall_report = self._make_waterfall_report(
|
|
evg_project_names=evg_project_names, window_end=window_end
|
|
)
|
|
waterfall_failure_rate_status_msg = self._get_waterfall_redness_status(
|
|
waterfall_report=waterfall_report, window_start=window_start, window_end=window_end
|
|
)
|
|
status_message = f"{status_message}\n{waterfall_failure_rate_status_msg}\n"
|
|
|
|
summary = self._summarize(scope_percentages)
|
|
status_message = f"{status_message}\n{summary}"
|
|
|
|
for line in status_message.split("\n"):
|
|
LOGGER.info(line)
|
|
|
|
if notify:
|
|
LOGGER.info("Notifying slack channel with results", slack_channel=SLACK_CHANNEL)
|
|
self.evg_service.evg_api.send_slack_message(
|
|
target=SLACK_CHANNEL,
|
|
msg=status_message.strip(),
|
|
)
|
|
|
|
def _make_bfs_report(self, evg_projects_info: EvgProjectsInfo) -> BFsReport:
|
|
query = (
|
|
f'{ACTIVE_BFS_QUERY} AND "{JiraCustomFieldNames.EVERGREEN_PROJECT}" in'
|
|
f" ({iterable_to_jql(evg_projects_info.active_project_names)})"
|
|
)
|
|
LOGGER.info("Getting active BFs from Jira", query=query)
|
|
|
|
active_bfs = self.jira_service.fetch_bfs(query)
|
|
LOGGER.info("Got active BFs", count=len(active_bfs))
|
|
|
|
bfs_report = BFsReport.empty()
|
|
for bf in active_bfs:
|
|
bfs_report.add_bf_data(bf, evg_projects_info)
|
|
|
|
return bfs_report
|
|
|
|
@staticmethod
|
|
def _get_bf_counts_status(
|
|
bfs_report: BFsReport, code_lockdown_config: CodeLockdownConfig
|
|
) -> Tuple[str, Dict[str, List[float]]]:
|
|
now = datetime.utcnow().replace(tzinfo=timezone.utc)
|
|
percentages: Dict[str, List[float]] = {}
|
|
|
|
status_message = "`[STATUS]` The current BF count"
|
|
headers = ["Scope", "Hot BFs", "Cold BFs", "Perf BFs"]
|
|
table_data = []
|
|
|
|
def _process_thresholds(
|
|
scope: str,
|
|
hot_bf_count: int,
|
|
cold_bf_count: int,
|
|
perf_bf_count: int,
|
|
thresholds: BfThresholds,
|
|
slack_tags: List[str],
|
|
) -> None:
|
|
if all(count == 0 for count in [hot_bf_count, cold_bf_count, perf_bf_count]):
|
|
return
|
|
|
|
hot_bf_percentage = hot_bf_count / thresholds.hot_bf.count * 100
|
|
cold_bf_percentage = cold_bf_count / thresholds.cold_bf.count * 100
|
|
perf_bf_percentage = perf_bf_count / thresholds.perf_bf.count * 100
|
|
|
|
label = f"{scope} {' '.join(slack_tags)}"
|
|
percentages[label] = [hot_bf_percentage, cold_bf_percentage, perf_bf_percentage]
|
|
|
|
table_data.append(
|
|
[
|
|
scope,
|
|
f"{hot_bf_percentage:.0f}% ({hot_bf_count} / {thresholds.hot_bf.count})",
|
|
f"{cold_bf_percentage:.0f}% ({cold_bf_count} / {thresholds.cold_bf.count})",
|
|
f"{perf_bf_percentage:.0f}% ({perf_bf_count} / {thresholds.perf_bf.count})",
|
|
]
|
|
)
|
|
|
|
overall_thresholds = code_lockdown_config.get_overall_thresholds()
|
|
overall_slack_tags = code_lockdown_config.get_overall_slack_tags()
|
|
_process_thresholds(
|
|
"[Org] Overall",
|
|
bfs_report.get_bf_count(
|
|
BfCategory.HOT,
|
|
now - timedelta(days=overall_thresholds.hot_bf.grace_period_days),
|
|
),
|
|
bfs_report.get_bf_count(
|
|
BfCategory.COLD,
|
|
now - timedelta(days=overall_thresholds.cold_bf.grace_period_days),
|
|
),
|
|
bfs_report.get_bf_count(
|
|
BfCategory.PERF,
|
|
now - timedelta(days=overall_thresholds.perf_bf.grace_period_days),
|
|
),
|
|
overall_thresholds,
|
|
overall_slack_tags,
|
|
)
|
|
|
|
for group_name in code_lockdown_config.get_all_group_names():
|
|
group_teams = code_lockdown_config.get_group_teams(group_name)
|
|
group_thresholds = code_lockdown_config.get_group_thresholds(group_name)
|
|
group_slack_tags = code_lockdown_config.get_group_slack_tags(group_name)
|
|
_process_thresholds(
|
|
f"[Group] {group_name}",
|
|
bfs_report.get_bf_count(
|
|
BfCategory.HOT,
|
|
now - timedelta(days=group_thresholds.hot_bf.grace_period_days),
|
|
group_teams,
|
|
),
|
|
bfs_report.get_bf_count(
|
|
BfCategory.COLD,
|
|
now - timedelta(days=group_thresholds.cold_bf.grace_period_days),
|
|
group_teams,
|
|
),
|
|
bfs_report.get_bf_count(
|
|
BfCategory.PERF,
|
|
now - timedelta(days=group_thresholds.perf_bf.grace_period_days),
|
|
group_teams,
|
|
),
|
|
group_thresholds,
|
|
group_slack_tags,
|
|
)
|
|
|
|
for assigned_team in sorted(list(bfs_report.team_reports.keys())):
|
|
team_thresholds = code_lockdown_config.get_team_thresholds(assigned_team)
|
|
team_slack_tags = code_lockdown_config.get_team_slack_tags(assigned_team)
|
|
_process_thresholds(
|
|
f"[Team] {assigned_team}",
|
|
bfs_report.get_bf_count(
|
|
BfCategory.HOT,
|
|
now - timedelta(days=team_thresholds.hot_bf.grace_period_days),
|
|
[assigned_team],
|
|
),
|
|
bfs_report.get_bf_count(
|
|
BfCategory.COLD,
|
|
now - timedelta(days=team_thresholds.cold_bf.grace_period_days),
|
|
[assigned_team],
|
|
),
|
|
bfs_report.get_bf_count(
|
|
BfCategory.PERF,
|
|
now - timedelta(days=team_thresholds.perf_bf.grace_period_days),
|
|
[assigned_team],
|
|
),
|
|
team_thresholds,
|
|
team_slack_tags,
|
|
)
|
|
|
|
table_str = tabulate(
|
|
table_data, headers, tablefmt="outline", colalign=("left", "right", "right", "right")
|
|
)
|
|
status_message = f"{status_message}\n```\n{table_str}\n```"
|
|
|
|
return status_message, percentages
|
|
|
|
def _make_waterfall_report(
|
|
self, evg_project_names: List[str], window_end: datetime
|
|
) -> Dict[str, List[TaskStatusCounts]]:
|
|
task_status_counts = []
|
|
for day in range(EVERGREEN_LOOKBACK_DAYS):
|
|
day_window_end = window_end - timedelta(days=day)
|
|
day_window_start = day_window_end - timedelta(days=1)
|
|
LOGGER.info(
|
|
"Getting Evergreen waterfall data",
|
|
projects=evg_project_names,
|
|
window_start=day_window_start.isoformat(),
|
|
window_end=day_window_end.isoformat(),
|
|
)
|
|
waterfall_status = self.evg_service.get_waterfall_status(
|
|
evg_project_names=evg_project_names,
|
|
window_start=day_window_start,
|
|
window_end=day_window_end,
|
|
)
|
|
task_status_counts.extend(
|
|
self._accumulate_project_statuses(evg_project_names, waterfall_status)
|
|
)
|
|
|
|
waterfall_report = {evg_project_name: [] for evg_project_name in evg_project_names}
|
|
for task_status_count in task_status_counts:
|
|
waterfall_report[task_status_count.project].append(task_status_count)
|
|
|
|
return waterfall_report
|
|
|
|
@staticmethod
|
|
def _accumulate_project_statuses(
|
|
evg_project_names: List[str], build_statuses: List[TaskStatusCounts]
|
|
) -> List[TaskStatusCounts]:
|
|
project_statuses = []
|
|
|
|
for evg_project_name in evg_project_names:
|
|
project_status = TaskStatusCounts(project=evg_project_name)
|
|
for build_status in build_statuses:
|
|
if build_status.project == evg_project_name:
|
|
project_status = project_status.add(build_status)
|
|
project_statuses.append(project_status)
|
|
|
|
return project_statuses
|
|
|
|
@staticmethod
|
|
def _get_waterfall_redness_status(
|
|
waterfall_report: Dict[str, List[TaskStatusCounts]],
|
|
window_start: datetime,
|
|
window_end: datetime,
|
|
) -> str:
|
|
date_format = "%Y-%m-%d"
|
|
status_message = (
|
|
f"`[STATUS]` Evergreen waterfall red and purple boxes median count per day"
|
|
f" between {window_start.strftime(date_format)}"
|
|
f" and {window_end.strftime(date_format)}"
|
|
)
|
|
|
|
for evg_project_name, daily_task_status_counts in waterfall_report.items():
|
|
daily_per_project_red_box_counts = [
|
|
task_status_counts.failed for task_status_counts in daily_task_status_counts
|
|
]
|
|
LOGGER.info(
|
|
"Daily per project red box counts",
|
|
project=evg_project_name,
|
|
daily_red_box_counts=daily_per_project_red_box_counts,
|
|
)
|
|
median_per_day_red_box_count = median(daily_per_project_red_box_counts)
|
|
status_message = (
|
|
f"{status_message}\n{evg_project_name}: {median_per_day_red_box_count:.0f}"
|
|
)
|
|
|
|
return status_message
|
|
|
|
@staticmethod
|
|
def _summarize(scope_percentages: Dict[str, List[float]]) -> str:
|
|
summary = SummaryMsg.PREFIX.value
|
|
|
|
red_scopes = []
|
|
for scope, percentages in scope_percentages.items():
|
|
status = CodeMergeStatus.from_threshold_percentages(percentages)
|
|
if status == CodeMergeStatus.RED:
|
|
red_scopes.append(scope)
|
|
|
|
if len(red_scopes) == 0:
|
|
summary = f"{summary} {SummaryMsg.BELOW_THRESHOLDS.value}"
|
|
else:
|
|
summary = f"{summary} {SummaryMsg.THRESHOLD_EXCEEDED.value}"
|
|
for scope in red_scopes:
|
|
summary = f"{summary}\n\t- {scope}"
|
|
|
|
summary = f"{summary}\n\n{SummaryMsg.PLAYBOOK_REFERENCE.value}\n"
|
|
|
|
return summary
|
|
|
|
|
|
def main(
|
|
github_repo: Annotated[
|
|
str, typer.Option(help="Github repository name that Evergreen projects track")
|
|
] = DEFAULT_REPO,
|
|
branch: Annotated[
|
|
str, typer.Option(help="Branch name that Evergreen projects track")
|
|
] = DEFAULT_BRANCH,
|
|
notify: Annotated[
|
|
bool, typer.Option(help="Whether to send slack notification with the results")
|
|
] = False, # default to the more "quiet" setting
|
|
) -> None:
|
|
"""
|
|
Analyze Jira BFs count and Evergreen redness data.
|
|
|
|
For Jira API authentication please use `JIRA_AUTH_PAT` env variable.
|
|
More about Jira Personal Access Tokens (PATs) here:
|
|
|
|
- https://wiki.corp.mongodb.com/pages/viewpage.action?pageId=218995581
|
|
|
|
For Evergreen API authentication please create `~/.evergreen.yml`.
|
|
More about Evergreen auth here:
|
|
|
|
- https://spruce.mongodb.com/preferences/cli
|
|
|
|
Example:
|
|
|
|
JIRA_AUTH_PAT=<auth-token> python buildscripts/monitor_build_status/cli.py --help
|
|
"""
|
|
enable_logging(verbose=False)
|
|
|
|
jira_client = JiraClient(JIRA_SERVER, JiraAuth())
|
|
evg_api = get_evergreen_api()
|
|
|
|
jira_service = JiraService(jira_client=jira_client)
|
|
evg_service = EvergreenService(evg_api=evg_api)
|
|
code_lockdown_config = CodeLockdownConfig.from_yaml_config(CODE_LOCKDOWN_CONFIG)
|
|
orchestrator = MonitorBuildStatusOrchestrator(
|
|
jira_service=jira_service,
|
|
evg_service=evg_service,
|
|
code_lockdown_config=code_lockdown_config,
|
|
)
|
|
|
|
orchestrator.evaluate_build_redness(github_repo, branch, notify)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
typer.run(main)
|