382 lines
14 KiB
Python
382 lines
14 KiB
Python
from __future__ import annotations
|
|
|
|
import os
|
|
import sys
|
|
from datetime import datetime, timedelta, timezone
|
|
from enum import Enum
|
|
from statistics import median
|
|
from typing import Dict, List, Tuple
|
|
|
|
import structlog
|
|
import typer
|
|
from tabulate import tabulate
|
|
from typing_extensions import Annotated
|
|
|
|
# Get relative imports to work when the package is not installed on the PYTHONPATH.
|
|
if __name__ == "__main__" and __package__ is None:
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
|
|
|
from buildscripts.client.jiraclient import JiraAuth, JiraClient
|
|
from buildscripts.monitor_build_status.code_lockdown_config import (
|
|
CodeLockdownConfig,
|
|
IssueThresholds,
|
|
)
|
|
from buildscripts.monitor_build_status.evergreen_service import (
|
|
EvergreenService,
|
|
TaskStatusCounts,
|
|
)
|
|
from buildscripts.monitor_build_status.issue_report import IssueCategory, IssueReport
|
|
from buildscripts.monitor_build_status.jira_service import JiraService
|
|
from buildscripts.resmokelib.utils.evergreen_conn import get_evergreen_api
|
|
from buildscripts.util.cmdutils import enable_logging
|
|
|
|
LOGGER = structlog.get_logger(__name__)
|
|
|
|
BLOCK_ON_RED_PLAYBOOK_URL = "http://go/blockonred"
|
|
DASHBOARD_URL = "https://jira.mongodb.org/secure/Dashboard.jspa?selectPageId=33310"
|
|
CODE_LOCKDOWN_CONFIG = "etc/code_lockdown.yml"
|
|
|
|
JIRA_SERVER = "https://jira.mongodb.org"
|
|
DEFAULT_REPO = "10gen/mongo"
|
|
DEFAULT_BRANCH = "master"
|
|
SLACK_CHANNEL = "#10gen-mongo-code-lockdown"
|
|
EVERGREEN_LOOKBACK_DAYS = 14
|
|
|
|
|
|
# filter 53085 is all issues in scope
|
|
# filter 53200 identifies those which are hot
|
|
HOT_QUERY = "filter = 53085 AND filter = 53200"
|
|
COLD_QUERY = "filter = 53085 AND filter != 53200"
|
|
|
|
|
|
class CodeMergeStatus(Enum):
|
|
RED = "RED"
|
|
GREEN = "GREEN"
|
|
|
|
@classmethod
|
|
def from_threshold_percentages(cls, threshold_percentages: List[float]) -> CodeMergeStatus:
|
|
if any(percentage > 100 for percentage in threshold_percentages):
|
|
return cls.RED
|
|
return cls.GREEN
|
|
|
|
|
|
class SummaryMsg(Enum):
|
|
PREFIX = "`[SUMMARY]`"
|
|
|
|
BELOW_THRESHOLDS = "All metrics are within 100% of their thresholds.\nAll merges are allowed."
|
|
THRESHOLD_EXCEEDED = (
|
|
"At least one metric exceeds 100% of its threshold.\n"
|
|
"Approve only changes that fix BFs, Bugs, and Performance Regressions in the following scopes:"
|
|
)
|
|
|
|
PLAYBOOK_REFERENCE = f"Refer to our playbook at <{BLOCK_ON_RED_PLAYBOOK_URL}> for details."
|
|
DASHBOARD_REFERENCE = f"Drill into the data using the <{DASHBOARD_URL}|Jira Dashboard>."
|
|
|
|
|
|
class MonitorBuildStatusOrchestrator:
|
|
def __init__(
|
|
self,
|
|
jira_service: JiraService,
|
|
evg_service: EvergreenService,
|
|
code_lockdown_config: CodeLockdownConfig,
|
|
) -> None:
|
|
self.jira_service = jira_service
|
|
self.evg_service = evg_service
|
|
self.code_lockdown_config = code_lockdown_config
|
|
|
|
def evaluate_build_redness(self, repo: str, branch: str, notify: bool) -> None:
|
|
status_message = f"\n`[STATUS]` '{repo}' repo '{branch}' branch"
|
|
scope_percentages: Dict[str, List[float]] = {}
|
|
|
|
LOGGER.info("Getting Evergreen projects data")
|
|
evg_projects_info = self.evg_service.get_evg_project_info(repo, branch)
|
|
evg_project_names = evg_projects_info.branch_to_projects_map[branch]
|
|
LOGGER.info("Got Evergreen projects data")
|
|
|
|
issue_report = self._make_report()
|
|
issue_count_status_msg, issue_count_percentages = self._get_issue_counts_status(
|
|
issue_report, self.code_lockdown_config
|
|
)
|
|
status_message = f"{status_message}\n{issue_count_status_msg}\n"
|
|
scope_percentages.update(issue_count_percentages)
|
|
|
|
# We are looking for Evergreen versions that started before the beginning of yesterday
|
|
# to give them time to complete
|
|
window_end = datetime.utcnow().replace(
|
|
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
|
|
) - timedelta(days=1)
|
|
window_start = window_end - timedelta(days=EVERGREEN_LOOKBACK_DAYS)
|
|
|
|
waterfall_report = self._make_waterfall_report(
|
|
evg_project_names=evg_project_names, window_end=window_end
|
|
)
|
|
waterfall_failure_rate_status_msg = self._get_waterfall_redness_status(
|
|
waterfall_report=waterfall_report, window_start=window_start, window_end=window_end
|
|
)
|
|
status_message = f"{status_message}\n{waterfall_failure_rate_status_msg}\n"
|
|
|
|
summary = self._summarize(scope_percentages)
|
|
status_message = f"{status_message}\n{summary}"
|
|
|
|
for line in status_message.split("\n"):
|
|
LOGGER.info(line)
|
|
|
|
if notify:
|
|
LOGGER.info("Notifying slack channel with results", slack_channel=SLACK_CHANNEL)
|
|
self.evg_service.evg_api.send_slack_message(
|
|
target=SLACK_CHANNEL,
|
|
msg=status_message.strip(),
|
|
)
|
|
|
|
def _make_report(self) -> IssueReport:
|
|
LOGGER.info("Getting hot issues from Jira", query=HOT_QUERY)
|
|
hot_issues = self.jira_service.fetch_issues(HOT_QUERY)
|
|
LOGGER.info("Getting cold issues from Jira", query=COLD_QUERY)
|
|
cold_issues = self.jira_service.fetch_issues(COLD_QUERY)
|
|
|
|
LOGGER.info("Got active Issues", count_hot=len(hot_issues), count_cold=len(cold_issues))
|
|
|
|
report = IssueReport.empty()
|
|
report.add_issues(hot=hot_issues, cold=cold_issues)
|
|
|
|
return report
|
|
|
|
@staticmethod
|
|
def _get_issue_counts_status(
|
|
bfs_report: IssueReport, code_lockdown_config: CodeLockdownConfig
|
|
) -> Tuple[str, Dict[str, List[float]]]:
|
|
now = datetime.utcnow().replace(tzinfo=timezone.utc)
|
|
percentages: Dict[str, List[float]] = {}
|
|
|
|
status_message = "`[STATUS]` The current issue count"
|
|
headers = ["Scope", "Hot Issues", "Cold Issues"]
|
|
table_data = []
|
|
|
|
def _process_thresholds(
|
|
scope: str,
|
|
hot_issue_count: int,
|
|
cold_issue_count: int,
|
|
thresholds: IssueThresholds,
|
|
slack_tags: List[str],
|
|
) -> None:
|
|
if all(count == 0 for count in [hot_issue_count, cold_issue_count]):
|
|
return
|
|
|
|
hot_bf_percentage = hot_issue_count / thresholds.hot.count * 100
|
|
cold_bf_percentage = cold_issue_count / thresholds.cold.count * 100
|
|
|
|
label = f"{scope} {' '.join(slack_tags)}"
|
|
percentages[label] = [hot_bf_percentage, cold_bf_percentage]
|
|
|
|
table_data.append(
|
|
[
|
|
scope,
|
|
f"{hot_bf_percentage:.0f}% ({hot_issue_count} / {thresholds.hot.count})",
|
|
f"{cold_bf_percentage:.0f}% ({cold_issue_count} / {thresholds.cold.count})",
|
|
]
|
|
)
|
|
|
|
overall_thresholds = code_lockdown_config.get_overall_thresholds()
|
|
overall_slack_tags = code_lockdown_config.get_overall_slack_tags()
|
|
_process_thresholds(
|
|
"[Org] Overall",
|
|
bfs_report.get_issue_count(
|
|
IssueCategory.HOT,
|
|
now - timedelta(days=overall_thresholds.hot.grace_period_days),
|
|
),
|
|
bfs_report.get_issue_count(
|
|
IssueCategory.COLD,
|
|
now - timedelta(days=overall_thresholds.cold.grace_period_days),
|
|
),
|
|
overall_thresholds,
|
|
overall_slack_tags,
|
|
)
|
|
|
|
for group_name in code_lockdown_config.get_all_group_names():
|
|
group_teams = code_lockdown_config.get_group_teams(group_name)
|
|
group_thresholds = code_lockdown_config.get_group_thresholds(group_name)
|
|
group_slack_tags = code_lockdown_config.get_group_slack_tags(group_name)
|
|
_process_thresholds(
|
|
f"[Group] {group_name}",
|
|
bfs_report.get_issue_count(
|
|
IssueCategory.HOT,
|
|
now - timedelta(days=group_thresholds.hot.grace_period_days),
|
|
group_teams,
|
|
),
|
|
bfs_report.get_issue_count(
|
|
IssueCategory.COLD,
|
|
now - timedelta(days=group_thresholds.cold.grace_period_days),
|
|
group_teams,
|
|
),
|
|
group_thresholds,
|
|
group_slack_tags,
|
|
)
|
|
|
|
for assigned_team in sorted(list(bfs_report.team_reports.keys())):
|
|
team_thresholds = code_lockdown_config.get_team_thresholds(assigned_team)
|
|
team_slack_tags = code_lockdown_config.get_team_slack_tags(assigned_team)
|
|
_process_thresholds(
|
|
f"[Team] {assigned_team}",
|
|
bfs_report.get_issue_count(
|
|
IssueCategory.HOT,
|
|
now - timedelta(days=team_thresholds.hot.grace_period_days),
|
|
[assigned_team],
|
|
),
|
|
bfs_report.get_issue_count(
|
|
IssueCategory.COLD,
|
|
now - timedelta(days=team_thresholds.cold.grace_period_days),
|
|
[assigned_team],
|
|
),
|
|
team_thresholds,
|
|
team_slack_tags,
|
|
)
|
|
|
|
table_str = tabulate(
|
|
table_data, headers, tablefmt="outline", colalign=("left", "right", "right")
|
|
)
|
|
status_message = f"{status_message}\n```\n{table_str}\n```"
|
|
|
|
return status_message, percentages
|
|
|
|
def _make_waterfall_report(
|
|
self, evg_project_names: List[str], window_end: datetime
|
|
) -> Dict[str, List[TaskStatusCounts]]:
|
|
task_status_counts = []
|
|
for day in range(EVERGREEN_LOOKBACK_DAYS):
|
|
day_window_end = window_end - timedelta(days=day)
|
|
day_window_start = day_window_end - timedelta(days=1)
|
|
LOGGER.info(
|
|
"Getting Evergreen waterfall data",
|
|
projects=evg_project_names,
|
|
window_start=day_window_start.isoformat(),
|
|
window_end=day_window_end.isoformat(),
|
|
)
|
|
waterfall_status = self.evg_service.get_waterfall_status(
|
|
evg_project_names=evg_project_names,
|
|
window_start=day_window_start,
|
|
window_end=day_window_end,
|
|
)
|
|
task_status_counts.extend(
|
|
self._accumulate_project_statuses(evg_project_names, waterfall_status)
|
|
)
|
|
|
|
waterfall_report = {evg_project_name: [] for evg_project_name in evg_project_names}
|
|
for task_status_count in task_status_counts:
|
|
waterfall_report[task_status_count.project].append(task_status_count)
|
|
|
|
return waterfall_report
|
|
|
|
@staticmethod
|
|
def _accumulate_project_statuses(
|
|
evg_project_names: List[str], build_statuses: List[TaskStatusCounts]
|
|
) -> List[TaskStatusCounts]:
|
|
project_statuses = []
|
|
|
|
for evg_project_name in evg_project_names:
|
|
project_status = TaskStatusCounts(project=evg_project_name)
|
|
for build_status in build_statuses:
|
|
if build_status.project == evg_project_name:
|
|
project_status = project_status.add(build_status)
|
|
project_statuses.append(project_status)
|
|
|
|
return project_statuses
|
|
|
|
@staticmethod
|
|
def _get_waterfall_redness_status(
|
|
waterfall_report: Dict[str, List[TaskStatusCounts]],
|
|
window_start: datetime,
|
|
window_end: datetime,
|
|
) -> str:
|
|
date_format = "%Y-%m-%d"
|
|
status_message = (
|
|
f"`[STATUS]` Evergreen waterfall red and purple boxes median count per day"
|
|
f" between {window_start.strftime(date_format)}"
|
|
f" and {window_end.strftime(date_format)}"
|
|
)
|
|
|
|
for evg_project_name, daily_task_status_counts in waterfall_report.items():
|
|
daily_per_project_red_box_counts = [
|
|
task_status_counts.failed for task_status_counts in daily_task_status_counts
|
|
]
|
|
LOGGER.info(
|
|
"Daily per project red box counts",
|
|
project=evg_project_name,
|
|
daily_red_box_counts=daily_per_project_red_box_counts,
|
|
)
|
|
median_per_day_red_box_count = median(daily_per_project_red_box_counts)
|
|
status_message = (
|
|
f"{status_message}\n{evg_project_name}: {median_per_day_red_box_count:.0f}"
|
|
)
|
|
|
|
return status_message
|
|
|
|
@staticmethod
|
|
def _summarize(scope_percentages: Dict[str, List[float]]) -> str:
|
|
summary = SummaryMsg.PREFIX.value
|
|
|
|
red_scopes = []
|
|
for scope, percentages in scope_percentages.items():
|
|
status = CodeMergeStatus.from_threshold_percentages(percentages)
|
|
if status == CodeMergeStatus.RED:
|
|
red_scopes.append(scope)
|
|
|
|
if len(red_scopes) == 0:
|
|
summary = f"{summary} {SummaryMsg.BELOW_THRESHOLDS.value}"
|
|
else:
|
|
summary = f"{summary} {SummaryMsg.THRESHOLD_EXCEEDED.value}"
|
|
for scope in red_scopes:
|
|
summary = f"{summary}\n\t- {scope}"
|
|
|
|
summary = f"{summary}\n\n{SummaryMsg.PLAYBOOK_REFERENCE.value}\n{SummaryMsg.DASHBOARD_REFERENCE.value}"
|
|
|
|
return summary
|
|
|
|
|
|
def main(
|
|
github_repo: Annotated[
|
|
str, typer.Option(help="Github repository name that Evergreen projects track")
|
|
] = DEFAULT_REPO,
|
|
branch: Annotated[
|
|
str, typer.Option(help="Branch name that Evergreen projects track")
|
|
] = DEFAULT_BRANCH,
|
|
notify: Annotated[
|
|
bool, typer.Option(help="Whether to send slack notification with the results")
|
|
] = False, # default to the more "quiet" setting
|
|
) -> None:
|
|
"""
|
|
Analyze Jira BFs count and Evergreen redness data.
|
|
|
|
For Jira API authentication please use `JIRA_AUTH_PAT` env variable.
|
|
More about Jira Personal Access Tokens (PATs) here:
|
|
|
|
- https://wiki.corp.mongodb.com/pages/viewpage.action?pageId=218995581
|
|
|
|
For Evergreen API authentication please create `~/.evergreen.yml`.
|
|
More about Evergreen auth here:
|
|
|
|
- https://spruce.mongodb.com/preferences/cli
|
|
|
|
Example:
|
|
|
|
JIRA_AUTH_PAT=<auth-token> python buildscripts/monitor_build_status/cli.py --help
|
|
"""
|
|
enable_logging(verbose=False)
|
|
|
|
jira_client = JiraClient(JIRA_SERVER, JiraAuth())
|
|
evg_api = get_evergreen_api()
|
|
|
|
jira_service = JiraService(jira_client=jira_client)
|
|
evg_service = EvergreenService(evg_api=evg_api)
|
|
code_lockdown_config = CodeLockdownConfig.from_yaml_config(CODE_LOCKDOWN_CONFIG)
|
|
orchestrator = MonitorBuildStatusOrchestrator(
|
|
jira_service=jira_service,
|
|
evg_service=evg_service,
|
|
code_lockdown_config=code_lockdown_config,
|
|
)
|
|
|
|
orchestrator.evaluate_build_redness(github_repo, branch, notify)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
typer.run(main)
|