2025-09-08 22:02:13 -07:00
#!/usr/bin/env python3
"""
Script that opens a PR using a bot to update profile data links for PGO and BOLT.
This updates profiling_data.bzl and relies on the formatting of that file not changing.
The script always expects 3 links: one to the BOLT data, one to the GCC data, and one to the
Clang data. Exactly one of the Clang or GCC links is expected to point at real data, because
only one compiler's profile is updated per run; the script can update either of them.
"""
import argparse
import hashlib
import os
import re
import sys
import tempfile
import requests
from github . GithubException import GithubException
from github . GithubIntegration import GithubIntegration
2026-02-06 10:00:46 -08:00
from jira import JIRAError
if __name__ == " __main__ " and __package__ is None :
sys . path . append ( os . path . dirname ( os . path . dirname ( os . path . abspath ( __file__ ) ) ) )
from buildscripts . client . jiraclient import JiraAuth , JiraClient
2025-09-08 22:02:13 -07:00
# GitHub owner and repository name used to look up the app installation and the repo.
OWNER_NAME = " 10gen "
REPO_NAME = " mongo "
# Path (within the repository) of the file whose profile URLs and checksums are rewritten.
PROFILE_DATA_FILE_PATH = " bazel/repository_rules/profiling_data.bzl "
2026-02-06 10:00:46 -08:00
# Server URL handed to JiraClient when tickets are created.
JIRA_SERVER = " https://jira.mongodb.org "
# Team value written to customfield_12751 on the tickets this script creates.
PROFILE_DATA_OWNING_TEAM = " Product Performance "
2025-09-08 22:02:13 -07:00
2025-09-22 17:19:49 -07:00
2025-09-08 22:02:13 -07:00
def get_mongo_repository(app_id, private_key):
    """
    Return the GitHub repository object for OWNER_NAME/REPO_NAME.

    The app id is converted with int() and, together with the private key, is used to
    build a GithubIntegration; the repository is then fetched through the client that
    belongs to the app's installation on that repository.
    """
    integration = GithubIntegration(int(app_id), private_key)
    github_client = integration.get_repo_installation(
        OWNER_NAME, REPO_NAME
    ).get_github_for_installation()
    full_name = f"{OWNER_NAME}/{REPO_NAME}"
    return github_client.get_repo(full_name)
2025-09-22 17:19:49 -07:00
2025-09-08 22:02:13 -07:00
def compute_sha256(file_path: str) -> str:
    """
    Return the hex-encoded sha256 digest of the file at file_path.

    The file is read in 4096-byte chunks so it never has to fit in memory at once.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            chunk = stream.read(4096)
            if not chunk:
                # An empty read means end of file.
                break
            digest.update(chunk)
    return digest.hexdigest()
2025-09-22 17:19:49 -07:00
2025-09-08 22:02:13 -07:00
def download_file(url: str, output_location: str, timeout: float = 60.0) -> bool:
    """
    Download a file to a specific output_location and return if the file existed remotely

    Args:
        url: Location to fetch.
        output_location: Local path the response body is written to.
        timeout: Seconds to wait for the connection and between received bytes before
            giving up. Without a timeout a stalled server would block the script forever.

    Returns:
        True when the download completed, False when any requests-level error occurred
        (bad URL, connection failure, timeout, non-success HTTP status).
    """
    try:
        # Stream the body so large profile files are not held in memory all at once.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(output_location, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    file.write(chunk)
        return True
    except requests.exceptions.RequestException:
        # A failure mid-stream can leave a partial file behind; callers must only use
        # output_location when this function returned True.
        return False
2025-09-22 17:19:49 -07:00
2025-09-08 22:02:13 -07:00
def replace_quoted_text_in_tagged_line(text: str, tag: str, new_text: str) -> str:
    """
    Replace the text between the first pair of double quotes that follows a tag on a line
    eg. FOO = "replace_this" -> FOO = "new_text"

    Every occurrence of the tag in text is processed. Only the quoted value itself is
    swapped; the tag and everything else on the line are left untouched, even when the
    old value is empty or also appears inside the tag name.

    Args:
        text: Full file content to edit.
        tag: Literal tag to look for (it is not interpreted as a regular expression).
        new_text: Value to place between the quotes.

    Returns:
        The updated content.

    Exits the process with status 1 (after printing to stderr) if the tag is not present.
    """
    if tag not in text:
        print(f"Tag: {tag} did not exist in the file.", file=sys.stderr)
        sys.exit(1)

    pattern = re.compile(rf'{re.escape(tag)}.*?"(.*?)"')

    def _swap_quoted_value(match):
        # Splice by position instead of str.replace(): replacing by value would also
        # rewrite matching text in the tag, and an empty old value would insert
        # new_text between every character of the line.
        whole = match.group(0)
        value_start = match.start(1) - match.start(0)
        value_end = match.end(1) - match.start(0)
        return whole[:value_start] + new_text + whole[value_end:]

    return pattern.sub(_swap_quoted_value, text)
2025-09-22 17:19:49 -07:00
2025-09-08 22:02:13 -07:00
def update_bolt_info(file_content: str, new_url: str, new_checksum: str) -> str:
    """
    Return file_content with the bolt data url and checksum values replaced
    """
    replacements = (
        ("DEFAULT_BOLT_DATA_URL", new_url),
        ("DEFAULT_BOLT_DATA_CHECKSUM", new_checksum),
    )
    content = file_content
    # Apply the url first, then the checksum, each on the result of the previous step.
    for tag, value in replacements:
        content = replace_quoted_text_in_tagged_line(content, tag, value)
    return content
2025-09-22 17:19:49 -07:00
2025-09-08 22:02:13 -07:00
def update_clang_pgo_info(file_content: str, new_url: str, new_checksum: str) -> str:
    """
    Return file_content with the clang pgo url and checksum values replaced
    """
    # Inner call rewrites the url line; the outer call then rewrites the checksum line.
    return replace_quoted_text_in_tagged_line(
        replace_quoted_text_in_tagged_line(
            file_content, "DEFAULT_CLANG_PGO_DATA_URL", new_url
        ),
        "DEFAULT_CLANG_PGO_DATA_CHECKSUM",
        new_checksum,
    )
2025-09-22 17:19:49 -07:00
2025-09-08 22:02:13 -07:00
def update_gcc_pgo_info(file_content: str, new_url: str, new_checksum: str) -> str:
    """
    Return file_content with the gcc pgo url and checksum values replaced
    """
    with_new_url = replace_quoted_text_in_tagged_line(
        file_content, "DEFAULT_GCC_PGO_DATA_URL", new_url
    )
    with_new_checksum = replace_quoted_text_in_tagged_line(
        with_new_url, "DEFAULT_GCC_PGO_DATA_CHECKSUM", new_checksum
    )
    return with_new_checksum
2025-09-22 17:19:49 -07:00
2026-02-06 10:00:46 -08:00
def create_backport_ticket(version: str):
    """
    Create a SERVER task and a linked BACKPORT ticket for a PGO profile update

    Args:
        version: Branch value for the backport ticket (eg. "v8.0"). It is used in the
            backport summary and written to customfield_14166.

    Returns:
        The created SERVER issue once both tickets exist and have been linked, or None
        if that could not be achieved within three attempts.
    """
    jira = JiraClient(JIRA_SERVER, JiraAuth(), dry_run=False)
    # The calls below are made on the object wrapped by JiraClient.
    jira = jira._jira
    server_issue_dict = {
        "project": {"key": "SERVER"},
        "issuetype": {"name": "Task"},
        "summary": "Update PGO profiles",
        "description": "Updated PGO profile numbers for performance.",
        "customfield_12751": [{"value": PROFILE_DATA_OWNING_TEAM}],
    }
    backport_issue_dict = {
        "project": {"key": "BACKPORT"},
        "issuetype": {"name": "Backport"},
        "summary": f"[{version}] Update PGO profiles",
        # Branch
        "customfield_14166": {"value": version},
        # Backport Justification
        "customfield_25156": "Updated PGO profile numbers for performance.",
    }

    max_attempts = 3
    server_issue = None
    backport_issue = None
    for attempt in range(1, max_attempts + 1):
        try:
            # Only create what an earlier attempt has not already created, so that a
            # retry never files duplicate tickets.
            if server_issue is None:
                server_issue = jira.create_issue(fields=server_issue_dict)
            if backport_issue is None:
                backport_issue = jira.create_issue(fields=backport_issue_dict)
            # For some reason you cant assign a team on creation for backport tickets
            backport_issue.update({"customfield_12751": [{"value": PROFILE_DATA_OWNING_TEAM}]})
            jira.create_issue_link(
                type="backported by", inwardIssue=server_issue.key, outwardIssue=backport_issue.key
            )
            return server_issue
        except JIRAError as err:
            # Keep going: the remaining attempts pick up from the step that failed.
            print(f"Jira ticket creation attempt {attempt}/{max_attempts} failed: {err}")

    if server_issue is not None:
        # Make a half-finished flow discoverable instead of silently dropping it.
        print(f"Giving up; {server_issue.key} was created but the backport flow did not complete.")
    return None
2025-09-08 22:02:13 -07:00
def create_pr(target_branch: str, new_branch: str, original_file, new_content: str):
    """
    Commit new_content for the profiling data file onto new_branch and open a PR
    from new_branch into target_branch

    new_branch is created from the head of target_branch when it does not exist yet.
    For target branches other than master that begin with "v", a backport ticket is
    created and its key is used in the PR title.

    NOTE(review): this function reads the module-level name `repo` rather than taking
    the repository as a parameter, so it only works after the script entry point has
    assigned it.
    """
    base_branch = repo.get_branch(target_branch)
    new_ref = f"refs/heads/{new_branch}"

    try:
        repo.get_branch(branch=new_branch)
    except GithubException as err:
        if err.status != 404:
            raise
        print(f"Branch doesn't exist, creating branch {new_branch}.")
        repo.create_git_ref(ref=new_ref, sha=base_branch.commit.sha)

    jira_ticket = "SERVER-110427"
    # A target branch starting with "v" is a versioned backport branch.
    if target_branch != "master" and target_branch[0] == "v":
        # "v8.0" and "v8.0-staging" both yield "v8.0".
        version = target_branch.partition("-")[0]
        new_ticket = create_backport_ticket(version)
        jira_ticket = new_ticket.key if new_ticket else "[Jira Ticket Creation Broken]"

    repo.update_file(
        path=PROFILE_DATA_FILE_PATH,
        content=new_content,
        branch=new_branch,
        message="Updating profile files.",
        sha=original_file.sha,
    )

    pr_title = f"{jira_ticket} Update profiling data"
    repo.create_pull(
        base=target_branch,
        head=new_branch,
        title=pr_title,
        body="Automated PR updating the profiling data.",
    )
2025-09-08 22:02:13 -07:00
def create_profile_data_pr(repo, args, target_branch, new_branch):
    """
    Get the new text needed and create a pr for updating the profiling_data.bzl

    Downloads the bolt, clang pgo and gcc pgo files named in args, rewrites the bolt
    url/checksum plus the url/checksum of whichever pgo file exists, and hands the
    result to create_pr.

    Args:
        repo: Repository object used to read the current profiling data file.
        args: Parsed arguments providing bolt_url, clang_pgo_url and gcc_pgo_url.
        target_branch: Branch the current file is read from and the PR is opened into.
        new_branch: Branch the PR is opened from.

    Exits the process instead of creating a PR when:
        - the bolt file could not be downloaded (status 0),
        - both clang and gcc pgo files could be downloaded (status 1),
        - neither pgo file could be downloaded (status 0).
    """
    # TemporaryDirectory removes the downloaded files on every way out of the block,
    # including the sys.exit() calls below.
    with tempfile.TemporaryDirectory() as temp_dir:
        bolt_file = os.path.join(temp_dir, "bolt.fdata")
        clang_pgo_file = os.path.join(temp_dir, "clang_pgo.profdata")
        gcc_pgo_file = os.path.join(temp_dir, "gcc_pgo.tgz")

        bolt_file_exists = download_file(args.bolt_url, bolt_file)
        clang_pgo_file_exists = download_file(args.clang_pgo_url, clang_pgo_file)
        gcc_pgo_file_exists = download_file(args.gcc_pgo_url, gcc_pgo_file)

        # These are not errors because the script can run when no files were meant to be updated.
        if not bolt_file_exists:
            print(f"Bolt file did not exist at {args.bolt_url}. Not creating PR.")
            sys.exit(0)

        if clang_pgo_file_exists and gcc_pgo_file_exists:
            print(
                f"Both clang and gcc had pgo files that existed. Clang: {args.clang_pgo_url} GCC: {args.gcc_pgo_url}. Only one should be updated at a time. Not creating PR."
            )
            sys.exit(1)

        if not clang_pgo_file_exists and not gcc_pgo_file_exists:
            print(
                f"Neither clang nor gcc had pgo files that existed at either {args.clang_pgo_url} or {args.gcc_pgo_url}. Not creating PR."
            )
            sys.exit(0)

        profiling_data_file = repo.get_contents(
            PROFILE_DATA_FILE_PATH, ref=f"refs/heads/{target_branch}"
        )
        profiling_data_file_content = profiling_data_file.decoded_content.decode()

        profiling_file_updated_text = update_bolt_info(
            profiling_data_file_content, args.bolt_url, compute_sha256(bolt_file)
        )

        # Exactly one of the two pgo files exists at this point.
        if clang_pgo_file_exists:
            profiling_file_updated_text = update_clang_pgo_info(
                profiling_file_updated_text, args.clang_pgo_url, compute_sha256(clang_pgo_file)
            )
        else:
            profiling_file_updated_text = update_gcc_pgo_info(
                profiling_file_updated_text, args.gcc_pgo_url, compute_sha256(gcc_pgo_file)
            )

    # The checksums are already computed, so the downloads are no longer needed here.
    create_pr(target_branch, new_branch, profiling_data_file, profiling_file_updated_text)
2025-09-22 17:19:49 -07:00
2025-09-08 22:02:13 -07:00
if __name__ == "__main__":
    # Script entry point: parse arguments, authenticate, then create the PR.
    parser = argparse.ArgumentParser(
        description="This script uses bolt file url, clang pgo file url and gcc pgo file url to create a PR updating the links to these files."
    )

    parser.add_argument("bolt_url", help="URL that BOLT data was uploaded to.")
    parser.add_argument("clang_pgo_url", help="URL that clang pgo data was uploaded to.")
    parser.add_argument("gcc_pgo_url", help="URL that gcc pgo data was uploaded to.")
    parser.add_argument("target_branch", help="The branch you want to create a PR into.")
    parser.add_argument("new_branch", help="The new branch to create a PR from.")
    # Credentials fall back to environment variables when the flags are omitted.
    parser.add_argument(
        "--app_id", help="App ID used for authentication.", default=os.getenv("MONGO_PR_BOT_APP_ID")
    )
    parser.add_argument(
        "--private_key",
        help="Key to use for authentication.",
        default=os.getenv("MONGO_PR_BOT_PRIVATE_KEY"),
    )

    args = parser.parse_args()

    if not args.app_id or not args.private_key:
        # The flag names here must match the ones registered above (underscores).
        parser.error(
            "Must define --app_id or env MONGO_PR_BOT_APP_ID and --private_key or env MONGO_PR_BOT_PRIVATE_KEY."
        )

    # Replace spaces with newline, if applicable
    # Only the middle of the key is rewritten; the first 31 and last 29 characters are
    # kept verbatim. NOTE(review): those lengths equal "-----BEGIN RSA PRIVATE KEY-----"
    # and "-----END RSA PRIVATE KEY-----", so this presumably protects the spaces inside
    # the PEM header/footer -- confirm against the key format actually supplied.
    private_key = (
        args.private_key[:31] + args.private_key[31:-29].replace(" ", "\n") + args.private_key[-29:]
    )

    # Must stay a module-level name called `repo`: create_pr looks it up as a global.
    repo = get_mongo_repository(args.app_id, private_key)

    create_profile_data_pr(repo, args, args.target_branch, args.new_branch)