Files
mongo/buildscripts/resmokelib/setup_multiversion/download.py
Steve McClure 5b9b64cb30 SERVER-105059: Avoid using boto on PPC/S390 (#36122)
GitOrigin-RevId: 5907e7d2a087e75b2e27b0c0ead4d605766f614d
2025-05-14 14:25:43 +00:00

225 lines
7.1 KiB
Python

"""Helper functions to download."""
import contextlib
import errno
import glob
import os
import platform
import shutil
import tarfile
import zipfile
from urllib.parse import parse_qs, unquote, urlparse

import requests
import structlog

from buildscripts.resmokelib.utils import archival
from buildscripts.resmokelib.utils.filesystem import build_hygienic_bin_path, mkdtemp_in_build_dir
# Name of an S3 bucket. Nothing in this section of the file reads it.
# NOTE(review): presumably kept for importers of this module - confirm before removing.
S3_BUCKET = "mciuploads"
# Module-level structlog logger; the functions below attach context as keyword arguments.
LOGGER = structlog.getLogger(__name__)
class DownloadError(Exception):
    """Raised by the helpers in download.py when a download or extraction cannot proceed."""
def is_s3_presigned_url(url: str) -> bool:
    """
    Tell whether `url` looks like an AWS S3 presigned URL (SigV4).

    The check is purely syntactic: the URL's query string must carry an
    `X-Amz-Signature` parameter (case-sensitive) with a non-blank value.
    """
    query_string = urlparse(url).query
    return "X-Amz-Signature" in parse_qs(query_string)
def extract_s3_bucket_key(url: str) -> tuple[str, str]:
    """
    Extract the S3 bucket name and object key from a virtual-hosted-style HTTP(S) S3 URL.

    Supports both:
    - https://bucket.s3.amazonaws.com/key/...
    - https://bucket.s3.<region>.amazonaws.com/key/...

    The bucket is the first label of the hostname, so path-style URLs
    (https://s3.amazonaws.com/bucket/key) are not handled. The query string and
    fragment are ignored.

    Raises:
        DownloadError: if `url` has no hostname to read the bucket from.

    Returns:
        (bucket, key), where `key` is the percent-decoded URL path without its
        leading slash.
    """
    parsed = urlparse(url)
    if not parsed.hostname:
        raise DownloadError(f"Cannot determine the S3 bucket of a URL without a hostname: {url}")

    # Hostname labels, e.g. ["bucket","s3","us-east-1","amazonaws","com"]
    bucket = parsed.hostname.split(".")[0]
    # The URL path is the percent-encoded form of the key; decode it so that keys
    # containing spaces, "+", etc. match the real object name.
    key = unquote(parsed.path.lstrip("/"))
    return bucket, key
def download_from_s3_with_requests(url, output_file):
    """
    Stream the content served at `url` into `output_file` using the requests library.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    with requests.get(url, stream=True) as reader:
        # Check the status before opening the output file: otherwise an error response
        # (e.g. an expired presigned URL) would be saved as if it were the archive and
        # only fail later, during extraction, with a misleading message.
        reader.raise_for_status()
        with open(output_file, "wb") as file_handle:
            shutil.copyfileobj(reader.raw, file_handle)
def download_from_s3_with_boto(url, output_file):
    """
    Download the object addressed by `url` into `output_file`.

    The bucket and key are parsed out of `url`, and the transfer is done by the
    client returned from `archival.Archival._get_s3_client()`.
    """
    bucket_and_key = extract_s3_bucket_key(url)
    client = archival.Archival._get_s3_client()
    client.download_file(*bucket_and_key, output_file)
def download_from_s3(url):
    """
    Download file from S3 bucket by a given URL.

    The file is stored in a fresh directory from `mkdtemp_in_build_dir()`, under the
    last segment of the URL path.

    Raises:
        DownloadError: if `url` is empty or None.

    Returns:
        Path of the downloaded file.
    """
    if not url:
        raise DownloadError("Download URL not found")

    LOGGER.info("Downloading.", url=url)

    # Derive the name from the URL *path* only. Splitting the whole URL on "/" picks
    # up a piece of the query string when a presigned URL carries an unescaped "/"
    # there, giving a wrong name and extension.
    archive_name = urlparse(url).path.rsplit("/", 1)[-1]
    filename = os.path.join(mkdtemp_in_build_dir(), archive_name)

    arch = platform.uname().machine.lower()
    if is_s3_presigned_url(url) or arch.startswith(("s390", "ppc")):
        # S3 presigned URL can't be downloaded with boto3 library;
        # S390 and PPC architectures do not have adequate credentials;
        # thus we fall back to the standard requests library.
        download_from_s3_with_requests(url, filename)
    else:
        # Prefer boto3 library when possible.
        # boto3 library is much faster because it uses multipart download.
        download_from_s3_with_boto(url, filename)

    return filename
def _rsync_move_dir(source_dir, dest_dir):
    """
    Merge-move a directory tree.

    Every file below `source_dir` is moved to the same relative location below
    `dest_dir`. Missing destination directories are created, existing ones are
    merged into, and a file already present at the destination is replaced.
    This is similar to the behavior of `rsync` but different to `mv`.
    The (now file-less) directories of `source_dir` are left in place.
    """
    for walk_root, _subdirs, filenames in os.walk(source_dir):
        # Mirror the directory currently being walked under the destination.
        target_root = walk_root.replace(source_dir, dest_dir, 1)
        if not os.path.exists(target_root):
            os.makedirs(target_root)

        for name in filenames:
            origin = os.path.join(walk_root, name)
            target = os.path.join(target_root, name)
            already_there = os.path.exists(target)
            # Source and destination may be the very same file; nothing to move then.
            if already_there and os.path.samefile(origin, target):
                continue
            if already_there:
                os.remove(target)
            shutil.move(origin, target_root)
def extract_archive(archive_file, install_dir):
    """
    Uncompress `archive_file` and merge its contents into `install_dir`.

    `.zip` and `.tgz` archives are supported. The archive is unpacked into a
    scratch directory from `mkdtemp_in_build_dir()`, its contents are moved into
    `install_dir` (created if missing), and the scratch directory is always
    removed afterwards, whether or not extraction succeeded.

    Raises:
        DownloadError: if the archive's extension is neither `.zip` nor `.tgz`.

    Returns:
        `install_dir`.
    """
    LOGGER.info("Extracting archive data.", archive=archive_file, install_dir=install_dir)

    _, file_suffix = os.path.splitext(os.path.basename(archive_file))
    # Validate before creating the scratch directory so a rejected archive leaves
    # nothing behind.
    if file_suffix not in (".zip", ".tgz"):
        raise DownloadError(f"Unsupported file extension {file_suffix}")

    extract_root = mkdtemp_in_build_dir()
    try:
        if file_suffix == ".zip":
            # Support .zip downloads, used for Windows binaries.
            with zipfile.ZipFile(archive_file) as zip_handle:
                zip_handle.extractall(extract_root)
        else:
            # Support .tgz downloads, used for Linux binaries.
            with tarfile.open(archive_file, "r:gz") as tar_handle:
                tar_handle.extractall(path=extract_root)

        # Pre-hygienic tarballs have a unique top-level dir when untarred. We ignore
        # that dir to ensure the untarred dir structure is uniform. symbols and artifacts
        # are rarely used on pre-hygienic versions so we ignore them for simplicity.
        bin_archive_root = glob.glob(os.path.join(extract_root, "mongodb-*", "bin"))
        content_root = bin_archive_root[0] if bin_archive_root else extract_root

        os.makedirs(install_dir, exist_ok=True)
        _rsync_move_dir(content_root, install_dir)
    finally:
        # Remove the whole scratch tree, not just `content_root`: for pre-hygienic
        # tarballs `content_root` is only the nested bin dir, and deleting that alone
        # would leak the rest of the extraction.
        shutil.rmtree(extract_root)

    LOGGER.info("Extract archive completed.", installed_dir=install_dir)
    return install_dir
def mkdir_p(path):
    """
    Create `path` and any missing parents, like `mkdir -p`.

    An already existing directory is not an error; any other failure, including
    `path` existing as a non-directory, propagates as OSError.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        already_a_dir = err.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_dir:
            raise
def symlink_version(suffix, installed_dir, link_dir=None):
    """
    Create symlinks in `link_dir` for the binaries found under `installed_dir`.

    Binaries are read from the path returned by
    `build_hygienic_bin_path(parent=installed_dir)` when that is a directory,
    otherwise from `installed_dir` itself. Files ending in `.dll` are skipped.
    With a truthy `suffix` each link is named `<name>-<suffix><ext>`; otherwise
    the link keeps the file's own name. A link that already exists is left as is.

    If `link_dir` is None the links are created next to the binaries; otherwise
    `link_dir` is created when missing. Returns the directory holding the links.
    """

    def _create_windows_symlink(source, symlink_name):
        """Create a symlink through the Win32 CreateSymbolicLinkW call."""
        import ctypes

        create_link = ctypes.windll.kernel32.CreateSymbolicLinkW
        create_link.argtypes = (ctypes.c_wchar_p, ctypes.c_wchar_p, ctypes.c_uint32)
        create_link.restype = ctypes.c_ubyte
        # Flag 1 is passed when the link target is a directory, 0 otherwise.
        target_flag = 1 if os.path.isdir(source) else 0
        if create_link(symlink_name, source.replace("/", "\\"), target_flag) == 0:
            raise ctypes.WinError()

    candidate_bin_dir = build_hygienic_bin_path(parent=installed_dir)
    bin_dir = candidate_bin_dir if os.path.isdir(candidate_bin_dir) else installed_dir

    if link_dir is not None:
        mkdir_p(link_dir)
    else:
        link_dir = bin_dir

    for entry in os.listdir(bin_dir):
        if entry.endswith(".dll"):
            LOGGER.debug("Skipping linking DLL", file=entry)
            continue

        stem, extension = os.path.splitext(entry)
        link_name = f"{stem}-{suffix}{extension}" if suffix else entry

        try:
            executable = os.path.join(bin_dir, entry)
            executable_link = os.path.join(link_dir, link_name)
            # Windows goes through the direct Win32 helper instead of os.symlink.
            make_link = _create_windows_symlink if os.name == "nt" else os.symlink
            make_link(executable, executable_link)
            LOGGER.debug("Symlink created.", executable=executable, executable_link=executable_link)
        except OSError as exc:
            # An already existing link is fine; anything else is a real failure.
            if exc.errno != errno.EEXIST:
                raise

    LOGGER.info("Symlinks for all executables are created in the directory.", link_dir=link_dir)
    return link_dir