SERVER-116213, SERVER-117626, SERVER-117790: Enhance SBOM automation with private folder support, team automation and branch filtering (#48555)
Co-authored-by: Mathias Stearn <mathias@mongodb.com> GitOrigin-RevId: 0ed281764f145dc335b24cc49112f018a921f94b
This commit is contained in:
@@ -92,7 +92,7 @@ filters:
|
||||
- "README.third_party.md":
|
||||
approvers:
|
||||
- 10gen/code-review-team-ssdlc
|
||||
- "sbom.json":
|
||||
- "sbom.*":
|
||||
approvers:
|
||||
- 10gen/code-review-team-ssdlc
|
||||
- "MODULE.bazel*":
|
||||
|
||||
@@ -24,9 +24,7 @@ a notice will be included in
|
||||
| Name | License | Vendored Version | Emits persisted data | Distributed in Release Binaries |
|
||||
| ---------------------------------------------------- | ---------------------------------------------- | ---------------------------------------- | -------------------- | ------------------------------- |
|
||||
| [Abseil Common Libraries (C++)] | Apache-2.0 | 20250512.1 | | ✗ |
|
||||
| [Apache Avro C++] | Apache-2.0 | 1.12.0 | | ✗ |
|
||||
| [Asio C++ Library] | BSL-1.0 | 1.34.2 | | ✗ |
|
||||
| [AWS SDK for C++] | Apache-2.0 | 1.11.471 | | ✗ |
|
||||
| [benchmark] | Apache-2.0 | 1.5.2 | | |
|
||||
| [Boost C++ Libraries] | BSL-1.0 | 1.88.0 | | ✗ |
|
||||
| [c-ares] | MIT | 1.27.0 | | ✗ |
|
||||
@@ -35,17 +33,17 @@ a notice will be included in
|
||||
| [Cyrus SASL] | BSD-Attribution-HPND-disclaimer | 2.1.28 | | |
|
||||
| [fmt] | MIT | 11.2.0 | | ✗ |
|
||||
| [folly] | Apache-2.0 | 2023.12.25.00 | | ✗ |
|
||||
| [fuzztest] | BSD-3-Clause, Apache-2.0, HPND | 2025-07-28 | | |
|
||||
| [googletest] | BSD-3-Clause | 1.17.0 | | |
|
||||
| [gperftools] | BSD-3-Clause | 2.9.1 | | ✗ |
|
||||
| [gRPC (C++)] | Apache-2.0 | 1.74.1 | | ✗ |
|
||||
| [ICU4C - International Components for Unicode C/C++] | Unicode-3.0 | 57.1 | ✗ | ✗ |
|
||||
| [immer] | BSL-1.0 | 0b3aaf699b9d6f2e89f8e2b6d1221c307e02bda3 | | ✗ |
|
||||
| [immer] | BSL-1.0 | 0.9.1 | | ✗ |
|
||||
| [Intel® Decimal Floating-Point Math Library] | BSD-3-Clause | 2.0.1 | | ✗ |
|
||||
| [JSON Schema Store] | Apache-2.0 | 6847cfc3a17a04a7664474212db50c627e1e3408 | | |
|
||||
| [JSON-Schema-Test-Suite] | MIT | 728066f9c5c258ba3b1804a22a5b998f2ec77ec0 | | |
|
||||
| [libdwarf] | LGPL-2.1-or-later, BSD-3-Clause, Public Domain | 2.1.0 | | |
|
||||
| [libmongocrypt] | Apache-2.0 | 1.15.0 | ✗ | ✗ |
|
||||
| [librdkafka - The Apache Kafka C/C++ library] | BSD-2-Clause | 2.6.0 | | ✗ |
|
||||
| [LibTomCrypt] | Unlicense | 1.18.2 | ✗ | ✗ |
|
||||
| [libunwind] | MIT | 1.8.1 | | ✗ |
|
||||
| [linenoise] | BSD-2-Clause | 6cdc775807e57b2c3fd64bd207814f8ee1fe35f3 | | ✗ |
|
||||
@@ -57,12 +55,14 @@ a notice will be included in
|
||||
| [opentelemetry-cpp] | Apache-2.0 | 1.24.0 | ✗ | |
|
||||
| [opentelemetry-proto] | Apache-2.0 | 1.3.2 | ✗ | |
|
||||
| [PCRE2 - Perl-Compatible Regular Expressions] | BSD-3-Clause WITH PCRE2-exception | 10.40 | | ✗ |
|
||||
| [Prometheus Client Library for Modern C++] | MIT | 1.2.2 | | |
|
||||
| [Protobuf] | BSD-3-Clause | 6.31.1 | | ✗ |
|
||||
| [pypi/ocspbuilder] | MIT | 0.10.2 | | |
|
||||
| [pypi/ocspresponder] | Apache-2.0 | 0.5.0 | | |
|
||||
| [re2] | BSD-3-Clause | 2025-08-05 | | ✗ |
|
||||
| [S2 Geometry Library] | Apache-2.0 | a25c502bda9d7e0274b9e2b7825fbddf13cc0306 | ✗ | ✗ |
|
||||
| [SafeInt] | MIT | 3.0.28a | | ✗ |
|
||||
| [siphash] | CC0-1.0, MIT, Apache 2.0 with LLVM exception | eee7d0d84dc7731df2359b243aa5e75d85f6eaef | | ✗ |
|
||||
| [snappy] | BSD-3-Clause | 1.1.10 | ✗ | ✗ |
|
||||
| [Snowball Stemming Algorithms (libstemmer)] | BSD-3-Clause | 1.0.0 | ✗ | ✗ |
|
||||
| [tcmalloc] | Apache-2.0 | f3b20f9a07e175c5d897df7b49d9830d4efa6110 | | ✗ |
|
||||
@@ -73,11 +73,8 @@ a notice will be included in
|
||||
| [yaml-cpp] | MIT | 0.6.3 | | ✗ |
|
||||
| [zlib] | Zlib | 1.3.1 | ✗ | ✗ |
|
||||
| [Zstandard (zstd)] | BSD-3-Clause OR GPL-2.0-only | 1.5.5 | ✗ | ✗ |
|
||||
| [siphash] | MIT | f26d35e964c6290ffe23d9043475ad3129f409e0 | | ✗ |
|
||||
|
||||
[AWS SDK for C++]: https://github.com/aws/aws-sdk-cpp.git
|
||||
[Abseil Common Libraries (C++)]: https://github.com/abseil/abseil-cpp.git
|
||||
[Apache Avro C++]: https://github.com/apache/avro.git
|
||||
[Asio C++ Library]: https://github.com/chriskohlhoff/asio.git
|
||||
[Boost C++ Libraries]: https://github.com/boostorg/boost.git
|
||||
[CRoaring]: https://github.com/roaringbitmap/croaring.git
|
||||
@@ -91,6 +88,7 @@ a notice will be included in
|
||||
[Mozilla Firefox ESR]: https://github.com/mozilla-firefox/firefox.git
|
||||
[MurmurHash3]: https://github.com/aappleby/smhasher/blob/a6bd3ce/
|
||||
[PCRE2 - Perl-Compatible Regular Expressions]: https://github.com/pcre2project/pcre2.git
|
||||
[Prometheus Client Library for Modern C++]: https://github.com/jupp0r/prometheus-cpp.git
|
||||
[Protobuf]: https://github.com/protocolbuffers/protobuf.git
|
||||
[S2 Geometry Library]: https://github.com/google/s2geometry.git
|
||||
[SafeInt]: https://github.com/dcleblanc/safeint.git
|
||||
@@ -103,13 +101,13 @@ a notice will be included in
|
||||
[cpptrace]: https://github.com/jeremy-rifkin/cpptrace.git
|
||||
[fmt]: https://github.com/fmtlib/fmt.git
|
||||
[folly]: https://github.com/facebook/folly.git
|
||||
[fuzztest]: https://github.com/google/fuzztest.git
|
||||
[gRPC (C++)]: https://github.com/grpc/grpc.git
|
||||
[googletest]: https://github.com/google/googletest.git
|
||||
[gperftools]: https://github.com/gperftools/gperftools.git
|
||||
[immer]: https://github.com/arximboldi/immer.git
|
||||
[libdwarf]: https://github.com/davea42/libdwarf-code.git
|
||||
[libmongocrypt]: https://github.com/mongodb/libmongocrypt.git
|
||||
[librdkafka - The Apache Kafka C/C++ library]: https://github.com/confluentinc/librdkafka.git
|
||||
[libunwind]: https://github.com/libunwind/libunwind.git
|
||||
[linenoise]: https://github.com/antirez/linenoise
|
||||
[nlohmann/json]: https://github.com/nlohmann/json.git
|
||||
@@ -119,13 +117,13 @@ a notice will be included in
|
||||
[pypi/ocspbuilder]: https://pypi.org/project/ocspbuilder/
|
||||
[pypi/ocspresponder]: https://pypi.org/project/ocspresponder/
|
||||
[re2]: https://github.com/google/re2.git
|
||||
[siphash]: https://github.com/veorq/siphash/
|
||||
[snappy]: https://github.com/google/snappy.git
|
||||
[tcmalloc]: https://github.com/google/tcmalloc.git
|
||||
[timelib]: https://github.com/derickr/timelib.git
|
||||
[valgrind.h]: https://sourceware.org/git/valgrind.git
|
||||
[yaml-cpp]: https://github.com/jbeder/yaml-cpp.git
|
||||
[zlib]: https://zlib.net/fossils/
|
||||
[siphash]: https://github.com/veorq/SipHash
|
||||
|
||||
## Dynamically Linked Libraries
|
||||
|
||||
|
||||
@@ -434,7 +434,7 @@ def run_rules_lint(bazel_bin: str, args: list[str]):
|
||||
if file.endswith((SUPPORTED_EXTENSIONS))
|
||||
]
|
||||
|
||||
if lint_all or "sbom.json" in files_to_lint:
|
||||
if lint_all or "sbom.private.json" in files_to_lint:
|
||||
lr.run_bazel("//buildscripts:sbom_linter")
|
||||
|
||||
if lint_all or any(file.endswith((".h", ".cpp")) for file in files_to_lint):
|
||||
|
||||
@@ -27,3 +27,9 @@ py_binary(
|
||||
srcs = ["sbom_files_pr.py"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
py_library(
|
||||
name = "sbom_utils",
|
||||
srcs = ["sbom_utils.py"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
@@ -38,8 +38,10 @@ for component in components_remove:
|
||||
|
||||
# List of folders in src/third_party to exclude from SBOM generation warnings
|
||||
third_party_folders_remove = [
|
||||
"scripts",
|
||||
"boringssl_replacement", # this is an alias folder
|
||||
"src/third_party/scripts", # this folder contains scripts related to the import process, but does not contain SBOM components itself
|
||||
"src/third_party/private", # this is not a real third-party folder, but rather a place for MongoDB to store private forks of third-party code. The actual SBOM components in this folder are still included.
|
||||
"src/third_party/boringssl_replacement", # this is an alias folder
|
||||
"src/third_party/wasmtime", # currently no targets depend on this
|
||||
]
|
||||
|
||||
# ################ Component Renaming ################
|
||||
@@ -49,14 +51,13 @@ third_party_folders_remove = [
|
||||
# Valid: pkg:github/abseil/abseil-cpp@20250512.1
|
||||
# Run string replacements to correct for this:
|
||||
endor_components_rename = [
|
||||
["pkg:c/sourceware.org/git/valgrind", "pkg:generic/valgrind/valgrind"],
|
||||
["pkg:generic/sourceware.org/git/valgrind", "pkg:generic/valgrind/valgrind"],
|
||||
["pkg:generic/zlib", "pkg:github/madler/zlib"],
|
||||
["pkg:generic/libstemmer", "pkg:github/snowballstem/snowball"],
|
||||
["pkg:generic/intel-dfp-math", "pkg:generic/intel/IntelRDFPMathLib"],
|
||||
["pkg:c/git.openldap.org/openldap/openldap", "pkg:generic/openldap/openldap"],
|
||||
["pkg:generic/github.com/", "pkg:github/"],
|
||||
["pkg:c/github.com/", "pkg:github/"],
|
||||
["pkg:generic/gitlab.gnome.org/gnome/libxml2", "pkg:generic/gnome/libxml2"],
|
||||
["pkg:generic/gitlab.com/bzip2/bzip2", "pkg:github/libarchive/bzip2"],
|
||||
]
|
||||
|
||||
# ################ Version Transformation ################
|
||||
|
||||
@@ -8,13 +8,11 @@ Invoke with --help or -h for help message.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.parse
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
@@ -29,6 +27,19 @@ from config import (
|
||||
from endorctl_utils import EndorCtl
|
||||
from git import Commit, Repo
|
||||
|
||||
from buildscripts.sbom.sbom_utils import (
|
||||
add_component_property,
|
||||
check_metadata_sbom,
|
||||
convert_sbom_to_public,
|
||||
read_sbom_json_file,
|
||||
remove_sbom_component,
|
||||
sbom_components_to_dict,
|
||||
set_component_version,
|
||||
set_dependency_version,
|
||||
write_sbom_json_file,
|
||||
)
|
||||
from buildscripts.util.codeowners_utils import Owners
|
||||
|
||||
# region init
|
||||
|
||||
|
||||
@@ -65,65 +76,6 @@ REGEX_GITHUB_URL = r"^(https://github.com/)([a-zA-Z0-9-]{1,39}/[a-zA-Z0-9-_.]{1,
|
||||
REGEX_RELEASE_BRANCH = r"^v\d\.\d$"
|
||||
REGEX_RELEASE_TAG = r"^r\d\.\d.\d(-\w*)?$"
|
||||
|
||||
# ################ PURL Validation ################
|
||||
REGEX_STR_PURL_OPTIONAL = ( # Optional Version (any chars except ? @ #)
|
||||
r"(?:@[^?@#]*)?"
|
||||
# Optional Qualifiers (any chars except @ #)
|
||||
r"(?:\?[^@#]*)?"
|
||||
# Optional Subpath (any chars)
|
||||
r"(?:#.*)?$"
|
||||
)
|
||||
|
||||
REGEX_PURL = {
|
||||
# deb PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/deb-definition.md
|
||||
"deb": re.compile(
|
||||
r"^pkg:deb/" # Scheme and type
|
||||
# Namespace (organization/user), letters must be lowercase
|
||||
r"(debian|ubuntu)+"
|
||||
r"/"
|
||||
r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL # Name
|
||||
),
|
||||
# Generic PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/generic-definition.md
|
||||
"generic": re.compile(
|
||||
r"^pkg:generic/" # Scheme and type
|
||||
r"([a-zA-Z0-9._-]+/)?" # Optional namespace segment
|
||||
r"[a-zA-Z0-9._-]+" + REGEX_STR_PURL_OPTIONAL # Name (required)
|
||||
),
|
||||
# GitHub PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/github-definition.md
|
||||
"github": re.compile(
|
||||
r"^pkg:github/" # Scheme and type
|
||||
# Namespace (organization/user), letters must be lowercase
|
||||
r"[a-z0-9-]+"
|
||||
r"/"
|
||||
r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL # Name (repository)
|
||||
),
|
||||
# PyPI PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/pypi-definition.md
|
||||
"pypi": re.compile(
|
||||
r"^pkg:pypi/" # Scheme and type
|
||||
r"[a-z0-9_-]+" # Name, letters must be lowercase, dashes, underscore
|
||||
+ REGEX_STR_PURL_OPTIONAL
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
# Metadata SBOM requirements
|
||||
METADATA_FIELDS_REQUIRED = [
|
||||
"type",
|
||||
"bom-ref",
|
||||
"group",
|
||||
"name",
|
||||
"version",
|
||||
"description",
|
||||
"licenses",
|
||||
"copyright",
|
||||
"externalReferences",
|
||||
"scope",
|
||||
]
|
||||
METADATA_FIELDS_ONE_OF = [
|
||||
["author", "supplier"],
|
||||
["purl", "cpe"],
|
||||
]
|
||||
|
||||
# endregion init
|
||||
|
||||
|
||||
@@ -146,7 +98,7 @@ class GitInfo:
|
||||
).stdout.strip()
|
||||
)
|
||||
self._repo = Repo(self.repo_root)
|
||||
except Exception as e:
|
||||
except (OSError, subprocess.CalledProcessError, AttributeError, TypeError) as e:
|
||||
logger.warning(
|
||||
"Unable to read git repo information. All necessary script arguments must be provided."
|
||||
)
|
||||
@@ -168,7 +120,7 @@ class GitInfo:
|
||||
filtered_tags = [
|
||||
tag for tag in self._repo.tags if re.fullmatch(REGEX_RELEASE_TAG, tag.name)
|
||||
]
|
||||
logging.info(f"GIT: Parsing {len(filtered_tags)} release tags for match to commit")
|
||||
logging.info("GIT: Parsing %d release tags for match to commit", len(filtered_tags))
|
||||
for tag in filtered_tags:
|
||||
if tag.commit == self.commit:
|
||||
release_tags.append(tag.name)
|
||||
@@ -176,10 +128,10 @@ class GitInfo:
|
||||
self.release_tag = release_tags[-1]
|
||||
else:
|
||||
self.release_tag = None
|
||||
logging.debug(f"GitInfo->release_tag(): {self.release_tag}")
|
||||
logging.debug("GitInfo->release_tag(): %s", self.release_tag)
|
||||
|
||||
logging.debug(f"GitInfo->__init__: {self}")
|
||||
except Exception as e:
|
||||
logging.debug("GitInfo->__init__: %s", self)
|
||||
except (AttributeError, IndexError, ValueError, TypeError) as e:
|
||||
logger.warning("Unable to fully parse git info.")
|
||||
logger.warning(e)
|
||||
|
||||
@@ -232,76 +184,6 @@ def extract_repo_from_git_url(git_url: str) -> dict:
|
||||
}
|
||||
|
||||
|
||||
def is_valid_purl(purl: str) -> bool:
|
||||
"""Validate a GitHub or Generic PURL"""
|
||||
for purl_type, regex in REGEX_PURL.items():
|
||||
if regex.match(purl):
|
||||
logger.debug(f"PURL: {purl} matched PURL type '{purl_type}' regex '{regex.pattern}'")
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def sbom_components_to_dict(sbom: dict, with_version: bool = False) -> dict:
|
||||
"""Create a dict of SBOM components with a version-less PURL as the key"""
|
||||
components = sbom["components"]
|
||||
if with_version:
|
||||
components_dict = {
|
||||
urllib.parse.unquote(component["bom-ref"]): component for component in components
|
||||
}
|
||||
else:
|
||||
components_dict = {
|
||||
urllib.parse.unquote(component["bom-ref"]).split("@")[0]: component
|
||||
for component in components
|
||||
}
|
||||
return components_dict
|
||||
|
||||
|
||||
def check_metadata_sbom(meta_bom: dict) -> None:
|
||||
"""Run checks on SBOM component metadata for expected fields."""
|
||||
for component in meta_bom["components"]:
|
||||
for field in METADATA_FIELDS_REQUIRED:
|
||||
if field not in component:
|
||||
logger.warning(
|
||||
f"METADATA: '{component['bom-ref'] or component['name']} is missing required field '{field}'."
|
||||
)
|
||||
for fields in METADATA_FIELDS_ONE_OF:
|
||||
found = False
|
||||
for field in fields:
|
||||
found = found or field in component
|
||||
if not found:
|
||||
logger.warning(
|
||||
f"METADATA: '{component['bom-ref'] or component['name']} is missing one of fields '{fields}'."
|
||||
)
|
||||
|
||||
|
||||
def read_sbom_json_file(file_path: str) -> dict:
|
||||
"""Load a JSON SBOM file (schema is not validated)"""
|
||||
try:
|
||||
with open(file_path, "r", encoding="utf-8") as input_json:
|
||||
sbom_json = input_json.read()
|
||||
result = json.loads(sbom_json)
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading SBOM file from {file_path}")
|
||||
logger.error(e)
|
||||
else:
|
||||
logger.info(f"SBOM loaded from {file_path} with {len(result['components'])} components")
|
||||
return result
|
||||
|
||||
|
||||
def write_sbom_json_file(sbom_dict: dict, file_path: str) -> None:
|
||||
"""Save a JSON SBOM file (schema is not validated)"""
|
||||
try:
|
||||
file_path = os.path.abspath(file_path)
|
||||
with open(file_path, "w", encoding="utf-8") as output_json:
|
||||
formatted_sbom = json.dumps(sbom_dict, indent=2) + "\n"
|
||||
output_json.write(formatted_sbom)
|
||||
except Exception as e:
|
||||
logger.error(f"Error writing SBOM file to {file_path}")
|
||||
logger.error(e)
|
||||
else:
|
||||
logger.info(f"SBOM file saved to {file_path}")
|
||||
|
||||
|
||||
def write_list_to_text_file(str_list: list, file_path: str) -> None:
|
||||
"""Save a list of strings to a text file"""
|
||||
try:
|
||||
@@ -309,79 +191,48 @@ def write_list_to_text_file(str_list: list, file_path: str) -> None:
|
||||
with open(file_path, "w", encoding="utf-8") as output_txt:
|
||||
for item in str_list:
|
||||
output_txt.write(f"{item}\n")
|
||||
except Exception as e:
|
||||
logger.error(f"Error writing text file to {file_path}")
|
||||
except OSError as e:
|
||||
logger.error("Error writing text file to %s", file_path)
|
||||
logger.error(e)
|
||||
else:
|
||||
logger.info(f"Text file saved to {file_path}")
|
||||
logger.info("Text file saved to %s", file_path)
|
||||
|
||||
|
||||
def set_component_version(
|
||||
component: dict, version: str, purl_version: str = None, cpe_version: str = None
|
||||
) -> None:
|
||||
"""Update the appropriate version fields in a component from the metadata SBOM"""
|
||||
if not purl_version:
|
||||
purl_version = version
|
||||
def get_subfolders_list(repo_root: str, base_folder_path: str = ".", subfolders=None) -> list:
|
||||
"""Get list of all directories in the specified path and subfolders"""
|
||||
|
||||
if not cpe_version:
|
||||
cpe_version = version
|
||||
if subfolders is None:
|
||||
subfolders = set()
|
||||
subfolders.add(
|
||||
""
|
||||
) # Ensure set includes blank to cover search of base folder without a subfolder
|
||||
folders = []
|
||||
|
||||
component["bom-ref"] = component["bom-ref"].replace("{{VERSION}}", purl_version)
|
||||
component["version"] = component["version"].replace("{{VERSION}}", version)
|
||||
if component.get("purl"):
|
||||
component["purl"] = component["purl"].replace(
|
||||
"{{VERSION}}", urllib.parse.quote(purl_version)
|
||||
)
|
||||
if not is_valid_purl(component["purl"]):
|
||||
logger.warning(f"PURL: Invalid PURL ({component['purl']})")
|
||||
if component.get("cpe"):
|
||||
component["cpe"] = component["cpe"].replace("{{VERSION}}", cpe_version)
|
||||
|
||||
|
||||
def set_dependency_version(dependencies: list, meta_bom_ref: str, purl_version: str) -> None:
|
||||
"""Update the appropriate dependency version fields in the metadata SBOM"""
|
||||
r = 0
|
||||
d = 0
|
||||
for dependency in dependencies:
|
||||
if "{{VERSION}}" in dependency["ref"] and dependency["ref"] == meta_bom_ref:
|
||||
dependency["ref"] = dependency["ref"].replace("{{VERSION}}", purl_version)
|
||||
r += 1
|
||||
for i in range(len(dependency["dependsOn"])):
|
||||
if dependency["dependsOn"][i] == meta_bom_ref:
|
||||
dependency["dependsOn"][i] = dependency["dependsOn"][i].replace(
|
||||
"{{VERSION}}", purl_version
|
||||
)
|
||||
d += 1
|
||||
|
||||
logger.debug(f"set_dependency_version: '{meta_bom_ref}' updated {r} refs and {d} dependsOn")
|
||||
|
||||
|
||||
def get_subfolders_dict(folder_path: str = ".") -> dict:
|
||||
"""Get list of all directories in the specified path"""
|
||||
subfolders = []
|
||||
try:
|
||||
# Get all entries (files and directories) in the specified path
|
||||
entries = os.listdir(folder_path)
|
||||
for subfolder in subfolders:
|
||||
folder_path = os.path.join(repo_root, base_folder_path, subfolder)
|
||||
logger.info("Getting subfolders in: %s", folder_path)
|
||||
# Get all entries (files and directories) in the specified path
|
||||
folders.extend(
|
||||
[
|
||||
os.path.join(base_folder_path, subfolder, item)
|
||||
for item in os.listdir(folder_path)
|
||||
]
|
||||
)
|
||||
logger.debug("Found folders: %s", folders)
|
||||
|
||||
# Filter for directories
|
||||
for entry in entries:
|
||||
full_path = os.path.join(folder_path, entry)
|
||||
if os.path.isdir(full_path):
|
||||
subfolders.append(entry)
|
||||
folders = [folder for folder in folders if os.path.isdir(folder)]
|
||||
folders.sort()
|
||||
return folders
|
||||
except FileNotFoundError:
|
||||
logger.error(f"Error: Directory '{folder_path}' not found.")
|
||||
except Exception as e:
|
||||
logger.error(f"An error occurred: {e}")
|
||||
|
||||
subfolders.sort()
|
||||
return {key: 0 for key in subfolders}
|
||||
|
||||
|
||||
def add_component_property(component: dict, name: str, value: str) -> None:
|
||||
"""Add a key/value to to 'properties' in SBOM component"""
|
||||
if "properties" not in component:
|
||||
component["properties"] = []
|
||||
component["properties"].append({"name": name, "value": value})
|
||||
logger.error("Error: Directory '%s' not found.", os.path.join(base_folder_path, subfolder))
|
||||
except (PermissionError, OSError) as e:
|
||||
logger.error(
|
||||
"An error occurred while accessing the directory '%s'.",
|
||||
os.path.join(base_folder_path, subfolder),
|
||||
)
|
||||
logger.error(e)
|
||||
|
||||
|
||||
def get_component_import_script_path(component: dict) -> str:
|
||||
@@ -420,13 +271,14 @@ def del_component_priority_version_source(component: dict) -> None:
|
||||
for i in range(len(component["properties"]) - 1, -1, -1):
|
||||
if component["properties"][i].get("name") == "generate_sbom:priority_version_source":
|
||||
logger.debug(
|
||||
f"PRIORITY VERSION SOURCE: {component['bom-ref']}: Removing priority version source from SBOM metadata."
|
||||
"PRIORITY VERSION SOURCE: %s: Removing priority version source from SBOM metadata.",
|
||||
component["bom-ref"],
|
||||
)
|
||||
del component["properties"][i]
|
||||
|
||||
|
||||
def get_version_from_import_script(file_path: str) -> str:
|
||||
"""A rudimentary parse of a shell script file to extract the static value defined for the VERSION variable"""
|
||||
"""A rudimentary parse of a shell or python script file to extract the static value defined for the VERSION variable"""
|
||||
try:
|
||||
with open(file_path, "r", encoding="utf-8") as file:
|
||||
for line in file:
|
||||
@@ -436,17 +288,38 @@ def get_version_from_import_script(file_path: str) -> str:
|
||||
r"\g<content>",
|
||||
line.strip(),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Unable to load {file_path}")
|
||||
elif line.strip().startswith("VERSION = "):
|
||||
return re.sub(
|
||||
r"^VERSION\s=\s(?P<quote>[\"']?)(?P<content>\S+)(?P=quote).*$",
|
||||
r"\g<content>",
|
||||
line.strip(),
|
||||
)
|
||||
except OSError as e:
|
||||
logger.warning("Unable to load %s", file_path)
|
||||
logger.warning(e)
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def deduplicate_list_of_dicts(list_of_dicts):
|
||||
"""Deduplicate a list of dicts while preserving order. Dicts must be hashable (i.e., contain only hashable types)"""
|
||||
seen = set()
|
||||
unique_list = []
|
||||
for d in list_of_dicts:
|
||||
# Convert dict items to frozenset for hashability
|
||||
frozenset_items = frozenset(d.items())
|
||||
if frozenset_items not in seen:
|
||||
seen.add(frozenset_items)
|
||||
unique_list.append(d)
|
||||
return unique_list
|
||||
|
||||
|
||||
# endregion functions and classes
|
||||
|
||||
|
||||
def main() -> None:
|
||||
"""Main function to generate SBOM"""
|
||||
|
||||
# region define args
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
@@ -508,14 +381,26 @@ def main() -> None:
|
||||
)
|
||||
files.add_argument(
|
||||
"--sbom-in",
|
||||
help="Input path for previous SBOM file (Default: './sbom.json')",
|
||||
help="Input path for previous SBOM file (Default: './sbom.private.json')",
|
||||
default="./sbom.private.json",
|
||||
type=str,
|
||||
)
|
||||
files.add_argument(
|
||||
"--sbom-out-public",
|
||||
help="Output path for public SBOM file (Default: './sbom.json')",
|
||||
default="./sbom.json",
|
||||
type=str,
|
||||
)
|
||||
files.add_argument(
|
||||
"--sbom-out",
|
||||
help="Output path for SBOM file (Default: './sbom.json')",
|
||||
default="./sbom.json",
|
||||
"--sbom-out-internal",
|
||||
help="Output path for internal SBOM file (Default: './sbom.private.json')",
|
||||
default="./sbom.private.json",
|
||||
type=str,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--branch-filter",
|
||||
help="Run only if Git repo branch matches regex (Default: '.*')",
|
||||
default=".*",
|
||||
type=str,
|
||||
)
|
||||
parser.add_argument(
|
||||
@@ -581,8 +466,16 @@ def main() -> None:
|
||||
)
|
||||
git_info.branch = args.branch
|
||||
|
||||
# Check if branch matches the branch filter regex
|
||||
if not re.fullmatch(args.branch_filter, git_info.branch):
|
||||
print(
|
||||
f"Branch '{git_info.branch}' does not match branch filter '{args.branch_filter}'. Terminating as successful."
|
||||
)
|
||||
sys.exit(0)
|
||||
|
||||
# files
|
||||
sbom_out_path = args.sbom_out
|
||||
sbom_out_public_path = args.sbom_out_public
|
||||
sbom_out_internal_path = args.sbom_out_internal
|
||||
sbom_in_path = args.sbom_in
|
||||
sbom_metadata_path = args.sbom_metadata
|
||||
save_warnings = args.save_warnings
|
||||
@@ -668,15 +561,18 @@ def main() -> None:
|
||||
|
||||
print_banner("Loading metadata SBOM and previous SBOM")
|
||||
|
||||
meta_bom = read_sbom_json_file(sbom_metadata_path)
|
||||
if not meta_bom:
|
||||
logger.error("No SBOM metadata. This is fatal.")
|
||||
if os.path.exists(sbom_metadata_path):
|
||||
meta_bom = read_sbom_json_file(sbom_metadata_path)
|
||||
else:
|
||||
logger.error("No SBOM metadata file at '%s'. This is fatal.", sbom_metadata_path)
|
||||
sys.exit(1)
|
||||
|
||||
prev_bom = read_sbom_json_file(sbom_in_path)
|
||||
if not prev_bom:
|
||||
if os.path.exists(sbom_in_path):
|
||||
prev_bom = read_sbom_json_file(sbom_in_path)
|
||||
else:
|
||||
logger.warning(
|
||||
"Unable to load previous SBOM data. The new SBOM will be generated without any previous context. This is unexpected, but not fatal."
|
||||
"PREVIOUS SBOM: No previous SBOM file at `%s`. The new SBOM will be generated without any previous context. This is unexpected, but not fatal.",
|
||||
sbom_in_path,
|
||||
)
|
||||
# Create empty prev_bom to avoid downstream processing errors
|
||||
prev_bom = {
|
||||
@@ -714,7 +610,10 @@ def main() -> None:
|
||||
|
||||
# Attempt to determine the MongoDB Version being scanned
|
||||
logger.debug(
|
||||
f"Available MongoDB version options, tag: {git_info.release_tag}, branch: {git_info.branch}, previous SBOM: {prev_bom['metadata']['component']['version']}"
|
||||
"Available MongoDB version options, tag: %s, branch: %s, previous SBOM: %s",
|
||||
git_info.release_tag,
|
||||
git_info.branch,
|
||||
prev_bom["metadata"]["component"]["version"],
|
||||
)
|
||||
meta_bom_ref = meta_bom["metadata"]["component"]["bom-ref"]
|
||||
|
||||
@@ -730,7 +629,7 @@ def main() -> None:
|
||||
version = git_info.release_tag[1:] # remove leading 'r'
|
||||
purl_version = git_info.release_tag
|
||||
cpe_version = version # without leading 'r'
|
||||
logger.info(f"Using release_tag '{git_info.release_tag}' as MongoDB version")
|
||||
logger.info("Using release_tag '%s' as MongoDB version", git_info.release_tag)
|
||||
|
||||
# Release branch e.g., v7.0 or v8.2
|
||||
elif target == "branch" and re.fullmatch(REGEX_RELEASE_BRANCH, git_info.branch):
|
||||
@@ -738,7 +637,7 @@ def main() -> None:
|
||||
purl_version = git_info.branch
|
||||
# remove leading 'v', add wildcard. e.g. 8.2.*
|
||||
cpe_version = version[1:] + ".*"
|
||||
logger.info(f"Using release branch '{git_info.branch}' as MongoDB version")
|
||||
logger.info("Using release branch '%s' as MongoDB version", git_info.branch)
|
||||
|
||||
# Previous SBOM app version, if all needed specifiers exist
|
||||
elif (
|
||||
@@ -749,7 +648,7 @@ def main() -> None:
|
||||
version = prev_bom["metadata"]["component"]["version"]
|
||||
purl_version = prev_bom["metadata"]["component"]["purl"].split("@")[-1]
|
||||
cpe_version = prev_bom["metadata"]["component"]["cpe"].split(":")[5]
|
||||
logger.info(f"Using previous SBOM version '{version}' as MongoDB version")
|
||||
logger.info("Using previous SBOM version '%s' as MongoDB version", version)
|
||||
|
||||
else:
|
||||
# Fall back to the version specified in the Endor SBOM
|
||||
@@ -758,7 +657,8 @@ def main() -> None:
|
||||
purl_version = version
|
||||
cpe_version = version
|
||||
logger.warning(
|
||||
f"Using SBOM version '{version}' from Endor Labs scan. This is unlikely to be accurate and may specify a PR #."
|
||||
"Using SBOM version '%s' from Endor Labs scan. This is unlikely to be accurate and may specify a PR #.",
|
||||
version,
|
||||
)
|
||||
|
||||
# Set main component version
|
||||
@@ -772,10 +672,26 @@ def main() -> None:
|
||||
|
||||
# region Parse metadata SBOM components
|
||||
|
||||
third_party_folders = get_subfolders_dict(git_info.repo_root.as_posix() + "/src/third_party")
|
||||
third_party_folders = get_subfolders_list(
|
||||
git_info.repo_root.as_posix(), "src/third_party", {"private"}
|
||||
)
|
||||
logger.debug("Initial list of 'src/third_party' subfolders: %s", third_party_folders)
|
||||
|
||||
# Convert to a dictionary to count instances of folders found in SBOM locations
|
||||
third_party_folders = dict.fromkeys(third_party_folders, 0)
|
||||
|
||||
# exclude folders specified in config.py
|
||||
for folder in third_party_folders_remove:
|
||||
del third_party_folders[folder]
|
||||
if folder in third_party_folders:
|
||||
del third_party_folders[folder]
|
||||
else:
|
||||
logger.warning(
|
||||
"THIRD_PARTY FOLDERS: folder '%s' specified for removal in config.py not found in 'src/third_party' folders list. Consider updating config.py.",
|
||||
folder,
|
||||
)
|
||||
|
||||
# Load codeowners data for later lookup
|
||||
owners = Owners()
|
||||
|
||||
for component in meta_bom["components"]:
|
||||
versions = {
|
||||
@@ -786,6 +702,8 @@ def main() -> None:
|
||||
}
|
||||
|
||||
component_key = component["bom-ref"].split("@")[0]
|
||||
if "properties" not in component:
|
||||
component["properties"] = []
|
||||
|
||||
print_banner("Component: " + component_key)
|
||||
|
||||
@@ -795,7 +713,9 @@ def main() -> None:
|
||||
if priority_version_source:
|
||||
versions["priority_version_source"] = priority_version_source
|
||||
logger.info(
|
||||
f"PRIORITY VERSION SOURCE: {component_key}: Set priority version source to '{priority_version_source}'"
|
||||
"PRIORITY VERSION SOURCE: %s: Set priority version source to '%s'",
|
||||
component_key,
|
||||
priority_version_source,
|
||||
)
|
||||
del_component_priority_version_source(component)
|
||||
|
||||
@@ -807,7 +727,9 @@ def main() -> None:
|
||||
component["properties"].extend(endor_component.get("properties", []))
|
||||
versions["endor"] = endor_component.get("version")
|
||||
logger.debug(
|
||||
f"VERSION ENDOR: {component_key}: Found version '{versions['endor']}' in Endor Labs results"
|
||||
"VERSION ENDOR: %s: Found version '%s' in Endor Labs results",
|
||||
component_key,
|
||||
versions["endor"],
|
||||
)
|
||||
|
||||
############## Import Script ###############
|
||||
@@ -821,11 +743,16 @@ def main() -> None:
|
||||
versions["import_script"] = versions["import_script"].replace("release-", "")
|
||||
if versions["import_script"]:
|
||||
logger.debug(
|
||||
f"VERSION IMPORT SCRIPT: {component_key}: Found version '{versions['import_script']}' in import script '{import_script_path}'"
|
||||
"VERSION IMPORT SCRIPT: %s: Found version '%s' in import script '%s'",
|
||||
component_key,
|
||||
versions["import_script"],
|
||||
import_script_path,
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
f"VERSION IMPORT SCRIPT: {component_key}: Import script not found! '{import_script_path}'"
|
||||
"VERSION IMPORT SCRIPT: %s: Import script not found! '%s'",
|
||||
component_key,
|
||||
import_script_path,
|
||||
)
|
||||
|
||||
############## Metadata ###############
|
||||
@@ -833,7 +760,7 @@ def main() -> None:
|
||||
if "{{VERSION}}" not in component["version"]:
|
||||
versions["metadata"] = component.get("version")
|
||||
|
||||
logger.info(f"VERSIONS: {component_key}: " + str(versions))
|
||||
logger.info("VERSIONS: %s: %s", component_key, str(versions))
|
||||
|
||||
############## Component Special Cases ###############
|
||||
process_component_special_cases(
|
||||
@@ -864,7 +791,11 @@ def main() -> None:
|
||||
)
|
||||
)
|
||||
logger.warning(
|
||||
f"VERSION MISMATCH: {component_key}: Endor version {versions['endor']} does not match import script version {versions['import_script']}. 'priority_version_source' from metadata: {versions['priority_version_source']}"
|
||||
"VERSION MISMATCH: %s: Endor version %s does not match import script version %s. 'priority_version_source' from metadata: %s",
|
||||
component_key,
|
||||
versions["endor"],
|
||||
versions["import_script"],
|
||||
versions["priority_version_source"],
|
||||
)
|
||||
|
||||
# For the standard workflow, we favor the pre-set priority version source,
|
||||
@@ -872,7 +803,9 @@ def main() -> None:
|
||||
if versions["priority_version_source"] and versions["priority_version_source"] in versions:
|
||||
version = versions[versions["priority_version_source"]]
|
||||
logger.info(
|
||||
f"VERSION: {component_key}: Using priority_version_source '{priority_version_source}' from metadata file."
|
||||
"VERSION: %s: Using priority_version_source '%s' from metadata file.",
|
||||
component_key,
|
||||
priority_version_source,
|
||||
)
|
||||
else:
|
||||
version = versions["endor"] or versions["import_script"] or versions["metadata"]
|
||||
@@ -891,36 +824,91 @@ def main() -> None:
|
||||
|
||||
set_dependency_version(meta_bom["dependencies"], meta_bom_ref, version)
|
||||
|
||||
# check against third_party folders
|
||||
# check against third_party folders and log codeowners if location is defined in evidence occurrences
|
||||
component_defines_location = False
|
||||
for occurrence in component.get("evidence", {}).get("occurrences", []):
|
||||
location = occurrence.get("location")
|
||||
if location:
|
||||
component_defines_location = True
|
||||
# Look up the codeowner for the folder and add as a property
|
||||
component_codeowners = owners.get_codeowners(location)
|
||||
logger.debug(
|
||||
"CODEOWNER: %s code owners for location %s are %s",
|
||||
component_key,
|
||||
location,
|
||||
component_codeowners,
|
||||
)
|
||||
if not component_codeowners:
|
||||
component_codeowners = ["unknown"]
|
||||
logger.warning(
|
||||
"CODEOWNER: %s could not determine code owners for location %s",
|
||||
component_key,
|
||||
location,
|
||||
)
|
||||
else:
|
||||
for codeowner in component_codeowners:
|
||||
try:
|
||||
jira_teams = owners.get_jira_team_from_codeowner(codeowner)
|
||||
except KeyError:
|
||||
logger.warning(
|
||||
"CODEOWNER: %s could not determine JIRA teams for codeowner %s. Mapping may be missing from buildscripts/util/co_jira_map.yml",
|
||||
component_key,
|
||||
codeowner,
|
||||
)
|
||||
jira_teams = [codeowner]
|
||||
continue
|
||||
for jira_team in jira_teams:
|
||||
add_component_property(
|
||||
component, "internal:team_responsible", jira_team
|
||||
)
|
||||
logger.info(
|
||||
"CODEOWNER: %s code owner team determined to be %s based on location %s",
|
||||
component_key,
|
||||
jira_team,
|
||||
location,
|
||||
)
|
||||
if location.startswith("src/third_party/"):
|
||||
location = location.replace("src/third_party/", "")
|
||||
if location in third_party_folders:
|
||||
third_party_folders[location] += 1
|
||||
logger.debug(
|
||||
f"THIRD_PARTY FOLDER: {component_key} matched folder {location} specified in SBOM"
|
||||
"THIRD_PARTY FOLDER: %s matched folder %s specified in SBOM",
|
||||
component_key,
|
||||
location,
|
||||
)
|
||||
elif os.path.isdir(git_info.repo_root.as_posix() + "/" + location):
|
||||
logger.debug(
|
||||
"THIRD_PARTY FOLDER: %s folder %s specified in SBOM exists",
|
||||
component_key,
|
||||
location,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"THIRD_PARTY FOLDER: {component_key} lists third-party location folder as {location}, which does not exist!"
|
||||
"THIRD_PARTY FOLDER: %s lists third-party location folder as %s, which does not exist!",
|
||||
component_key,
|
||||
location,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"THIRD_PARTY FOLDER: {component_key} lists a location as '{location}'. Ideally, all third-party components are located under 'src/third_party/'."
|
||||
"THIRD_PARTY FOLDER: %s lists a location as '%s'. Ideally, all third-party components are located under 'src/third_party/'.",
|
||||
component_key,
|
||||
location,
|
||||
)
|
||||
if not component_defines_location:
|
||||
logger.warning(
|
||||
f"THIRD_PARTY FOLDER: {component_key} does not define a location in '.evidence.occurrences[]'"
|
||||
"THIRD_PARTY FOLDER: %s does not define a location in '.evidence.occurrences[]'",
|
||||
component_key,
|
||||
)
|
||||
|
||||
# Deduplicate properties list
|
||||
component["properties"] = deduplicate_list_of_dicts(component.get("properties", []))
|
||||
|
||||
else:
|
||||
logger.warning(
|
||||
f"VERSION NOT FOUND: Could not find a version for {component_key}! Removing from SBOM. Component may need to be removed from the {sbom_metadata_path} file."
|
||||
"VERSION NOT FOUND: Could not find version information for '%s'! Removing from SBOM. Component may need to be removed from the %s file.",
|
||||
component_key,
|
||||
sbom_metadata_path,
|
||||
)
|
||||
del component
|
||||
remove_sbom_component(meta_bom, component_key)
|
||||
|
||||
print_banner("Third Party Folders")
|
||||
third_party_folders_missed = {
|
||||
@@ -928,8 +916,8 @@ def main() -> None:
|
||||
}
|
||||
if third_party_folders_missed:
|
||||
logger.warning(
|
||||
"THIRD_PARTY FOLDERS: 'src/third_party' folders not matched with a component: "
|
||||
+ ",".join(third_party_folders_missed.keys())
|
||||
"THIRD_PARTY FOLDERS: 'src/third_party' folders not matched with a component: %s",
|
||||
",".join(third_party_folders_missed.keys()),
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
@@ -947,16 +935,25 @@ def main() -> None:
|
||||
print_banner("New Endor Labs components")
|
||||
if endor_components:
|
||||
logger.info(
|
||||
f"ENDOR SBOM: There are {len(endor_components)} unmatched components in the Endor Labs SBOM. Adding as-is. The applicable metadata should be added to the metadata SBOM for the next run."
|
||||
"ENDOR SBOM: There are %d unmatched components in the Endor Labs SBOM. Adding as-is. The applicable metadata should be added to the metadata SBOM for the next run.",
|
||||
len(endor_components),
|
||||
)
|
||||
for component in endor_components:
|
||||
# set scope to excluded by default until the component is evaluated
|
||||
endor_components[component]["scope"] = "excluded"
|
||||
|
||||
# Add blank object for missing fields to avoid issues for downstream processing expecting those fields to exist
|
||||
if "licenses" not in endor_components[component]:
|
||||
endor_components[component]["licenses"] = []
|
||||
logger.warning(
|
||||
"LICENSES: %s does not have a 'licenses' field. Adding empty list to component.",
|
||||
endor_components[component]["bom-ref"],
|
||||
)
|
||||
meta_bom["components"].append(endor_components[component])
|
||||
meta_bom["dependencies"].append(
|
||||
{"ref": endor_components[component]["bom-ref"], "dependsOn": []}
|
||||
)
|
||||
logger.info(f"SBOM AS-IS COMPONENT: Added {component}")
|
||||
logger.warning("SBOM AS-IS COMPONENT: Added %s", component)
|
||||
|
||||
# endregion Parse unmatched Endor Labs components
|
||||
|
||||
@@ -966,14 +963,17 @@ def main() -> None:
|
||||
sbom_app_version_changed = (
|
||||
prev_bom["metadata"]["component"]["version"] != meta_bom["metadata"]["component"]["version"]
|
||||
)
|
||||
logger.info(f"SUMMARY: MongoDB version changed: {sbom_app_version_changed}")
|
||||
logger.info("SUMMARY: MongoDB version changed: %s", sbom_app_version_changed)
|
||||
|
||||
# Have the components changed?
|
||||
prev_components = sbom_components_to_dict(prev_bom, with_version=True)
|
||||
meta_components = sbom_components_to_dict(meta_bom, with_version=True)
|
||||
sbom_components_changed = prev_components.keys() != meta_components.keys()
|
||||
logger.info(
|
||||
f"SBOM_DIFF: SBOM components changed (added, removed, or version): {sbom_components_changed}. Previous SBOM has {len(prev_components)} components; New SBOM has {len(meta_components)} components"
|
||||
"SBOM_DIFF: SBOM components changed (added, removed, or version): %s. Previous SBOM has %d components; New SBOM has %d components",
|
||||
sbom_components_changed,
|
||||
len(prev_components),
|
||||
len(meta_components),
|
||||
)
|
||||
|
||||
# Components in prev SBOM but not in generated SBOM
|
||||
@@ -982,16 +982,16 @@ def main() -> None:
|
||||
prev_components_diff = list(set(prev_components.keys()) - set(meta_components.keys()))
|
||||
if prev_components_diff:
|
||||
logger.info(
|
||||
"SBOM_DIFF: Components in previous SBOM and not in generated SBOM: "
|
||||
+ ",".join(prev_components_diff)
|
||||
"SBOM_DIFF: Components in previous SBOM and not in generated SBOM: %s",
|
||||
",".join(prev_components_diff),
|
||||
)
|
||||
|
||||
# Components in generated SBOM but not in prev SBOM
|
||||
meta_components_diff = list(set(meta_components.keys()) - set(prev_components.keys()))
|
||||
if meta_components_diff:
|
||||
logger.info(
|
||||
"SBOM_DIFF: Components in generated SBOM and not in previous SBOM: "
|
||||
+ ",".join(meta_components_diff)
|
||||
"SBOM_DIFF: Components in generated SBOM and not in previous SBOM: %s",
|
||||
",".join(meta_components_diff),
|
||||
)
|
||||
|
||||
# serialNumber https://cyclonedx.org/docs/1.5/json/#serialNumber
|
||||
@@ -1020,13 +1020,17 @@ def main() -> None:
|
||||
# metadata.tools https://cyclonedx.org/docs/1.5/json/#metadata_tools
|
||||
meta_bom["metadata"]["tools"] = endor_bom["metadata"]["tools"]
|
||||
|
||||
write_sbom_json_file(meta_bom, sbom_out_path)
|
||||
write_sbom_json_file(meta_bom, sbom_out_internal_path)
|
||||
|
||||
convert_sbom_to_public(meta_bom)
|
||||
write_sbom_json_file(meta_bom, sbom_out_public_path)
|
||||
|
||||
# Access the collected warnings
|
||||
print_banner("CONSOLIDATED WARNINGS")
|
||||
warnings = []
|
||||
for record in warning_handler.warnings:
|
||||
warnings.append("- " + record.getMessage())
|
||||
warnings.sort()
|
||||
|
||||
print("\n".join(warnings))
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -6,11 +6,19 @@ Script that opens a PR using a bot to update SBOM-related files.
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
from github import Commit, GithubException, GithubIntegration, GitRef, PullRequest, Repository
|
||||
from github import (
|
||||
GithubException,
|
||||
GithubIntegration,
|
||||
GitRef,
|
||||
InputGitTreeElement,
|
||||
PullRequest,
|
||||
Repository,
|
||||
)
|
||||
|
||||
SBOM_FILES = ["sbom.json", "README.third_party.md"]
|
||||
SBOM_FILES = ["sbom.json", "sbom.private.json", "README.third_party.md"]
|
||||
|
||||
|
||||
def get_repository(github_owner, github_repo, app_id, _private_key) -> Repository.Repository:
|
||||
@@ -39,16 +47,16 @@ def get_pull_request(branch_gitref: GitRef.GitRef) -> PullRequest.PullRequest |
|
||||
return None
|
||||
|
||||
|
||||
def create_branch(base_branch, new_branch) -> None:
|
||||
def create_branch(repository, base_branch, new_branch) -> None:
|
||||
"""
|
||||
Create a new branch or get existing branch.
|
||||
"""
|
||||
try:
|
||||
print(f"Attempting to create branch '{new_branch}' with base branch '{base_branch}'.")
|
||||
ref = f"refs/heads/{new_branch}"
|
||||
base_repo_branch = repo.get_branch(base_branch)
|
||||
base_repo_branch = repository.get_branch(base_branch)
|
||||
sha = base_repo_branch.commit.sha
|
||||
repo.create_git_ref(ref=ref, sha=sha)
|
||||
repository.create_git_ref(ref=ref, sha=sha)
|
||||
print(f"Created branch '{new_branch}', ref: {ref}, sha: {sha}")
|
||||
except GithubException as e:
|
||||
if e.status == 422:
|
||||
@@ -57,25 +65,36 @@ def create_branch(base_branch, new_branch) -> None:
|
||||
raise
|
||||
|
||||
|
||||
def read_text_file(file_path: str) -> str:
|
||||
def read_text_file(path: str) -> str:
|
||||
"""Read a text file and return as string"""
|
||||
try:
|
||||
with open(file_path, "r", encoding="utf-8") as file:
|
||||
with open(path, "r", encoding="utf-8") as file:
|
||||
content = file.read()
|
||||
return content
|
||||
except FileNotFoundError:
|
||||
print(f"ERROR: The file '{file_path}' was not found.")
|
||||
return f"ERROR: The file '{file_path}' was not found."
|
||||
except Exception as e:
|
||||
print(f"ERROR: The file '{path}' was not found.")
|
||||
return f"ERROR: The file '{path}' was not found."
|
||||
except (OSError, UnicodeDecodeError) as e:
|
||||
print(f"An error occurred: {e}")
|
||||
return f"ERROR: An error occurred while reading '{path}': {e}"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description="This script checks for changes to SBOM and related files and creats a PR if files have been updated.",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
description=(
|
||||
"This script checks for changes to SBOM and related files and creates a PR if "
|
||||
"files have been updated."
|
||||
),
|
||||
)
|
||||
parser.add_argument("--github-owner", help="GitHub org/owner (e.g., 10gen).", type=str)
|
||||
parser.add_argument("--github-repo", help="GitHub repository name (e.g., mongo).", type=str)
|
||||
parser.add_argument(
|
||||
"--branch-filter",
|
||||
help="Create a PR only if base branch matches regex.",
|
||||
type=str,
|
||||
default=".*",
|
||||
)
|
||||
parser.add_argument("--base-branch", help="base branch to merge into.", type=str)
|
||||
parser.add_argument("--new-branch", help="New branch for the PR.", type=str)
|
||||
parser.add_argument("--pr-title", help="Title for the PR.", type=str)
|
||||
@@ -98,9 +117,17 @@ if __name__ == "__main__":
|
||||
|
||||
if not args.app_id or not args.private_key:
|
||||
parser.error(
|
||||
"Must define --app-id or env MONGO_PR_BOT_APP_ID and --private-key or env MONGO_PR_BOT_PRIVATE_KEY."
|
||||
"Must define --app-id or env MONGO_PR_BOT_APP_ID and --private-key or env "
|
||||
"MONGO_PR_BOT_PRIVATE_KEY."
|
||||
)
|
||||
|
||||
# Check if base branch matches the branch filter regex
|
||||
if not re.fullmatch(args.branch_filter, args.base_branch):
|
||||
print(
|
||||
f"Base branch '{args.base_branch}' does not match branch filter '{args.branch_filter}'. Terminating as successful."
|
||||
)
|
||||
sys.exit(0)
|
||||
|
||||
# Replace spaces with newline, if applicable
|
||||
private_key = (
|
||||
args.private_key[:31] + args.private_key[31:-29].replace(" ", "\n") + args.private_key[-29:]
|
||||
@@ -109,17 +136,29 @@ if __name__ == "__main__":
|
||||
repo = get_repository(args.github_owner, args.github_repo, args.app_id, private_key)
|
||||
print("repo: ", repo)
|
||||
|
||||
HAS_UPDATE = False
|
||||
# Collect all changed files first so we can commit them in a single commit
|
||||
changed_files: list[tuple[str, str]] = []
|
||||
|
||||
for file_path in SBOM_FILES:
|
||||
original_file = repo.get_contents(file_path, ref=f"refs/heads/{args.base_branch}")
|
||||
print("original_file: ", original_file)
|
||||
original_content = original_file.decoded_content.decode()
|
||||
print(f"Checking file '{file_path}' on '{args.base_branch}' for changes...")
|
||||
# Try to get the existing file from the base branch; 404 means "new file"
|
||||
try:
|
||||
original_file = repo.get_contents(file_path, ref=f"refs/heads/{args.base_branch}")
|
||||
print("original_file: ", original_file)
|
||||
original_content = original_file.decoded_content.decode()
|
||||
except GithubException as e:
|
||||
if e.status in [403, 404]:
|
||||
print(f"'{file_path}' does not exist on {args.base_branch}; treating as new file")
|
||||
original_content = ""
|
||||
else:
|
||||
raise
|
||||
|
||||
try:
|
||||
with open(file_path, "r", encoding="utf-8") as file:
|
||||
new_content = file.read()
|
||||
except FileNotFoundError:
|
||||
# print() does not interpolate logging-style '%s' arguments; format the path explicitly.
print(f"Error: file '{file_path}' not found.")
|
||||
continue
|
||||
|
||||
# Compare content with removed Endor Labs version to avoid triggering a new SBOM on only that change
|
||||
PATTERN = r'{"name":"EndorLabsInc","version":".*"}'
|
||||
@@ -128,37 +167,47 @@ if __name__ == "__main__":
|
||||
new_content_compare = re.sub(PATTERN, REPL, "".join(new_content.split()))
|
||||
|
||||
if original_content_compare != new_content_compare:
|
||||
create_branch(args.base_branch, args.new_branch)
|
||||
original_file_new_branch = repo.get_contents(
|
||||
file_path, ref=f"refs/heads/{args.new_branch}"
|
||||
print(f"Detected change in '{file_path}'")
|
||||
changed_files.append((file_path, new_content))
|
||||
|
||||
if changed_files:
|
||||
# Ensure the branch exists (create if needed)
|
||||
create_branch(repo, args.base_branch, args.new_branch)
|
||||
|
||||
# Small delay to reduce chance of 409s immediately after branch creation
|
||||
time.sleep(5)
|
||||
|
||||
# Base commit/tree on the current head of the PR branch
|
||||
branch_ref = repo.get_branch(args.new_branch)
|
||||
base_commit_sha = branch_ref.commit.sha
|
||||
base_commit = repo.get_git_commit(base_commit_sha)
|
||||
base_tree = repo.get_git_tree(base_commit_sha)
|
||||
|
||||
# Build tree elements for all changed files in one go
|
||||
elements = [
|
||||
InputGitTreeElement(
|
||||
path=path,
|
||||
mode="100644",
|
||||
type="blob",
|
||||
content=content,
|
||||
)
|
||||
print("original_file_new_branch: ", original_file_new_branch)
|
||||
for path, content in changed_files
|
||||
]
|
||||
|
||||
print("New file is different from original file.")
|
||||
print("repo.update_file:")
|
||||
print(f" message: Updating '{file_path}'")
|
||||
print(" path: ", file_path)
|
||||
print(" sha: ", original_file_new_branch.sha)
|
||||
print(" content:")
|
||||
print(new_content[:128])
|
||||
print("...[truncated]...")
|
||||
print(new_content[-128:])
|
||||
print(" branch: ", args.new_branch)
|
||||
time.sleep(10) # Wait to reduce chance of 409 errors
|
||||
update_file_result = repo.update_file(
|
||||
message=f"Updating '{file_path}'",
|
||||
path=file_path,
|
||||
sha=original_file_new_branch.sha,
|
||||
content=new_content,
|
||||
branch=args.new_branch,
|
||||
)
|
||||
print("update_file_result: ", update_file_result)
|
||||
commit: Commit = update_file_result.get("commit")
|
||||
print("commit: ", commit)
|
||||
new_tree = repo.create_git_tree(elements, base_tree)
|
||||
|
||||
HAS_UPDATE = True
|
||||
commit_message = "Update SBOM-related files: " + ", ".join(
|
||||
path for path, _ in changed_files
|
||||
)
|
||||
print("Creating single commit with message:", commit_message)
|
||||
|
||||
if HAS_UPDATE:
|
||||
new_commit = repo.create_git_commit(commit_message, new_tree, [base_commit])
|
||||
|
||||
# Move branch ref to new commit (single commit containing all file updates)
|
||||
ref = repo.get_git_ref(f"heads/{args.new_branch}")
|
||||
ref.edit(new_commit.sha)
|
||||
|
||||
if changed_files:
|
||||
# Get open PR or create new PR
|
||||
pull_requests = repo.get_pulls(
|
||||
state="open", head=f"{args.github_owner}:{args.new_branch}", base=args.base_branch
|
||||
@@ -173,7 +222,6 @@ if __name__ == "__main__":
|
||||
print(f" head={args.new_branch}")
|
||||
print(f" base={args.base_branch}")
|
||||
print(f" body={pr_body}")
|
||||
|
||||
pull_request = repo.create_pull(
|
||||
title=args.pr_title,
|
||||
head=args.new_branch,
|
||||
|
||||
261
buildscripts/sbom/sbom_utils.py
Normal file
261
buildscripts/sbom/sbom_utils.py
Normal file
@@ -0,0 +1,261 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Utility functions for processing CycloneDX SBOMs
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
logger = logging.getLogger("generate_sbom")
|
||||
logger.setLevel(logging.NOTSET)
|
||||
|
||||
# ################ PURL Validation ################
|
||||
# Optional trailing PURL parts (version, qualifiers, subpath) shared by every
# per-type pattern in REGEX_PURL. The pattern is anchored at the end of the string.
REGEX_STR_PURL_OPTIONAL = (
    # Optional Version (any chars except ? @ #)
    r"(?:@[^?@#]*)?"
    # Optional Qualifiers (any chars except @ #)
    r"(?:\?[^@#]*)?"
    # Optional Subpath (any chars)
    r"(?:#.*)?$"
)

# Compiled validation patterns keyed by PURL type.
REGEX_PURL = {
    # deb PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/deb-definition.md
    "deb": re.compile(
        r"^pkg:deb/"  # Scheme and type
        # Namespace: exactly one of the supported distributions, lowercase.
        # (No '+' quantifier here: it would accept e.g. 'debianubuntu'.)
        r"(debian|ubuntu)"
        r"/"
        r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name
    ),
    # Generic PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/generic-definition.md
    "generic": re.compile(
        r"^pkg:generic/"  # Scheme and type
        r"([a-zA-Z0-9._-]+/)?"  # Optional namespace segment
        r"[a-zA-Z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name (required)
    ),
    # GitHub PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/github-definition.md
    "github": re.compile(
        r"^pkg:github/"  # Scheme and type
        # Namespace (organization/user), letters must be lowercase
        r"[a-z0-9-]+"
        r"/"
        r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name (repository)
    ),
    # PyPI PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/pypi-definition.md
    "pypi": re.compile(
        r"^pkg:pypi/"  # Scheme and type
        r"[a-z0-9_-]+"  # Name, letters must be lowercase, dashes, underscore
        + REGEX_STR_PURL_OPTIONAL
    ),
}
|
||||
|
||||
# Metadata SBOM requirements
|
||||
METADATA_FIELDS_REQUIRED = [
|
||||
"type",
|
||||
"bom-ref",
|
||||
"group",
|
||||
"name",
|
||||
"version",
|
||||
"description",
|
||||
"licenses",
|
||||
"copyright",
|
||||
"externalReferences",
|
||||
"scope",
|
||||
]
|
||||
METADATA_FIELDS_ONE_OF = [
|
||||
["author", "supplier"],
|
||||
["purl", "cpe"],
|
||||
]
|
||||
|
||||
|
||||
def add_component_property(component: dict, name: str, value: str) -> None:
    """Append a name/value entry to the 'properties' list of an SBOM component.

    The 'properties' list is created on the component when it is not present yet.
    The component is modified in place; existing entries are never replaced, so
    calling this twice with the same arguments yields two identical entries.
    """
    entry = {"name": name, "value": value}
    component.setdefault("properties", []).append(entry)
|
||||
|
||||
|
||||
def check_metadata_sbom(meta_bom: dict) -> None:
    """Log a warning for every expected metadata field a component lacks.

    For each entry of ``meta_bom["components"]``:
      * every name in METADATA_FIELDS_REQUIRED must be a key of the component;
      * for every group in METADATA_FIELDS_ONE_OF, at least one name of the
        group must be a key of the component.

    Nothing is raised or returned; findings are only reported through the
    module logger. Components are identified in the message by 'bom-ref',
    falling back to 'name'.
    """
    for entry in meta_bom["components"]:
        absent = [name for name in METADATA_FIELDS_REQUIRED if name not in entry]
        for name in absent:
            logger.warning(
                "METADATA: %s is missing required field '%s'.",
                (entry.get("bom-ref") or entry.get("name")),
                name,
            )

        for alternatives in METADATA_FIELDS_ONE_OF:
            if any(option in entry for option in alternatives):
                continue
            logger.warning(
                "METADATA: %s is missing one of fields '%s'.",
                (entry.get("bom-ref") or entry.get("name")),
                alternatives,
            )
|
||||
|
||||
|
||||
def convert_sbom_to_public(sbom_dict: dict):
    """Remove internal-only components and properties from an SBOM, in place.

    A component is treated as internal when any of its
    ``evidence.occurrences[].location`` values starts with
    'src/third_party/private'. Internal components are dropped from
    'components', from 'dependencies' (as a 'ref') and from every remaining
    'dependsOn' list.

    Afterwards every property whose name starts with 'internal:' is removed
    from the remaining components. Note that each remaining component ends up
    with a 'properties' list, even if it had none before.

    Args:
        sbom_dict: SBOM document with 'components' and 'dependencies' lists.
            It is mutated; nothing is returned.
    """

    original_components_len = len(sbom_dict["components"])
    # Identify internal components based on evidence occurrence in internal folders.
    # A set gives constant-time membership checks in the filters below.
    internal_components = {
        c["bom-ref"]
        for c in sbom_dict["components"]
        if any(
            # 'location' may be absent or explicitly null; treat both as "no location"
            (occurrence.get("location") or "").startswith("src/third_party/private")
            for occurrence in c.get("evidence", {}).get("occurrences", [])
        )
    }

    # Remove internal components and any dependencies on them from the SBOM
    sbom_dict["components"] = [
        c for c in sbom_dict["components"] if c["bom-ref"] not in internal_components
    ]
    sbom_dict["dependencies"] = [
        d for d in sbom_dict["dependencies"] if d["ref"] not in internal_components
    ]
    for dependency in sbom_dict["dependencies"]:
        # 'dependsOn' is optional on a dependency entry; only filter it when present
        if "dependsOn" in dependency:
            dependency["dependsOn"] = [
                d for d in dependency["dependsOn"] if d not in internal_components
            ]
    logger.info(
        "PUBLIC SBOM: Removed %d internal components",
        original_components_len - len(sbom_dict["components"]),
    )

    # Remove internal properties from public components
    original_properties_len = sum(len(c.get("properties", [])) for c in sbom_dict["components"])
    for component in sbom_dict["components"]:
        component["properties"] = [
            p
            for p in component.get("properties", [])
            if not p.get("name", "").startswith("internal:")
        ]
    logger.info(
        "PUBLIC SBOM: Removed %d internal properties from public components",
        original_properties_len
        - sum(len(c.get("properties", [])) for c in sbom_dict["components"]),
    )
|
||||
|
||||
|
||||
def is_valid_purl(purl: str) -> bool:
    """Return True when `purl` matches any of the per-type patterns in REGEX_PURL.

    The patterns are tried in the dict's iteration order and the first match
    wins; the matching type and pattern are reported at debug level.
    """
    matched_type = next(
        (kind for kind, pattern in REGEX_PURL.items() if pattern.match(purl)),
        None,
    )
    if matched_type is None:
        return False

    logger.debug(
        "PURL: %s matched PURL type '%s' regex '%s'",
        purl,
        matched_type,
        REGEX_PURL[matched_type].pattern,
    )
    return True
|
||||
|
||||
|
||||
def read_sbom_json_file(file_path: str) -> dict:
    """Load a JSON SBOM file (schema is not validated).

    Args:
        file_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        The parsed JSON document. When the file cannot be opened/read (OSError)
        or is not valid JSON (json.JSONDecodeError), the error is logged and
        None is returned, so callers should check the result.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as input_json:
            result = json.load(input_json)
    except OSError as e:
        logger.error("Error loading SBOM file from %s", file_path)
        logger.error(e)
        return None
    except json.JSONDecodeError as e:
        logger.error("Error decoding JSON SBOM file from %s", file_path)
        logger.error(e)
        return None

    # The component count is informational only: a document without a
    # 'components' list must not make a successful load fail.
    components = result.get("components") if isinstance(result, dict) else None
    component_count = len(components) if isinstance(components, list) else 0
    logger.info("SBOM loaded from %s with %d components", file_path, component_count)
    return result
|
||||
|
||||
|
||||
def remove_sbom_component(sbom_dict: dict, component_key: str) -> None:
    """Remove a component from the SBOM by its bom-ref key.

    The component is removed from 'components', from 'dependencies' (as a
    'ref') and from every remaining 'dependsOn' list. `sbom_dict` is modified
    in place.

    Args:
        sbom_dict: SBOM document with 'components' and 'dependencies' lists.
        component_key: The bom-ref to remove, typically without its version.
            A reference matches when it equals the key or continues it with a
            version ('@'), qualifiers ('?') or subpath ('#'). A plain prefix
            match is not enough: removing '.../abc' must not also remove
            '.../abc-extra@1.0'.
    """

    def _is_target(ref: str) -> bool:
        """True when `ref` denotes the component identified by `component_key`."""
        if not ref.startswith(component_key):
            return False
        remainder = ref[len(component_key) :]
        return remainder == "" or remainder[0] in "@?#"

    sbom_dict["components"] = [
        c for c in sbom_dict["components"] if not _is_target(c["bom-ref"])
    ]
    sbom_dict["dependencies"] = [
        d for d in sbom_dict["dependencies"] if not _is_target(d["ref"])
    ]
    for dependency in sbom_dict["dependencies"]:
        # 'dependsOn' is optional on a dependency entry; only filter it when present
        if "dependsOn" in dependency:
            dependency["dependsOn"] = [
                d for d in dependency["dependsOn"] if not _is_target(d)
            ]
    logger.debug("Removed component '%s' from SBOM", component_key)
|
||||
|
||||
|
||||
def set_component_version(
    component: dict, version: str, purl_version: str = None, cpe_version: str = None
) -> None:
    """Substitute the '{{VERSION}}' placeholder in a metadata SBOM component.

    The component is updated in place:
      * 'bom-ref' receives `purl_version` (unquoted);
      * 'version' receives `version`;
      * 'purl', when set, receives the URL-quoted `purl_version`, and a warning
        is logged if the resulting PURL fails `is_valid_purl`;
      * 'cpe', when set, receives `cpe_version`.

    `purl_version` and `cpe_version` fall back to `version` when empty or None.
    """
    placeholder = "{{VERSION}}"
    purl_version = purl_version or version
    cpe_version = cpe_version or version

    component["bom-ref"] = component["bom-ref"].replace(placeholder, purl_version)
    component["version"] = component["version"].replace(placeholder, version)

    purl = component.get("purl")
    if purl:
        component["purl"] = purl.replace(placeholder, urllib.parse.quote(purl_version))
        if not is_valid_purl(component["purl"]):
            logger.warning("PURL: Invalid PURL (%s)", component["purl"])

    cpe = component.get("cpe")
    if cpe:
        component["cpe"] = cpe.replace(placeholder, cpe_version)
|
||||
|
||||
|
||||
def set_dependency_version(dependencies: list, meta_bom_ref: str, purl_version: str) -> None:
    """Substitute '{{VERSION}}' in dependency entries that reference `meta_bom_ref`.

    Every entry of `dependencies` is updated in place: its 'ref' is rewritten
    when it contains the placeholder and equals `meta_bom_ref`, and each
    'dependsOn' item equal to `meta_bom_ref` is rewritten as well. The number
    of rewritten refs and dependsOn items is logged at debug level.
    """
    placeholder = "{{VERSION}}"
    refs_updated = 0
    depends_on_updated = 0

    for entry in dependencies:
        if placeholder in entry["ref"] and entry["ref"] == meta_bom_ref:
            entry["ref"] = entry["ref"].replace(placeholder, purl_version)
            refs_updated += 1

        children = entry["dependsOn"]
        for index, child in enumerate(children):
            if child == meta_bom_ref:
                children[index] = child.replace(placeholder, purl_version)
                depends_on_updated += 1

    logger.debug(
        "set_dependency_version: '%s' updated %d refs and %d dependsOn",
        meta_bom_ref,
        refs_updated,
        depends_on_updated,
    )
|
||||
|
||||
|
||||
def sbom_components_to_dict(sbom: dict, with_version: bool = False) -> dict:
    """Index the SBOM's components by their URL-unquoted 'bom-ref'.

    With `with_version` False (the default) everything from the first '@' on
    is cut from the key, giving a version-less key; with `with_version` True
    the full unquoted bom-ref is used. When several components map to the same
    key, the last one wins.
    """
    indexed = {}
    for entry in sbom["components"]:
        key = urllib.parse.unquote(entry["bom-ref"])
        if not with_version:
            key = key.split("@")[0]
        indexed[key] = entry
    return indexed
|
||||
|
||||
|
||||
def write_sbom_json_file(sbom_dict: dict, file_path: str) -> None:
    """Save a JSON SBOM file (schema is not validated).

    The document is written with two-space indentation and a trailing newline.
    Failures to write (OSError) or to serialize (TypeError) are logged and not
    re-raised.

    Args:
        sbom_dict: The SBOM document to serialize.
        file_path: Destination path; it is made absolute before use.
    """
    try:
        file_path = os.path.abspath(file_path)
        # Serialize before opening the file: opening in "w" mode truncates it, so a
        # serialization error after that point would destroy the existing SBOM.
        formatted_sbom = json.dumps(sbom_dict, indent=2) + "\n"
        with open(file_path, "w", encoding="utf-8") as output_json:
            output_json.write(formatted_sbom)
    except OSError as e:
        logger.error("Error writing SBOM file to %s", file_path)
        logger.error(e)
    except TypeError as e:
        logger.error("Error serializing SBOM to JSON for file %s", file_path)
        logger.error(e)
    else:
        logger.info("SBOM file saved to %s", file_path)
|
||||
@@ -290,11 +290,13 @@ def main() -> int:
|
||||
help="Whether to apply formatting to the output file.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--input-file", default="sbom.json", help="The input CycloneDX file to format and lint."
|
||||
"--input-file",
|
||||
default="sbom.private.json",
|
||||
help="The input CycloneDX file to format and lint.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-file",
|
||||
default="sbom.json",
|
||||
default="sbom.private.json",
|
||||
help="The file to output to when formatting is specified.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -9,16 +9,16 @@ import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
sys.path.append("buildscripts/sbom")
|
||||
|
||||
from buildscripts.sbom.config import get_semver_from_release_version, regex_semver
|
||||
from buildscripts.sbom.endorctl_utils import EndorCtl
|
||||
from buildscripts.sbom.generate_sbom import is_valid_purl
|
||||
from buildscripts.sbom.sbom_utils import is_valid_purl
|
||||
|
||||
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
|
||||
|
||||
|
||||
class TestEndorctl(unittest.TestCase):
|
||||
"""Test cases for the EndorCtl class."""
|
||||
|
||||
def test_endorctl_init(self):
|
||||
"""Tests the Endorctl constructor."""
|
||||
e = EndorCtl(namespace="mongodb.10gen", retry_limit=1, sleep_duration=5)
|
||||
@@ -38,6 +38,12 @@ class TestEndorctl(unittest.TestCase):
|
||||
|
||||
|
||||
class TestConfigRegex(unittest.TestCase):
|
||||
"""Test suite for configuration regex patterns and PURL validation.
|
||||
|
||||
This test class validates regex patterns used for semantic versioning,
|
||||
version extraction from release strings, and Package URL (PURL) validation.
|
||||
"""
|
||||
|
||||
def test_semver_regex(self):
|
||||
"""Tests the regex_semver."""
|
||||
|
||||
@@ -180,10 +186,9 @@ class TestConfigRegex(unittest.TestCase):
|
||||
self.assertFalse(is_valid_purl(purl), f"Expected '{purl}' to be invalid")
|
||||
|
||||
|
||||
__unittest = True
|
||||
|
||||
|
||||
class TestMetadataFile(unittest.TestCase):
|
||||
"""Unit tests for SBOM metadata file validation and version tag consistency."""
|
||||
|
||||
TEST_DIR = os.path.join("buildscripts", "sbom")
|
||||
VERSION_TAG = "{{VERSION}}"
|
||||
|
||||
@@ -194,6 +199,13 @@ class TestMetadataFile(unittest.TestCase):
|
||||
return json.loads(sbom_json)
|
||||
|
||||
def test_metadata_sbom_version_tags(self):
|
||||
"""Test that SBOM metadata components have consistent version tags.
|
||||
|
||||
Verifies that each component in the metadata SBOM file contains required fields
|
||||
(bom-ref and version) plus at least one of purl or cpe. Additionally ensures that
|
||||
the VERSION_TAG is either present in all component properties or absent from all,
|
||||
maintaining consistency across bom-ref, version, purl, and cpe fields.
|
||||
"""
|
||||
sbom_metadata_file = os.path.join(self.TEST_DIR, "metadata.cdx.json")
|
||||
print(sbom_metadata_file)
|
||||
meta_bom = self.read_sbom_json_file(sbom_metadata_file)
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
- Services & Integrations
|
||||
10gen/performance:
|
||||
- Product Performance
|
||||
10gen/platsec-server:
|
||||
- Product Security
|
||||
10gen/query:
|
||||
- Query Execution
|
||||
- Query Integration
|
||||
@@ -32,6 +34,8 @@
|
||||
- Query Optimization
|
||||
10gen/server-catalog-and-routing:
|
||||
- Catalog And Routing
|
||||
10gen/server-catalog-and-routing-shard-catalog:
|
||||
- Catalog And Routing
|
||||
10gen/server-cluster-scalability:
|
||||
- Cluster Scalability
|
||||
10gen/server-networking-and-observability:
|
||||
@@ -60,5 +64,7 @@
|
||||
- Storage Execution
|
||||
10gen/server-workload-resilience:
|
||||
- Workload Execution
|
||||
10gen/storage-engines:
|
||||
- Storage Engines
|
||||
10gen/streams-engine:
|
||||
- Atlas Streams
|
||||
|
||||
@@ -19,7 +19,9 @@ def process_owners(cur_dir: str) -> tuple[dict[re.Pattern, list[str]], bool]:
|
||||
contents = yaml.safe_load(f)
|
||||
|
||||
assert "version" in contents, f"Version not found in {owners_file_path}"
|
||||
assert contents["version"] == "1.0.0", f"Invalid version in {owners_file_path}"
|
||||
assert (
|
||||
contents["version"] == "1.0.0" or contents["version"] == "2.0.0"
|
||||
), f"Invalid version in {owners_file_path}"
|
||||
assert "filters" in contents
|
||||
|
||||
no_parent_owners = False
|
||||
|
||||
@@ -2029,6 +2029,7 @@ tasks:
|
||||
- "buildscripts/sbom/generate_sbom.py"
|
||||
- "--project=https://github.com/10gen/mongo.git"
|
||||
- "--target=branch"
|
||||
- "--branch-filter=${BRANCH_FILTER}"
|
||||
- "--branch=${branch_name}"
|
||||
- "--endorctl-path=${workdir}/endorctl"
|
||||
- "--config-path=${workdir}/.endorctl"
|
||||
@@ -2053,6 +2054,7 @@ tasks:
|
||||
- "buildscripts/sbom/sbom_files_pr.py"
|
||||
- "--github-owner=${github_org}"
|
||||
- "--github-repo=${github_repo}"
|
||||
- "--branch-filter=${BRANCH_FILTER}"
|
||||
- "--base-branch=${branch_name}"
|
||||
- "--new-branch=SERVER-111072/sbom_update_${branch_name}"
|
||||
- "--pr-title=SERVER-111072 Auto-generated SBOM files [${branch_name}]"
|
||||
@@ -2103,6 +2105,7 @@ tasks:
|
||||
- "--run"
|
||||
env:
|
||||
REQUESTER: ${requester}
|
||||
BRANCH_FILTER: ${BRANCH_FILTER}
|
||||
BRANCH_NAME: ${branch_name}
|
||||
GITHUB_ORG: ${github_org}
|
||||
GITHUB_REPO: ${github_repo}
|
||||
@@ -2110,7 +2113,7 @@ tasks:
|
||||
CONTAINER_IMAGE: 901841024863.dkr.ecr.us-east-1.amazonaws.com/release-infrastructure/silkbomb:2.0
|
||||
CONTAINER_ENV_FILES: ${workdir}/silkbomb.env
|
||||
WORKING_DIR: ${workdir}
|
||||
SBOM_REPO_PATH: sbom.json
|
||||
SBOM_REPO_PATH: ${SBOM_REPO_PATH}
|
||||
LOCAL_REPO_PATH: src
|
||||
|
||||
- name: check_for_noexcept
|
||||
|
||||
@@ -108,10 +108,10 @@ tasks:
|
||||
CONTAINER_ENV_FILES: ${workdir}/silkbomb.env
|
||||
CONTAINER_VOLUMES: -v ${workdir}:/workdir
|
||||
CONTAINER_IMAGE: 901841024863.dkr.ecr.us-east-1.amazonaws.com/release-infrastructure/silkbomb:2.0
|
||||
SBOM_REPO_PATH: sbom.json
|
||||
SBOM_REPO_PATH: sbom.private.json
|
||||
SBOM_OUT_PATH: ${workdir}/sbom-with-vex-${branch_name}.json
|
||||
SILKBOMB_COMMAND: augment
|
||||
SILKBOMB_ARGS: --sbom-in /workdir/src/sbom.json --sbom-out /workdir/src/sbom-with-vex-${branch_name}.json --repo ${github_org}/${github_repo} --branch ${branch_name}
|
||||
SILKBOMB_ARGS: --sbom-in /workdir/src/sbom.private.json --sbom-out /workdir/src/sbom-with-vex-${branch_name}.json --repo ${github_org}/${github_repo} --branch ${branch_name}
|
||||
- command: subprocess.exec
|
||||
display_name: Upload SBOM to Google Drive"
|
||||
params:
|
||||
|
||||
@@ -119,6 +119,22 @@ buildvariants:
|
||||
run_on: rhel92-small
|
||||
expansions:
|
||||
ENDOR_NAMESPACE: mongodb.10gen
|
||||
BRANCH_FILTER: master|v[0-9]+\.[0-9]+-staging
|
||||
stepback: false
|
||||
tasks:
|
||||
- name: update_sbom
|
||||
|
||||
- name: upload-sbom-if-changed
|
||||
display_name: "Upload SBOM if changed"
|
||||
allowed_requesters: ["commit"]
|
||||
expansions:
|
||||
BRANCH_FILTER: master|v[0-9]+\.[0-9]+-staging
|
||||
SBOM_REPO_PATH: &sbom_file sbom.private.json
|
||||
paths:
|
||||
- *sbom_file
|
||||
tags: ["assigned_to_jira_team_platsec_ssdlc"]
|
||||
run_on:
|
||||
- rhel8.8-small
|
||||
stepback: false
|
||||
tasks:
|
||||
- name: upload_sbom_via_silkbomb_if_changed
|
||||
|
||||
@@ -376,16 +376,3 @@ buildvariants:
|
||||
- name: sharding_pqs_index_filters
|
||||
distros:
|
||||
- rhel8.8-medium
|
||||
|
||||
- name: upload-sbom-if-changed
|
||||
display_name: "Upload SBOM if changed"
|
||||
allowed_requesters: ["commit"]
|
||||
activate: true
|
||||
paths:
|
||||
- "sbom.json"
|
||||
tags: ["assigned_to_jira_team_platsec_ssdlc"]
|
||||
run_on:
|
||||
- rhel8.8-small
|
||||
stepback: false
|
||||
tasks:
|
||||
- name: upload_sbom_via_silkbomb_if_changed
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import pathlib
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
@@ -50,7 +51,7 @@ def upload_sbom_via_silkbomb(
|
||||
container_options = ["--pull=always", "--platform=linux/amd64", "--rm"]
|
||||
container_env_files = ["--env-file", str(creds_file_path.resolve())]
|
||||
container_volumes = ["-v", f"{workdir}:/workdir"]
|
||||
silkbomb_command = "augment" # it augment first and uses upload command
|
||||
silkbomb_command = "upload"
|
||||
silkbomb_args = [
|
||||
"--sbom-in",
|
||||
f"/workdir/{local_repo_path}/{sbom_repo_path}",
|
||||
@@ -105,7 +106,7 @@ def upload_sbom_via_silkbomb(
|
||||
try:
|
||||
print(f"Running command: {' '.join(command)}")
|
||||
subprocess.run(command, check=True, text=True, capture_output=True, timeout=timeout_seconds)
|
||||
print("Updated sbom.json file upload via Silkbomb successful!")
|
||||
print("Updated SBOM file upload via Silkbomb successful!")
|
||||
except FileNotFoundError as e:
|
||||
print(f"Error: '{container_command}' command not found.")
|
||||
raise e
|
||||
@@ -135,6 +136,14 @@ def run(
|
||||
str,
|
||||
typer.Option(..., envvar="LOCAL_REPO_PATH", help="Path to the local git repository."),
|
||||
],
|
||||
branch_filter: Annotated[
|
||||
str,
|
||||
typer.Option(
|
||||
...,
|
||||
envvar="BRANCH_FILTER",
|
||||
help=r"Upload SBOM only if branch_name matches regex. (e.g., 'master|v[0-9]+\.[0-9]+-staging').",
|
||||
),
|
||||
],
|
||||
branch_name: Annotated[
|
||||
str,
|
||||
typer.Option(..., envvar="BRANCH_NAME", help="The head branch (e.g., the PR branch name)."),
|
||||
@@ -147,7 +156,7 @@ def run(
|
||||
envvar="SBOM_REPO_PATH",
|
||||
help="Path to the SBOM file to check and upload.",
|
||||
),
|
||||
] = "sbom.json",
|
||||
] = "sbom.private.json",
|
||||
requester: Annotated[
|
||||
str,
|
||||
typer.Option(
|
||||
@@ -181,13 +190,11 @@ def run(
|
||||
bool, typer.Option("--check-sbom-file-change", help="Check for changes to the SBOM file.")
|
||||
] = False,
|
||||
):
|
||||
if requester != "commit" and not dry_run:
|
||||
print(f"Skipping: Run can only be triggered for 'commit', but requester was '{requester}'.")
|
||||
sys.exit(0)
|
||||
|
||||
major_branches = ["v7.0", "v8.0", "v8.1", "master"] # Only major branches that MongoDB supports
|
||||
if False and branch_name not in major_branches:
|
||||
print(f"Skipping: Branch '{branch_name}' is not a major branch. Exiting.")
|
||||
# Check if branch name matches the branch filter regex
|
||||
if not re.fullmatch(branch_filter, branch_name):
|
||||
print(
|
||||
f"Branch '{branch_name}' does not match branch filter '{branch_filter}'. Terminating as successful."
|
||||
)
|
||||
sys.exit(0)
|
||||
|
||||
repo_path = pathlib.Path(f"{workdir}/{local_repo_path}")
|
||||
|
||||
3
src/third_party/OWNERS.yml
vendored
3
src/third_party/OWNERS.yml
vendored
@@ -100,9 +100,6 @@ filters:
|
||||
- "pcre2":
|
||||
approvers:
|
||||
- 10gen/query-execution
|
||||
- "private/.placeholder":
|
||||
approvers:
|
||||
- 10gen/devprod-build
|
||||
- "prometheus-cpp":
|
||||
approvers:
|
||||
- 10gen/server-networking-and-observability
|
||||
|
||||
89
src/third_party/README.md
vendored
89
src/third_party/README.md
vendored
@@ -9,18 +9,18 @@ This policy applies to [github.com/mongodb/mongo](https://github.com/mongodb/mon
|
||||
1. Fork the third-party library into [github.com/mongodb-forks](https://github.com/mongodb-forks).
|
||||
> **Note:** To track versions for vulnerabilities, you must fork a named version (e.g., `v2.0.1`) rather than a specific commit.
|
||||
2. Pull the library from [github.com/mongodb-forks](https://github.com/mongodb-forks) into the `src/third_party` directory inside a folder named for the library being vendored.
|
||||
3. Include the added library in `/sbom.json` under `components`. This will be verified by the linter in `buildscripts/sbom_linter.py`. For more detail, see the [SBOM](#sbom) section below.
|
||||
3. It is not necessary to update the `/sbom.json` file, as an automated Evergreen task will add the component to the SBOM once merged.
|
||||
> **Optional, but preferred:** Add component metadata to `buildscripts/sbom/metadata.cdx.json`; see the [SBOM](#sbom) section below for field definitions. If not added, the automated SBOM generation will instead gather all available information from the C/C++ SCA tooling.
|
||||
4. Include a `scripts/import.sh` script inside the vendored library.
|
||||
> **Note:** A specific reference to the forked branch in [github.com/mongodb-forks](https://github.com/mongodb-forks) must be hardcoded. This helps developers understand and replicate the process used to vendor a specific library, facilitating maintenance.
|
||||
5. Include a `VERSION=XYZ` line in the `scripts/import.sh` script (here `XYZ` indicates the version of the third party library).
|
||||
5. Include a `VERSION=XYZ` line in the `scripts/import.sh` script (here `XYZ` indicates the version of the third party library). This line will be used by the automated SBOM generation.
|
||||
|
||||
## Updating a third-party library in the server to a new upstream version
|
||||
|
||||
1. Fork the new upstream version to the repo already created in [github.com/mongodb-forks](https://github.com/mongodb-forks).
|
||||
2. Pull the forked version from [github.com/mongodb-forks](https://github.com/mongodb-forks) to the vendored library in `src/third_party`.
|
||||
3. Update `src/third_party/<vendored-library>/scripts/import.sh` with the exact reference used.
|
||||
4. Update `/sbom.json` with the new vendored version.
|
||||
> **Note:** Remember to update both the `version` and the `purl`.
|
||||
4. It is not necessary to update the `/sbom.json` file, as an automated Evergreen task will update the component version in the SBOM once merged.
|
||||
|
||||
## Modifying a third-party library in the server
|
||||
|
||||
@@ -34,21 +34,27 @@ The `sbom.json` file in the root of the MongoDB repository contains key informat
|
||||
|
||||
Exhaustive documentation can be found at [https://cyclonedx.org/schema/](https://cyclonedx.org/schema/). This README is intended to describe our most common uses of fields. If your library does not easily fit the standard values below, please reach out to the Server Security team for assistance.
|
||||
|
||||
Custom or enriched component metadata can optionally be added to `buildscripts/sbom/metadata.cdx.json`. The automated SBOM generation Evergreen task will check for component metadata in this file and merge it with results from the C/C++ SCA tooling.
|
||||
|
||||
## Components
|
||||
|
||||
The top-level key "components" contains an array of third party components vendored in our repository. `component` objects have the following fields:
|
||||
The top-level key "components" contains an array of third party components vendored in our repository. `component` objects have the following fields in `buildscripts/sbom/metadata.cdx.json`:
|
||||
|
||||
| Field Name | Description |
|
||||
| --- | --- |
|
||||
`name` | The name of the component.|
|
||||
| `version` | The version of the component. The `import.sh` file created for the component should have a line like `VERSION=1.2.3` where the right side of the `=` matches this string.|
|
||||
| `type` | The type of the component, such as library, application, framework, etc. For our vendored components, this will generally be `library`.|
|
||||
| `purl` | Package URL. A URL that uniquely identifies the component and its version. This is a standard format that looks like `pkg:[type]/[packagename]@[version]`.|
|
||||
| `supplier` | The source of the package, often correlated with the package type in the `purl`.
|
||||
| `bom-ref` | A UUID to identify the component, since all other fields are subject to change. Can be generated by running `uuidgen`.|
|
||||
| `licenses` | Information about the licenses under which the component is used. For boilerplate licenses, this is the [SPDX license identifier](https://spdx.org/licenses/) for the license. This field also supports urls and text blobs.|
|
||||
| `scope` | The intended usage scope of the component in MongoDB. If the binary is distributed with our software, this must be `required`. For components used only for testing, this should be `excluded`.|
|
||||
| `evidence` | This contains an array of `occurences`, which in turn contain `location` strings specifying the location of the component in our repo.|
|
||||
|`bom-ref` | Should be the same as the `purl` field, including the `{{VERSION}}` as a placeholder string.|
|
||||
|`supplier` and/or `author` | The entity supplying the package and/or the author(s) of the package. Must have at least one of these fields. |
|
||||
|`group` | The grouping name or identifier. Typically the GitHub organization, the source package, or domain name.|
|
||||
|`name` | The name of the component.|
|
||||
|`version` | The version of the component. Set to `{{VERSION}}` as a placeholder string. The `import.sh` file created for the component should have a line like `VERSION=1.2.3` where the right side of the `=` specifies the version.|
|
||||
|`description` | A brief description of the package and its function.|
|
||||
|`scope` | Set to `required` if package is always included in the distribution, `optional` if sometimes included (e.g., Windows-only), or `excluded` if only used from build/test/dev.|
|
||||
|`licenses` | Information about the licenses under which the component is used. For boilerplate licenses, this is the [SPDX license identifier](https://spdx.org/licenses/) for the license. This field also supports urls and text blobs.|
|
||||
|`copyright` | A copyright notice informing users of the underlying claims to copyright ownership in a published work.|
|
||||
|`cpe` and/or `purl` | The Common Platform Enumeration (CPE) [CPE Dictionary](https://nvd.nist.gov/products/cpe) and/or Package URL (PURL) [specification](https://github.com/package-url/purl-spec). It is required that one or both of these fields be populated for the purposes of SBOM vulnerability analysis. Use `{{VERSION}}` as a placeholder string.|
|
||||
| `externalReferences` | This contains an array of informational links about the component, typically the location of the git repo (`url`) and the type (`distribution` or `vcs`). It is used to populate [README.third_party.md](/README.third_party.md). |
|
||||
| `evidence` | This contains an array of `occurrences`, which in turn contain `location` strings specifying the location of the component in our repo.|
|
||||
| `properties` | Additional custom properties related to the component, see below.|
|
||||
|
||||
## Properties
|
||||
@@ -57,7 +63,58 @@ Component objects contain a `properties` field that is used for adding our own p
|
||||
|
||||
| Field Name | Description |
|
||||
| --- | --- |
|
||||
| `internal:team_responsible` | The MongoDB team responsible for this library. The team name should match the string for the team in [mothra](https://github.com/10gen/mothra/blob/main/mothra/teams/database.yaml). |
|
||||
| `emits_persisted_data` | This should be set to true if the component outputs persisted data to disk. This is important because in this case, updating the library could cause breakage due to the format of this data changing. |
|
||||
| `info_link` | This is an informational link about the component. It is used to populate [README.third_party.md](/README.third_party.md) |
|
||||
| `import_script_path` | The location of the script used to update the library to a new version. The standard location is `src/third_party/[componentdir]/scripts/import.sh`. |
|
||||
| `import_script_path` | The location of the script (if it exists) used to update the library to a new version. The standard location is `src/third_party/[componentdir]/scripts/import.sh`. |
|
||||
|
||||
### Component Metadata Example
|
||||
```
|
||||
{
|
||||
"type": "library",
|
||||
"bom-ref": "pkg:github/boostorg/boost@boost-{{VERSION}}",
|
||||
"supplier": {
|
||||
"name": "The Boost Foundation",
|
||||
"url": [
|
||||
"https://www.boost.org/"
|
||||
]
|
||||
},
|
||||
"author": "Boost Developers",
|
||||
"group": "boost",
|
||||
"name": "Boost C++ Libraries",
|
||||
"version": "{{VERSION}}",
|
||||
"description": "Super-project for modularized Boost. Boost is a repository of free, portable, peer-reviewed C++ libraries",
|
||||
"scope": "required",
|
||||
"licenses": [
|
||||
{
|
||||
"license": {
|
||||
"id": "BSL-1.0"
|
||||
}
|
||||
}
|
||||
],
|
||||
"copyright": "Boost copyright claims are made on a per-file basis and listed as comments in source file headers",
|
||||
"cpe": "cpe:2.3:a:boost:boost:{{VERSION}}:*:*:*:*:*:*:*",
|
||||
"purl": "pkg:github/boostorg/boost@boost-{{VERSION}}",
|
||||
"externalReferences": [
|
||||
{
|
||||
"url": "https://github.com/boostorg/boost.git",
|
||||
"type": "distribution"
|
||||
}
|
||||
],
|
||||
"evidence": {
|
||||
"occurrences": [
|
||||
{
|
||||
"location": "src/third_party/boost"
|
||||
}
|
||||
]
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"name": "emits_persisted_data",
|
||||
"value": "false"
|
||||
},
|
||||
{
|
||||
"name": "import_script_path",
|
||||
"value": "src/third_party/boost/scripts/import.sh"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user