From 98fbb068ce6a484b07d329329fddb0fb6f7bdd90 Mon Sep 17 00:00:00 2001 From: Krzesimir Nowak Date: Tue, 19 Sep 2023 13:18:01 +0200 Subject: [PATCH 1/3] overlay profiles: Fix a couple of issues with SLSA provenance stuff - Update to slsa 1.0. This is only partially done, as we still need to provide a proper build type. Maybe we could reuse the Github Actions Workflow (https://slsa-framework.github.io/github-actions-buildtypes/workflow/v1). - Stop using portageq - its use in ebuilds is banned, so eventually it would stop working. Replace it with our hack. - Stop trying to get a commit hash of coreos-overlay or portage-stable as if they were submodules. This setup is long gone, so a commit hash of toplevel scripts repo is enough. - Use zstd for compressing generated JSON files. --- .../base/profile.bashrc.slsa-provenance | 303 +++++++++++------- 1 file changed, 189 insertions(+), 114 deletions(-) diff --git a/sdk_container/src/third_party/coreos-overlay/profiles/coreos/base/profile.bashrc.slsa-provenance b/sdk_container/src/third_party/coreos-overlay/profiles/coreos/base/profile.bashrc.slsa-provenance index 76a3c02f7f2..f87af99ae36 100644 --- a/sdk_container/src/third_party/coreos-overlay/profiles/coreos/base/profile.bashrc.slsa-provenance +++ b/sdk_container/src/third_party/coreos-overlay/profiles/coreos/base/profile.bashrc.slsa-provenance @@ -1,178 +1,250 @@ # Build provenance hooks # ====================== # The functions below hook into every ebuild's execution and generate provenance files -# to meet the SLSA provenance requirements (https://slsa.dev/spec/v0.1/requirements#available). +# to meet the SLSA provenance requirements (https://slsa.dev/spec/v1.0/requirements#provenance-generation). # All input files (source tarball / git commit hash) plus added patches / files, # and all resulting installation binaries and files are captured. 
-# The information is emitted in SLSA provenance 0.2 format (see https://slsa.dev/provenance/v0.2) +# The information is emitted in SLSA provenance v1 format (see https://slsa.dev/spec/v1.0/provenance) # We only record provenance when a package is actually being built. # See profiles/coreos/base/profile.bashrc for cros_... cros_post_src_configure_enable_slsa_provenance_report() { - if [ "${GENERATE_SLSA_PROVENANCE:-}" != "true" ] ; then - einfo "Provenance generation not requested by build; skipping." + if [[ ${GENERATE_SLSA_PROVENANCE:-} != 'true' ]] ; then + einfo 'Provenance generation not requested by build; skipping.' return 0 fi - export generate_slsa_provenance_report="yes" + export generate_slsa_provenance_report=x } # -- -# Generate SLSA provenance 0.2 Subject information. -# The information will cover all installation files shipped with a package. -__slsa_provenance_subject() { - local parallel="$(nproc)" - local comma="" +# Prints a minimal SLSA ResourceDescriptor, with uri and digest of a +# specific kind. Optionally prints a leading comma. +# +# 1 - URI +# 2 - Digest kind +# 3 - Checksum +# 4 - Prints a leading comma if not empty. Optional, defaults to non-empty value. +__slsa_rd_printf() { + local uri kind csum leading_comma + uri=${1}; shift + kind=${1}; shift + csum=${1}; shift + leading_comma=${1-x} + + printf '%s{ "uri": "%s", "digest": { "%s": "%s" } }\n' \ + "${leading_comma:+,}" "${uri}" "${kind}" "${csum}" +} +# -- - einfo " Provenance: recording subject (output)" +# Generate SLSA provenance 1.0 Subject information. +# The information will cover all installation files shipped with a package. +__slsa_provenance_subject_members() { + local parallel do_comma checksum filepath filepath_d - echo ' "subject": [' + einfo ' Provenance: recording subject (output)' - ( - cd "$D" - find . 
-type f -print | sed 's:^./::' | xargs -P "$parallel" -L 1 sha512sum | sort -k2 - ) | while read checksum filepath; do - echo -en "${comma} {\"name\":\"/$filepath\", \"digest\":{\"sha512\":\"$checksum\"}}" - if [ -z "$comma" ] ; then - comma=',\n' - fi - done - echo -en "\n ]" + parallel=$(nproc) + do_comma='' + find "${D}" -type f -print0 | \ + xargs -0 -P "${parallel}" -L 1 sha512sum | \ + sort -k2 | \ + while read -r checksum filepath; do + filepath_d=${filepath#"${D}/"} + __slsa_rd_printf "${filepath_d}" 'sha512' "${checksum}" "${do_comma}" + if [[ -z ${do_comma} ]] ; then + do_comma=x + fi + done } # -- -__slsa_provenance_materials() { - local csum="" uri="" repo="" ebuild="" ebuildcsum="" - - local ebuild="${CATEGORY}/${PN}/${PF}.ebuild" - local repopath="$(portageq get_repo_path ${ROOT:-/} coreos)" - if [ -f "${repopath}/${ebuild}" ] ; then - repo="coreos-overlay" - ebuildcsum=$(sha1sum - < "${repopath}/${ebuild}") - else - repopath="$(portageq get_repo_path ${ROOT:-/} portage-stable)" - if [ -f "${repopath}/${ebuild}" ] ; then - repo="portage-stable" - ebuildcsum=$(sha1sum - < "${repopath}/${ebuild}") +__slsa_current_repo() { + local ebuild=${1}; shift + local -n repo_ref=${1}; shift + local -n ebuild_full_path_ref=${1}; shift + + local some_root sr_set v + sr_set= + for v in SYSROOT ROOT BROOT; do + if [[ -n ${!v:-} ]]; then + some_root=${!v%/} + # strip all trailing slashes, could be easier with extglob, but + # this is not guaranteed by PMS. + while [[ ${some_root%/} != "${some_root}" ]]; do + some_root=${some_root%/} + done + sr_set=x + break fi + done + if [[ -z ${sr_set} ]]; then + die "SLSA provenance: No root directory for portage configuration could be found" fi - if [ -z "${repo}" ]; then - die "SLSA provenance: Unable to detect ebuild repository for package '${ebuild}'" + + local repos_conf + local -a locations + repos_conf="${some_root}/etc/portage/repos.conf" + if [[ ! 
-e "${repos_conf}" ]]; then + die "SLSA provenance: No repos.conf found in '${some_root}/etc/portage'" fi - ebuildcsum=${ebuildcsum%% *} + mapfile -t locations < <( + if [[ -f ${repos_conf} ]]; then + cat "${repos_conf}" + else + cat "${repos_conf}/"*'.conf' + fi | grep '^[[:space:]]*location[[:space:]]*=' | sed -e 's/^[^=]*=[[:space:]]*//' + ) + local loc ebuild_full + for loc in "${locations[@]}"; do + ebuild_full="${loc}/${ebuild}" + if [[ -f ${ebuild_full} ]]; then + ebuild_full_path_ref=${ebuild_full} + repo_ref=${loc##*/} + return 0 + fi + done + die "SLSA provenance: ebuild file not found in any repo (${locations[*]})" +} +# -- + +__slsa_provenance_resolved_dependencies() { + local scripts_hash + scripts_hash=${1}; shift + + local ebuild spm_repo spm_ebuild_full_path + ebuild="${CATEGORY}/${PN}/${PF}.ebuild" + __slsa_current_repo "${ebuild}" spm_repo spm_ebuild_full_path - einfo " Provenance: recording ebuild material (input) '${repo}/${ebuild}'" - echo ' "materials": [' + local csum + csum=$(sha1sum - < "${spm_ebuild_full_path}") + csum=${csum%% *} - # The ebuild. Since "configSource" in "invocation" cannot have more than one (top/level) entry - # we add the ebuild and git repo checksum here, as a material. 
- csum="$(cat "/mnt/host/source/src/scripts/.git/modules/sdk_container/src/third_party/${repo}/HEAD")" - uri="git+https://github.com/flatcar/${repo}.git@${csum}#${ebuild}" - echo -e " { \"uri\": \"${uri}\"," - echo -n " \"digest\": {\"sha1\":\"${ebuildcsum}\"} }" + einfo " Provenance: recording ebuild material (input) '${spm_repo}/${ebuild}'" + + local repo_uri uri + repo_uri="https://raw.githubusercontent.com/flatcar/scripts/${scripts_hash}/sdk_container/src/third_party/${spm_repo}" + uri="${repo_uri}/${ebuild}" + __slsa_rd_printf "${uri}" 'sha1' "${csum}" # The main sources - if [ -n "${A}" ] ; then + if [[ -n ${A} ]] ; then # Package is built from downloaded source tarball(s) # There can be multiple, and can be used conditionally based on use flags, # and even replaced with different local names ("http://... -> othername.tgz"). So # we go through what's actually used ($A), then find the corresponding source URI. - local src="" prev_uri="" rename="false" orig_name="" + local src prev_uri rename orig_name found for src in ${A}; do - local found="false" + found= + rename= + prev_uri='' + orig_name='' for uri in ${SRC_URI}; do - if [ "${uri}" = "->" ] ; then - rename="true" + if [[ ${uri} = '->' ]] ; then + rename=x continue fi - if [ "${src}" = "$(basename "${uri}")" ] ; then - orig_name="${src}" - if [ "${rename}" = "true" ] ; then - uri="${prev_uri}" - orig_name="$(basename "${uri}")" + if [[ ${src} = "$(basename "${uri}")" ]] ; then + orig_name=${src} + if [[ -n ${rename} ]] ; then + uri=${prev_uri} + orig_name=$(basename "${uri}") fi einfo " Provenance: recording tarball material (input) '${src}' ('${orig_name}')" - csum="$(sha512sum "${DISTDIR}/${src}" | cut -d' ' -f1)" - echo -e ",\n { \"uri\": \"${uri}\"," - echo -n " \"digest\": {\"sha512\":\"${csum}\"} }" - found="true" + csum=$(sha512sum "${DISTDIR}/${src}") + csum=${csum%% *} + __slsa_rd_printf "${uri}" 'sha512' "${csum}" + found=x fi - rename="false" - prev_uri="${uri}" + rename= + prev_uri=${uri} done 
- if [ "${found}" != "true" ] ; then + if [[ -z ${found} ]] ; then die "No SRC_URI found for source '${src}', unable to record provenance!" fi done - elif [ -n "${EGIT_REPO_URI:-}" ] ; then + elif [[ -n ${EGIT_REPO_URI:-} ]] ; then # package is built from repo checkout (git) einfo " Provenance: recording GIT material (input) '${EGIT_REPO_URI}'" - csum="${EGIT_COMMIT}" - uri="${EGIT_REPO_URI}" - echo -e ",\n { \"uri\": \"${uri}\"," - echo -n " \"digest\": {\"sha1\":\"$csum\"} }" + uri=${EGIT_REPO_URI} + csum=${EGIT_COMMIT} + __slsa_rd_printf "${uri}" 'sha1' "${csum}" fi # Patches / files shipped with the ebuild (if any) - csum="$(cat "/mnt/host/source/src/scripts/.git/modules/sdk_container/src/third_party/${repo}/HEAD")" - uri="git+https://github.com/flatcar/${repo}.git@${csum}#${CATEGORY}/${PN}/files" - if [ -d "${FILESDIR}" ] ; then + local files_uri + files_uri="${repo_uri}/${CATEGORY}/${PN}/files" + if [[ -d ${FILESDIR} ]] ; then for file in $(cd "$FILESDIR" && find . -type f | sed 's:^./::') ; do - csum="$(sha1sum - <"${FILESDIR}/${file}")" - csum="${csum%% *}" + uri="${files_uri}/${file}" + csum=$(sha1sum - <"${FILESDIR}/${file}") + csum=${csum%% *} einfo " Provenance: recording ebuild material (input) '${file}'" - echo -e ",\n { \"uri\": \"${uri}/${file}\"," - echo -n " \"digest\": {\"sha1\":\"$csum\"} }" + __slsa_rd_printf "${uri}" 'sha1' "${csum}" done fi - - echo -ne '\n ]' } # -- __slsa_provenance_report() { - local scripts_hash="$(cat "/mnt/host/source/src/scripts/.git/HEAD")" - local buildcmd="emerge" + local scripts_hash buildcmd board sdk_version + + scripts_hash=$(cat "/mnt/host/source/src/scripts/.git/HEAD") + if [[ ${scripts_hash} = "ref:"* ]]; then + scripts_hash=$(cat "/mnt/host/source/src/scripts/.git/${scripts_hash#'ref: '}") + fi + + buildcmd='emerge' # extract board from e.g. '/build/amd64-usr/build'. Empty if no board is set (SDK build). 
- local board="$(echo "${CROS_BUILD_BOARD_TREE:-}" | sed -n 's:^/build/\([^/]\+\)/.*:\1:p')" - if [ -n "$board" ] ; then + board=$(echo "${CROS_BUILD_BOARD_TREE:-}" | sed -n 's:^/build/\([^/]\+\)/.*:\1:p') + if [[ -n ${board} ]] ; then buildcmd="emerge-${board}" fi - if [[ "${scripts_hash}" == "ref:"* ]]; then - scripts_hash="$(cat /mnt/host/source/src/scripts/.git/${scripts_hash#ref: })" - fi # FIXME: Supply SDK image ID and sha256 digest along with the version tag - local sdk_version="$(source /mnt/host/source/.repo/manifests/version.txt; echo ${FLATCAR_SDK_VERSION})" + sdk_version=$(source /mnt/host/source/.repo/manifests/version.txt; echo "${FLATCAR_SDK_VERSION}") # FIXME: add builder ID -cat < "${T}/${report_file}" + __slsa_provenance_report | jq | zstd -19 --stdout --compress > "${T}/${report_file}" mkdir -p "${dest_dir}" mv "${T}/${report_file}" "${dest_dir}" } +# -- From 9faab4387ec615f2ef11afb8d91497005b9eed05 Mon Sep 17 00:00:00 2001 From: Krzesimir Nowak Date: Wed, 20 Sep 2023 10:29:41 +0200 Subject: [PATCH 2/3] changelog: Add an entry --- changelog/changes/2023-09-20-slsa.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/changes/2023-09-20-slsa.md diff --git a/changelog/changes/2023-09-20-slsa.md b/changelog/changes/2023-09-20-slsa.md new file mode 100644 index 00000000000..72d6cf695d5 --- /dev/null +++ b/changelog/changes/2023-09-20-slsa.md @@ -0,0 +1 @@ +- Update generation of SLSA provenance info from v0.2 to v1.0. From 0993a9ada24fb77379ec5001159a461f70a7118c Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Fri, 17 Nov 2023 12:51:33 +0100 Subject: [PATCH 3/3] profiles: slsa: Remove quadratic complexity in SRC_URI iteration SLSA provenance generation iterates over $A (which is a subset of $SRC_URI) and for each of those tries to find a match in $SRC_URI. That's quadratic complexity, and the performance impact is bad because we shell out to a helper utility (basename) for every entry.
This is leading to long stalls when generating SLSA provenance for packages with long distfile lists, such as Go and Rust packages. Iterate over SRC_URI once and create a dictionary to speed up subsequent lookups. dev-db/etcdctl is a good candidate for testing. Signed-off-by: Jeremi Piotrowski --- .../base/profile.bashrc.slsa-provenance | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/sdk_container/src/third_party/coreos-overlay/profiles/coreos/base/profile.bashrc.slsa-provenance b/sdk_container/src/third_party/coreos-overlay/profiles/coreos/base/profile.bashrc.slsa-provenance index f87af99ae36..66926477480 100644 --- a/sdk_container/src/third_party/coreos-overlay/profiles/coreos/base/profile.bashrc.slsa-provenance +++ b/sdk_container/src/third_party/coreos-overlay/profiles/coreos/base/profile.bashrc.slsa-provenance @@ -133,35 +133,35 @@ __slsa_provenance_resolved_dependencies() { # There can be multiple, and can be used conditionally based on use flags, # and even replaced with different local names ("http://... -> othername.tgz"). So # we go through what's actually used ($A), then find the corresponding source URI. 
- local src prev_uri rename orig_name found - for src in ${A}; do - found= + declare -A uri_dict=() uri_orig_names=() + local prev_uri='' rename='' base_name prev_base_name + for uri in ${SRC_URI}; do + if [[ ${uri} = '->' ]] ; then + rename=x + continue + fi + base_name=$(basename "${uri}") + uri_orig_names["${uri}"]=${base_name} + if [[ -n ${rename} ]] ; then + unset "uri_dict[${prev_base_name}]" + uri=${prev_uri} + fi + uri_dict["${base_name}"]=${uri} rename= - prev_uri='' - orig_name='' - for uri in ${SRC_URI}; do - if [[ ${uri} = '->' ]] ; then - rename=x - continue - fi - if [[ ${src} = "$(basename "${uri}")" ]] ; then - orig_name=${src} - if [[ -n ${rename} ]] ; then - uri=${prev_uri} - orig_name=$(basename "${uri}") - fi - einfo " Provenance: recording tarball material (input) '${src}' ('${orig_name}')" - csum=$(sha512sum "${DISTDIR}/${src}") - csum=${csum%% *} - __slsa_rd_printf "${uri}" 'sha512' "${csum}" - found=x - fi - rename= - prev_uri=${uri} - done - if [[ -z ${found} ]] ; then + prev_uri=${uri} + prev_base_name=${base_name} + done + local src orig_name + for src in ${A}; do + uri=${uri_dict["${src}"]:-} + if [[ -z ${uri} ]] ; then die "No SRC_URI found for source '${src}', unable to record provenance!" fi + orig_name=${uri_orig_names["${uri}"]} + einfo " Provenance: recording tarball material (input) '${src}' ('${orig_name}')" + csum=$(sha512sum "${DISTDIR}/${src}") + csum=${csum%% *} + __slsa_rd_printf "${uri}" 'sha512' "${csum}" done elif [[ -n ${EGIT_REPO_URI:-} ]] ; then # package is built from repo checkout (git)