From 49eb1a4c07ffbe64bce3ba36b93180f151645626 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Fri, 17 Nov 2023 12:51:33 +0100 Subject: [PATCH] profiles: slsa: Remove quadratic complexity in SRC_URI iteration SLSA provenance generation iterates over $A (the distfiles actually used, a subset of those named in $SRC_URI) and, for each entry, scans all of $SRC_URI for a match. That's quadratic complexity, and the performance impact is bad because we shell out to a helper utility (basename) for every comparison. This leads to long stalls when generating SLSA provenance for packages with long distfile lists, such as Go and Rust packages. Iterate over $SRC_URI once and build a dictionary keyed by distfile name to speed up subsequent lookups. dev-db/etcdctl is a good candidate for testing. Signed-off-by: Jeremi Piotrowski --- .../base/profile.bashrc.slsa-provenance | 45 ++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/sdk_container/src/third_party/coreos-overlay/profiles/coreos/base/profile.bashrc.slsa-provenance b/sdk_container/src/third_party/coreos-overlay/profiles/coreos/base/profile.bashrc.slsa-provenance index 76a3c02f7f2..7074cf8d75e 100644 --- a/sdk_container/src/third_party/coreos-overlay/profiles/coreos/base/profile.bashrc.slsa-provenance +++ b/sdk_container/src/third_party/coreos-overlay/profiles/coreos/base/profile.bashrc.slsa-provenance @@ -77,29 +77,34 @@ __slsa_provenance_materials() { # There can be multiple, and can be used conditionally based on use flags, # and even replaced with different local names ("http://... -> othername.tgz"). So # we go through what's actually used ($A), then find the corresponding source URI. 
+ declare -A uri_dict local src="" prev_uri="" rename="false" orig_name="" + for uri in ${SRC_URI}; do + if [ "${uri}" = "->" ] ; then + rename="true" + continue + fi + local base_name="$(basename "${uri}")" + if [ "${rename}" = "true" ] ; then + prev_base_name="$(basename "${prev_uri}")" + unset uri_dict["${prev_base_name}"] + uri="${prev_uri}" + fi + uri_dict["${base_name}"]="${uri}" + rename="false" + prev_uri="${uri}" + done for src in ${A}; do local found="false" - for uri in ${SRC_URI}; do - if [ "${uri}" = "->" ] ; then - rename="true" - continue - fi - if [ "${src}" = "$(basename "${uri}")" ] ; then - orig_name="${src}" - if [ "${rename}" = "true" ] ; then - uri="${prev_uri}" - orig_name="$(basename "${uri}")" - fi - einfo " Provenance: recording tarball material (input) '${src}' ('${orig_name}')" - csum="$(sha512sum "${DISTDIR}/${src}" | cut -d' ' -f1)" - echo -e ",\n { \"uri\": \"${uri}\"," - echo -n " \"digest\": {\"sha512\":\"${csum}\"} }" - found="true" - fi - rename="false" - prev_uri="${uri}" - done + uri="${uri_dict["${src}"]}" + if [ -n "${uri}" ] ; then + orig_name="${src}" + einfo " Provenance: recording tarball material (input) '${src}' ('${orig_name}')" + csum="$(sha512sum "${DISTDIR}/${src}" | cut -d' ' -f1)" + echo -e ",\n { \"uri\": \"${uri}\"," + echo -n " \"digest\": {\"sha512\":\"${csum}\"} }" + found="true" + fi if [ "${found}" != "true" ] ; then die "No SRC_URI found for source '${src}', unable to record provenance!" fi