From 36c4dcd5a4ce242c95b65ad3d2eab9cd8e85c016 Mon Sep 17 00:00:00 2001 From: Damien Duportal Date: Wed, 29 May 2024 18:09:30 +0200 Subject: [PATCH 1/4] chore(publish) abstract environment variables and introduce feature flags Signed-off-by: Damien Duportal --- site/publish.sh | 287 ++++++++++++++++++++++++++---------------------- 1 file changed, 157 insertions(+), 130 deletions(-) diff --git a/site/publish.sh b/site/publish.sh index ff77e536f..05166d1e3 100755 --- a/site/publish.sh +++ b/site/publish.sh @@ -1,147 +1,174 @@ #!/bin/bash -ex -## Environment variables that could be configured at the job level: -# - OPT_IN_SYNC_FS_R2: (optional) Set it to "optin" to also update azure.updates.jenkins.io Files Share and R2 buckets - -# Used later for rsyncing updates -UPDATES_SITE="updates.jenkins.io" -RSYNC_USER="mirrorbrain" - -# For syncing R2 buckets aws-cli is configured through environment variables (from Jenkins credentials) -# https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html -export AWS_DEFAULT_REGION='auto' - -## Install jq, required by generate.sh script -wget --no-verbose -O jq https://github.com/stedolan/jq/releases/download/jq-1.5/jq-linux64 || { echo "Failed to download jq" >&2 ; exit 1; } -chmod +x jq || { echo "Failed to make jq executable" >&2 ; exit 1; } - -export PATH=.:$PATH - -## Generate the content of 'www2' and 'download' folders -"$( dirname "$0" )/generate.sh" ./www2 ./download - -## 'download' folder processing -# push plugins to mirrors.jenkins-ci.org -chmod -R a+r download -rsync -rlptDvz --chown=mirrorbrain:www-data --size-only download/plugins/ ${RSYNC_USER}@${UPDATES_SITE}:/srv/releases/jenkins/plugins - -# Invoke a minimal mirrorsync to mirrorbits which will use the 'recent-releases.json' file as input -ssh ${RSYNC_USER}@${UPDATES_SITE} "cat > /tmp/update-center2-rerecent-releases.json" < www2/experimental/recent-releases.json -ssh ${RSYNC_USER}@${UPDATES_SITE} "/srv/releases/sync-recent-releases.sh 
/tmp/update-center2-rerecent-releases.json" - -## 'www2' folder processing -chmod -R a+r www2 - -function parallelfunction() { - echo "=== parallelfunction: $1" - - case $1 in - rsync*) - # Push generated index to the production server - time rsync --chown=mirrorbrain:www-data --recursive --links --perms --times -D \ - --checksum --verbose --compress \ - --exclude=/updates `# populated by https://github.com/jenkins-infra/crawler` \ - --delete `# delete old sites` \ - --stats `# add verbose statistics` \ - ./www2/ "${RSYNC_USER}@${UPDATES_SITE}:/var/www/${UPDATES_SITE}" - ;; - - azsync*) - # Script stored in /usr/local/bin used to generate a signed file share URL with a short-lived SAS token - # Source: https://github.com/jenkins-infra/pipeline-library/blob/master/resources/get-fileshare-signed-url.sh - fileShareUrl=$(get-fileshare-signed-url.sh) - # Sync Azure File Share content using www3 to avoid symlinks - time azcopy sync ./www3/ "${fileShareUrl}" \ - --skip-version-check `# Do not check for new azcopy versions (we have updatecli for this)` \ - --recursive=true \ - --exclude-path="updates" `# populated by https://github.com/jenkins-infra/crawler` \ - --delete-destination=true - ;; - - s3sync*) - # Retrieve the R2 bucket and the R2 endpoint from the task name passed as argument, minus "s3sync" prefix - updates_r2_bucket_and_endpoint="${1#s3sync}" - r2_bucket=${updates_r2_bucket_and_endpoint%|*} - r2_endpoint=${updates_r2_bucket_and_endpoint#*|} - - # Sync CloudFlare R2 buckets content excluding 'updates' folder from www3 sync (without symlinks) - # as this folder is populated by https://github.com/jenkins-infra/crawler/blob/master/Jenkinsfile - time aws s3 sync ./www3/ "s3://${r2_bucket}/" \ - --no-progress \ - --no-follow-symlinks \ - --size-only \ - --exclude '.htaccess' \ - --endpoint-url "${r2_endpoint}" - ;; - - *) - echo -n 'Warning: unknown parameter' - ;; - - esac -} - -# Export local variables used in parallelfunction -export UPDATES_SITE -export 
RSYNC_USER - -# Export variables used in parallelfunction/azsync/get-fileshare-signed-url.sh -export STORAGE_FILESHARE=updates-jenkins-io -export STORAGE_NAME=updatesjenkinsio -export STORAGE_DURATION_IN_MINUTE=5 # duration of the short-lived SAS token -export STORAGE_PERMISSIONS=dlrw - -# Export function to use it with parallel -export -f parallelfunction - -# parallel added within the permanent trusted agent here: -# https://github.com/jenkins-infra/jenkins-infra/blob/production/dist/profile/manifests/buildagent.pp -command -v parallel >/dev/null 2>&1 || { echo 'ERROR: parralel command not found. Exiting.'; exit 1; } - -# Sync only updates.jenkins.io by default -tasks=('rsync') - -# Sync updates.jenkins.io and azure.updates.jenkins.io File Share and R2 bucket(s) if the flag is set -if [[ ${OPT_IN_SYNC_FS_R2} == 'optin' ]] +## Environment variables that can be configured at the job level: +# - [optional] SYNC_TASKS (string): list of script (sync.) tasks to execute. Separator is the pipe character '|'. Used to customize the tasks to run (when testing for instance) +# - [mandatory] UPDATE_CENTER_FILESHARES_ENV_FILES (directory path): directory containing environment files to be sources for each sync. destination. +# Each task named XX expects a file named 'env-XX' in this directory to be sourced by the script to retrieve settings for the task. 
+RUN_STAGES="${RUN_STAGES:-'generate-site|sync-plugins|sync-uc'}" +SYNC_UC_TASKS="${SYNC_UC_TASKS:-'rsync-pkg|azsync-content'|'s3sync-westeurope'}" + +# Split strings to arrays for feature flags setup +run_stages=() +IFS='|' read -r -a run_stages <<< "${RUN_STAGES}" + +if [[ "${run_stages[*]}" =~ 'generate-site' ]] then - # TIME sync, used by mirrorbits to know the last update date to take in account - date +%s > ./www2/TIME + ## Install jq, required by generate.sh script + wget --no-verbose -O jq https://github.com/stedolan/jq/releases/download/jq-1.5/jq-linux64 || { echo "Failed to download jq" >&2 ; exit 1; } + chmod +x jq || { echo "Failed to make jq executable" >&2 ; exit 1; } + + export PATH=.:$PATH + + ## Generate the content of 'www2' and 'download' folders + "$( dirname "$0" )/generate.sh" ./www2 ./download +fi + +if [[ "${run_stages[*]}" =~ 'sync-plugins' ]] +then + UPDATES_SITE="updates.jenkins.io" + RSYNC_USER="mirrorbrain" + + ## 'download' folder processing + # push plugins to mirrors.jenkins-ci.org + chmod -R a+r download + rsync -rlptDvz --chown=mirrorbrain:www-data --size-only download/plugins/ "${RSYNC_USER}@${UPDATES_SITE}":/srv/releases/jenkins/plugins + + # Invoke a minimal mirrorsync to mirrorbits which will use the 'recent-releases.json' file as input + ssh "${RSYNC_USER}@${UPDATES_SITE}" "cat > /tmp/update-center2-rerecent-releases.json" < www2/experimental/recent-releases.json + ssh "${RSYNC_USER}@${UPDATES_SITE}" "/srv/releases/sync-recent-releases.sh /tmp/update-center2-rerecent-releases.json" +fi + +if [[ "${run_stages[*]}" =~ 'sync-uc' ]] +then + sync_uc_tasks=() + IFS='|' read -r -a sync_uc_tasks <<< "${SYNC_UC_TASKS}" # split the UC sync task list (SYNC_UC_TASKS), not the stage list + + # parallel added within the permanent trusted agent here: + # https://github.com/jenkins-infra/jenkins-infra/blob/production/dist/profile/manifests/buildagent.pp + command -v parallel >/dev/null 2>&1 || { echo 'ERROR: parallel command not found. 
Exiting.'; exit 1; } + + # Ensure credentials is defined + : "${UPDATE_CENTER_FILESHARES_ENV_FILES:?}" + + # Define function to be called for each parallel UC tasks (see call after the function code) + function parallelfunction() { + echo "=== parallelfunction: $1" + + # Load the env variables (setting up and credentials) corresponding to the bucket to sync to + # Note that some variables are needed by get-fileshare-signed-url.sh + envToLoad="${UPDATE_CENTER_FILESHARES_ENV_FILES}/.env-${1}" + # shellcheck source=/dev/null + source "${envToLoad}" + + : "${FILESHARE_SYNC_SOURCE:?}" # reject empty as well as unset: the value is passed to 'cd' and used as the sync source + + # Ensure absolute path WITH a trailing slash (as it will be a source for different commands where it has a meaning) + local fileshare_sync_source_abs + fileshare_sync_source_abs="$(cd "${FILESHARE_SYNC_SOURCE}" && pwd -P)/" + + case $1 in + rsync*) + # Required variables that should now be set from the .env file + : "${RSYNC_HOST?}" "${RSYNC_USER?}" "${RSYNC_GROUP?}" "${RSYNC_REMOTE_DIR?}" + + time rsync --chown="${RSYNC_USER}":"${RSYNC_GROUP}" --recursive --links --perms --times -D \ + --checksum --verbose --compress \ + --exclude=/updates `# populated by https://github.com/jenkins-infra/crawler` \ + --delete `# delete old sites` \ + --stats `# add verbose statistics` \ + "${fileshare_sync_source_abs}" "${RSYNC_USER}"@"${RSYNC_HOST}":"${RSYNC_REMOTE_DIR}" + ;; + + azsync*) + # Required variables that should now be set from the .env file + : "${STORAGE_NAME?}" "${STORAGE_FILESHARE?}" "${STORAGE_DURATION_IN_MINUTE?}" "${STORAGE_PERMISSIONS?}" "${JENKINS_INFRA_FILESHARE_CLIENT_ID?}" "${JENKINS_INFRA_FILESHARE_CLIENT_SECRET?}" "${JENKINS_INFRA_FILESHARE_TENANT_ID?}" + + ## 'get-fileshare-signed-url.sh' command is a script stored in /usr/local/bin used to generate a signed file share URL with a short-lived SAS token + ## Source: https://github.com/jenkins-infra/pipeline-library/blob/master/resources/get-fileshare-signed-url.sh + fileShareUrl="$(get-fileshare-signed-url.sh)" + # Fail fast if 
no share URL can be generated + : "${fileShareUrl:?}" + + # Sync Azure File Share + time azcopy sync \ + --skip-version-check `# Do not check for new azcopy versions (we have updatecli for this)` \ + --recursive=true \ + --exclude-path="updates" `# populated by https://github.com/jenkins-infra/crawler` \ + --delete-destination=true \ + "${fileshare_sync_source_abs}" "${fileShareUrl}" + ;; + + s3sync*) + # Required variables that should now be set from the .env file + : "${BUCKET_NAME?}" "${BUCKET_ENDPOINT_URL?}" "${AWS_ACCESS_KEY_ID?}" "${AWS_SECRET_ACCESS_KEY?}" "${AWS_DEFAULT_REGION?}" + + # Sync 'www-content' (without symlinks) to the bucket, + # excluding 'updates/' folder as it is populated by https://github.com/jenkins-infra/crawler/blob/master/Jenkinsfile + time aws s3 sync \ + --no-progress \ + --no-follow-symlinks \ + --size-only \ + --exclude '.htaccess' \ + --endpoint-url "${BUCKET_ENDPOINT_URL}" \ + "${fileshare_sync_source_abs}" "s3://${BUCKET_NAME}/" + ;; + + *) + echo -n "Warning: unknown sync UC task: ${1}" + ;; + + esac + } + # Export function to use it with parallel + export -f parallelfunction + + ############# Prepare the different UC source directories to be copied to different destinations + chmod -R a+r www2 + date +%s > ./www2/TIME # TIME sync, used by mirrorbits to know the last update date to take in account ## No need to remove the symlinks as the `azcopy sync` for symlinks is not yet supported and we use `--no-follow-symlinks` for `aws s3 sync` # Perform a copy with dereference symlink (object storage do not support symlinks) - rm -rf ./www3/ # Cleanup - + rm -rf ./www-content/ ./www-redirections/ # Cleanup + + # Prepare www-content, a copy of www2 dedicated to mirrorbits service, excluding every .htaccess files rsync --archive --verbose \ --copy-links `# derefence symlinks` \ --safe-links `# ignore symlinks outside of copied tree` \ - --exclude='updates' `# Exclude ALL 'updates' directories, not only the root /updates (because symlink 
dereferencing create additional directories` \ - ./www2/ ./www3/ - - # Add File Share sync to the tasks - tasks+=('azsync') - - # Add each R2 bucket sync to the tasks - updates_r2_bucket_and_endpoint_pairs=("westeurope-updates-jenkins-io|https://8d1838a43923148c5cee18ccc356a594.r2.cloudflarestorage.com") - for r2_bucket_and_endpoint_pair in "${updates_r2_bucket_and_endpoint_pairs[@]}" - do - tasks+=("s3sync${r2_bucket_and_endpoint_pair}") - done -fi - -echo '----------------------- Launch synchronisation(s) -----------------------' -parallel --halt-on-error now,fail=1 parallelfunction ::: "${tasks[@]}" - -# Wait for all deferred tasks -echo '============================ all done ============================' + --prune-empty-dirs `# Do not copy empty directories` \ + --exclude='updates/' `# Exclude ALL 'updates' directories, not only the root /updates (because symlink dereferencing create additional directories` \ + --exclude='.htaccess' `# Exclude every .htaccess files` \ + ./www2/ ./www-content/ -# Trigger a mirror scan on mirrorbits if the flag is set -if [[ ${OPT_IN_SYNC_FS_R2} == 'optin' ]] -then + # Prepare www-redirections, a copy of www2 dedicated to httpd service, including only .htaccess files (TODO: and html for plugin versions listing?) 
+ rsync --archive --verbose \ + --copy-links `# derefence symlinks` \ + --safe-links `# ignore symlinks outside of copied tree` \ + --prune-empty-dirs `# Do not copy empty directories` \ + --include "*/" `# Includes all directories in the filtering` \ + --include=".htaccess" `# Includes all elements named '.htaccess' in the filtering - redirections logic` \ + --exclude="*" `# Exclude all elements found in source and not matching pattern aboves (must be the last filter flag)` \ + ./www2/ ./www-redirections/ + + # Append the httpd -> mirrorbits redirection as fallback (end of htaccess file) for www-redirections only + mirrorbits_hostname='mirrors.updates.jenkins.io' + { + echo '' + echo "## Fallback: if not rules match then redirect to ${mirrorbits_hostname}" + echo "RewriteRule ^.* https://${mirrorbits_hostname}%{REQUEST_URI}? [NC,L,R=307]" + } >> ./www-redirections/.htaccess + + echo '----------------------- Launch synchronisation(s) -----------------------' + parallel --halt-on-error now,fail=1 parallelfunction ::: "${sync_uc_tasks[@]}" + + # Wait for all deferred tasks + echo '============================ all parallel sync tasks done ============================' + + # Trigger a mirror scan on mirrorbits once all synchronized copies are finished echo '== Triggering a mirror scan on mirrorbits...' 
# Kubernetes namespace of mirrorbits mirrorbits_namespace='updates-jenkins-io' # Requires a valid kubernetes credential file at $KUBECONFIG or $HOME/.kube/config by default - pod_name=$(kubectl --namespace="${mirrorbits_namespace}" --no-headers=true get pod --output=name | grep mirrorbits | head -n1) + pod_name="$(kubectl --namespace="${mirrorbits_namespace}" --no-headers=true get pod --output=name | grep mirrorbits | head -n1)" kubectl --namespace="${mirrorbits_namespace}" --container=mirrorbits exec "${pod_name}" -- mirrorbits scan -all -enable -timeout=120 fi From 044001fcba32936c3c85534e287d0f13e045c401 Mon Sep 17 00:00:00 2001 From: Damien Duportal Date: Wed, 29 May 2024 18:18:09 +0200 Subject: [PATCH 2/4] comments fixup Signed-off-by: Damien Duportal --- site/publish.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/site/publish.sh b/site/publish.sh index 05166d1e3..c00b4599f 100755 --- a/site/publish.sh +++ b/site/publish.sh @@ -1,7 +1,8 @@ #!/bin/bash -ex ## Environment variables that can be configured at the job level: -# - [optional] SYNC_TASKS (string): list of script (sync.) tasks to execute. Separator is the pipe character '|'. Used to customize the tasks to run (when testing for instance) +# - [optional] RUN_STAGES (string): list of top-level tasks ("stages") to execute. Separator is the pipe character '|'. Used to customize the tasks to run (when testing for instance) +# - [optional] SYNC_UC_TASKS (string): list of UC "sync" tasks to perform in parallel during the 'sync-uc' stage. Separator is the pipe character '|'. Used to customize the tasks to run (when testing for instance) # - [mandatory] UPDATE_CENTER_FILESHARES_ENV_FILES (directory path): directory containing environment files to be sources for each sync. destination. # Each task named XX expects a file named 'env-XX' in this directory to be sourced by the script to retrieve settings for the task. 
RUN_STAGES="${RUN_STAGES:-'generate-site|sync-plugins|sync-uc'}" From 5a793e7f4926b591f3d9195a799e17a1f58cf71b Mon Sep 17 00:00:00 2001 From: Damien Duportal Date: Thu, 30 May 2024 08:44:10 +0200 Subject: [PATCH 3/4] Update site/publish.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Stéphane MERLE <95630726+smerle33@users.noreply.github.com> --- site/publish.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/publish.sh b/site/publish.sh index c00b4599f..9e0a76621 100755 --- a/site/publish.sh +++ b/site/publish.sh @@ -6,7 +6,7 @@ # - [mandatory] UPDATE_CENTER_FILESHARES_ENV_FILES (directory path): directory containing environment files to be sources for each sync. destination. # Each task named XX expects a file named 'env-XX' in this directory to be sourced by the script to retrieve settings for the task. RUN_STAGES="${RUN_STAGES:-'generate-site|sync-plugins|sync-uc'}" -SYNC_UC_TASKS="${SYNC_UC_TASKS:-'rsync-pkg|azsync-content'|'s3sync-westeurope'}" +SYNC_UC_TASKS="${SYNC_UC_TASKS:-'rsync-pkg|azsync-content|s3sync-westeurope'}" # Split strings to arrays for feature flags setup run_stages=() From 00d65461e5ab75f6391710203565bc80d6ae0a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20MERLE?= <95630726+smerle33@users.noreply.github.com> Date: Fri, 31 May 2024 16:22:29 +0200 Subject: [PATCH 4/4] enhance rsync ssh and rename pkg sync-task --- site/publish.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/site/publish.sh b/site/publish.sh index 9e0a76621..f62c7865e 100755 --- a/site/publish.sh +++ b/site/publish.sh @@ -6,7 +6,7 @@ # - [mandatory] UPDATE_CENTER_FILESHARES_ENV_FILES (directory path): directory containing environment files to be sources for each sync. destination. # Each task named XX expects a file named 'env-XX' in this directory to be sourced by the script to retrieve settings for the task. 
RUN_STAGES="${RUN_STAGES:-'generate-site|sync-plugins|sync-uc'}" -SYNC_UC_TASKS="${SYNC_UC_TASKS:-'rsync-pkg|azsync-content|s3sync-westeurope'}" +SYNC_UC_TASKS="${SYNC_UC_TASKS:-'rsync-updates.jenkins.io|azsync-content|s3sync-westeurope'}" # Split strings to arrays for feature flags setup run_stages=() @@ -70,9 +70,10 @@ then case $1 in rsync*) # Required variables that should now be set from the .env file - : "${RSYNC_HOST?}" "${RSYNC_USER?}" "${RSYNC_GROUP?}" "${RSYNC_REMOTE_DIR?}" + : "${RSYNC_HOST?}" "${RSYNC_USER?}" "${RSYNC_GROUP?}" "${RSYNC_REMOTE_DIR?}" "${RSYNC_IDENTITY_NAME?}" time rsync --chown="${RSYNC_USER}":"${RSYNC_GROUP}" --recursive --links --perms --times -D \ + --rsh="ssh -i ${UPDATE_CENTER_FILESHARES_ENV_FILES}/${RSYNC_IDENTITY_NAME}" `# rsync identity file is stored with .env files` \ --checksum --verbose --compress \ --exclude=/updates `# populated by https://github.com/jenkins-infra/crawler` \ --delete `# delete old sites` \