
Commit

chore(publish) abstract environment variables and introduce feature flag (#787)

* chore(publish) abstract environment variables and introduce feature flags (#784)

* chore(publish) abstract environment variables and introduce feature flags

Signed-off-by: Damien Duportal <[email protected]>

* comments fixup

Signed-off-by: Damien Duportal <[email protected]>

* Update site/publish.sh

Co-authored-by: Stéphane MERLE <[email protected]>

* enhance rsync ssh and rename pkg sync-task

---------

Signed-off-by: Damien Duportal <[email protected]>
Co-authored-by: Stéphane MERLE <[email protected]>

* Update site/publish.sh

* Apply suggestions from code review

Co-authored-by: Tim Jacomb <[email protected]>

---------

Signed-off-by: Damien Duportal <[email protected]>
Co-authored-by: Stéphane MERLE <[email protected]>
Co-authored-by: Tim Jacomb <[email protected]>
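
For reference, a minimal sketch of how the new feature flags might be exercised (the variable names come from the script below; the values and paths are illustrative only):

    # Hypothetical invocation: generate the site, then run a single sync task
    export UPDATE_CENTER_FILESHARES_ENV_FILES=/path/to/env-files   # directory of '.env-<task>' files (illustrative path)
    RUN_STAGES='generate-site|sync-uc' \
    SYNC_UC_TASKS='rsync-updates.jenkins.io' \
      ./site/publish.sh
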
3 people authored Jul 8, 2024
1 parent 34ddaff commit 60d6bef
Showing 1 changed file with 160 additions and 130 deletions.
290 changes: 160 additions & 130 deletions site/publish.sh
@@ -1,147 +1,177 @@
#!/bin/bash -ex

## Environment variables that could be configured at the job level:
# - OPT_IN_SYNC_FS_R2: (optional) Set it to "optin" to also update the azure.updates.jenkins.io File Share and R2 buckets

# Used later for rsyncing updates
UPDATES_SITE="updates.jenkins.io"
RSYNC_USER="mirrorbrain"

# For syncing R2 buckets aws-cli is configured through environment variables (from Jenkins credentials)
# https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html
export AWS_DEFAULT_REGION='auto'

## Install jq, required by generate.sh script
wget --no-verbose -O jq https://github.com/stedolan/jq/releases/download/jq-1.5/jq-linux64 || { echo "Failed to download jq" >&2 ; exit 1; }
chmod +x jq || { echo "Failed to make jq executable" >&2 ; exit 1; }

export PATH=.:$PATH

## Generate the content of 'www2' and 'download' folders
"$( dirname "$0" )/generate.sh" ./www2 ./download

## 'download' folder processing
# push plugins to mirrors.jenkins-ci.org
chmod -R a+r download
rsync -rlptDvz --chown=mirrorbrain:www-data --size-only download/plugins/ ${RSYNC_USER}@${UPDATES_SITE}:/srv/releases/jenkins/plugins

# Invoke a minimal mirrorsync to mirrorbits which will use the 'recent-releases.json' file as input
ssh ${RSYNC_USER}@${UPDATES_SITE} "cat > /tmp/update-center2-rerecent-releases.json" < www2/experimental/recent-releases.json
ssh ${RSYNC_USER}@${UPDATES_SITE} "/srv/releases/sync-recent-releases.sh /tmp/update-center2-rerecent-releases.json"

## 'www2' folder processing
chmod -R a+r www2

function parallelfunction() {
echo "=== parallelfunction: $1"

case $1 in
rsync*)
# Push generated index to the production server
time rsync --chown=mirrorbrain:www-data --recursive --links --perms --times -D \
--checksum --verbose --compress \
--exclude=/updates `# populated by https://github.com/jenkins-infra/crawler` \
--delete `# delete old sites` \
--stats `# add verbose statistics` \
./www2/ "${RSYNC_USER}@${UPDATES_SITE}:/var/www/${UPDATES_SITE}"
;;

azsync*)
# Script stored in /usr/local/bin used to generate a signed file share URL with a short-lived SAS token
# Source: https://github.com/jenkins-infra/pipeline-library/blob/master/resources/get-fileshare-signed-url.sh
fileShareUrl=$(get-fileshare-signed-url.sh)
# Sync Azure File Share content using www3 to avoid symlinks
time azcopy sync ./www3/ "${fileShareUrl}" \
--skip-version-check `# Do not check for new azcopy versions (we have updatecli for this)` \
--recursive=true \
--exclude-path="updates" `# populated by https://github.com/jenkins-infra/crawler` \
--delete-destination=true
;;

s3sync*)
# Retrieve the R2 bucket and the R2 endpoint from the task name passed as argument, minus "s3sync" prefix
updates_r2_bucket_and_endpoint="${1#s3sync}"
r2_bucket=${updates_r2_bucket_and_endpoint%|*}
r2_endpoint=${updates_r2_bucket_and_endpoint#*|}

# Sync CloudFlare R2 buckets content excluding 'updates' folder from www3 sync (without symlinks)
# as this folder is populated by https://github.com/jenkins-infra/crawler/blob/master/Jenkinsfile
time aws s3 sync ./www3/ "s3://${r2_bucket}/" \
--no-progress \
--no-follow-symlinks \
--size-only \
--exclude '.htaccess' \
--endpoint-url "${r2_endpoint}"
;;

*)
echo -n 'Warning: unknown parameter'
;;

esac
}

# Export local variables used in parallelfunction
export UPDATES_SITE
export RSYNC_USER

# Export variables used in parallelfunction/azsync/get-fileshare-signed-url.sh
export STORAGE_FILESHARE=updates-jenkins-io
export STORAGE_NAME=updatesjenkinsio
export STORAGE_DURATION_IN_MINUTE=5 # duration of the short-lived SAS token
export STORAGE_PERMISSIONS=dlrw

# Export function to use it with parallel
export -f parallelfunction

# parallel added within the permanent trusted agent here:
# https://github.com/jenkins-infra/jenkins-infra/blob/production/dist/profile/manifests/buildagent.pp
command -v parallel >/dev/null 2>&1 || { echo 'ERROR: parallel command not found. Exiting.'; exit 1; }

# Sync only updates.jenkins.io by default
tasks=('rsync')

# Sync updates.jenkins.io and azure.updates.jenkins.io File Share and R2 bucket(s) if the flag is set
if [[ ${OPT_IN_SYNC_FS_R2} == 'optin' ]]
## Environment variables that can be configured at the job level:
# - [optional] RUN_STAGES (string): list of top-level tasks ("stages") to execute. Separator is the pipe character '|'. Used to customize the tasks to run (when testing, for instance).
# - [optional] SYNC_UC_TASKS (string): list of UC "sync" tasks to perform in parallel during the 'sync-uc' stage. Separator is the pipe character '|'. Used to customize the tasks to run (when testing, for instance).
# - [mandatory] UPDATE_CENTER_FILESHARES_ENV_FILES (directory path): directory containing environment files to be sourced for each sync destination.
#   Each task named XX expects a file named '.env-XX' in this directory, sourced by the script to retrieve the settings for that task.
RUN_STAGES="${RUN_STAGES:-generate-site|sync-plugins|sync-uc}"
SYNC_UC_TASKS="${SYNC_UC_TASKS:-rsync-updates.jenkins.io|azsync-content|s3sync-westeurope}"

# Split strings to arrays for feature flags setup
run_stages=()
IFS='|' read -r -a run_stages <<< "${RUN_STAGES}"
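# Example: with the default RUN_STAGES above, run_stages becomes
# ('generate-site' 'sync-plugins' 'sync-uc'), and each stage below is enabled by a
# substring match such as [[ "${run_stages[*]}" =~ 'generate-site' ]]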

www2_dir="${WWW2_DIR:-./www2}"
download_dir="${DOWNLOAD_DIR:-./download}"

if [[ "${run_stages[*]}" =~ 'generate-site' ]]
then
# TIME sync, used by mirrorbits to know the last update date to take into account
date +%s > ./www2/TIME
## Install jq, required by generate.sh script
wget --no-verbose -O jq https://github.com/stedolan/jq/releases/download/jq-1.5/jq-linux64 || { echo "Failed to download jq" >&2 ; exit 1; }
chmod +x jq || { echo "Failed to make jq executable" >&2 ; exit 1; }

export PATH=.:$PATH

## Generate the content of $www2_dir and $download_dir folders
"$( dirname "$0" )/generate.sh" "${www2_dir}" "${download_dir}"
fi

if [[ "${run_stages[*]}" =~ 'sync-plugins' ]]
then
UPDATES_SITE="updates.jenkins.io"
RSYNC_USER="mirrorbrain"

## $download_dir folder processing
# push plugins to mirrors.jenkins-ci.org
chmod -R a+r "${download_dir}"
rsync -rlptDvz --chown=mirrorbrain:www-data --size-only "${download_dir}"/plugins/ "${RSYNC_USER}@${UPDATES_SITE}":/srv/releases/jenkins/plugins

# Invoke a minimal mirrorsync to mirrorbits which will use the 'recent-releases.json' file as input
ssh "${RSYNC_USER}@${UPDATES_SITE}" "cat > /tmp/update-center2-rerecent-releases.json" < "${www2_dir}"/experimental/recent-releases.json
ssh "${RSYNC_USER}@${UPDATES_SITE}" "/srv/releases/sync-recent-releases.sh /tmp/update-center2-rerecent-releases.json"
fi

if [[ "${run_stages[*]}" =~ 'sync-uc' ]]
then
# Ensure credentials are defined
: "${UPDATE_CENTER_FILESHARES_ENV_FILES?}"

sync_uc_tasks=()
IFS='|' read -r -a sync_uc_tasks <<< "${SYNC_UC_TASKS}"

command -v parallel >/dev/null 2>&1 || { echo 'ERROR: parallel command not found. Exiting.'; exit 1; }

# Define the function to be called for each parallel UC task (see the call after the function code)
function parallelfunction() {
echo "=== parallelfunction: $1"

# Load the env variables (settings and credentials) corresponding to the destination to sync to
# Note that some variables are needed by get-fileshare-signed-url.sh
envToLoad="${UPDATE_CENTER_FILESHARES_ENV_FILES}/.env-${1}"
# shellcheck source=/dev/null
source "${envToLoad}"

: "${FILESHARE_SYNC_SOURCE?}"

# Ensure an absolute path WITH a trailing slash (it is used as the source argument of several commands, where the trailing slash is meaningful)
local fileshare_sync_source_abs
fileshare_sync_source_abs="$(cd "${FILESHARE_SYNC_SOURCE}" && pwd -P)/"

case $1 in
rsync*)
# Required variables that should now be set from the .env file
: "${RSYNC_HOST?}" "${RSYNC_USER?}" "${RSYNC_GROUP?}" "${RSYNC_REMOTE_DIR?}" "${RSYNC_IDENTITY_NAME?}"

time rsync --chown="${RSYNC_USER}":"${RSYNC_GROUP}" --recursive --links --perms --times -D \
--rsh="ssh -i ${UPDATE_CENTER_FILESHARES_ENV_FILES}/${RSYNC_IDENTITY_NAME}" `# rsync identity file is stored with .env files` \
--checksum --verbose --compress \
--exclude=/updates `# populated by https://github.com/jenkins-infra/crawler` \
--delete `# delete old sites` \
--stats `# add verbose statistics` \
"${fileshare_sync_source_abs}" "${RSYNC_USER}"@"${RSYNC_HOST}":"${RSYNC_REMOTE_DIR}"
;;

azsync*)
# Required variables that should now be set from the .env file
: "${STORAGE_NAME?}" "${STORAGE_FILESHARE?}" "${STORAGE_DURATION_IN_MINUTE?}" "${STORAGE_PERMISSIONS?}" "${JENKINS_INFRA_FILESHARE_CLIENT_ID?}" "${JENKINS_INFRA_FILESHARE_CLIENT_SECRET?}" "${JENKINS_INFRA_FILESHARE_TENANT_ID?}"

## 'get-fileshare-signed-url.sh' command is a script stored in /usr/local/bin used to generate a signed file share URL with a short-lived SAS token
## Source: https://github.com/jenkins-infra/pipeline-library/blob/master/resources/get-fileshare-signed-url.sh
fileShareUrl="$(get-fileshare-signed-url.sh)"
# Fail fast if no share URL can be generated
: "${fileShareUrl?}"

# Sync Azure File Share
time azcopy sync \
--skip-version-check `# Do not check for new azcopy versions (we have updatecli for this)` \
--recursive=true \
--exclude-path="updates" `# populated by https://github.com/jenkins-infra/crawler` \
--delete-destination=true \
"${fileshare_sync_source_abs}" "${fileShareUrl}"
;;

s3sync*)
# Required variables that should now be set from the .env file
: "${BUCKET_NAME?}" "${BUCKET_ENDPOINT_URL?}" "${AWS_ACCESS_KEY_ID?}" "${AWS_SECRET_ACCESS_KEY?}" "${AWS_DEFAULT_REGION?}"

# Sync 'www-content' (without symlinks) to the bucket,
# excluding 'updates/' folder as it is populated by https://github.com/jenkins-infra/crawler/blob/master/Jenkinsfile
time aws s3 sync \
--no-progress \
--no-follow-symlinks \
--size-only \
--exclude '.htaccess' \
--endpoint-url "${BUCKET_ENDPOINT_URL}" \
"${fileshare_sync_source_abs}" "s3://${BUCKET_NAME}/"
;;

*)
echo -n "Warning: unknown sync UC task: ${1}"
;;

esac
}
# Export function to use it with parallel
export -f parallelfunction
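
# Illustration only (hypothetical example, values are placeholders): a task named
# 'rsync-updates.jenkins.io' would read its settings from
# "${UPDATE_CENTER_FILESHARES_ENV_FILES}/.env-rsync-updates.jenkins.io", containing roughly:
#   FILESHARE_SYNC_SOURCE=./www-content
#   RSYNC_HOST=updates.jenkins.io
#   RSYNC_USER=mirrorbrain
#   RSYNC_GROUP=www-data
#   RSYNC_REMOTE_DIR=/var/www/updates.jenkins.io
#   RSYNC_IDENTITY_NAME=<private key file name stored alongside the .env files>
# azsync* tasks expect the STORAGE_* and JENKINS_INFRA_FILESHARE_* variables instead,
# and s3sync* tasks the BUCKET_* and AWS_* variables checked above.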

############# Prepare the different UC source directories to be copied to different destinations
chmod -R a+r "${www2_dir}"
date +%s > "${www2_dir}"/TIME # TIME sync, used by mirrorbits to know the last update date to take into account

## No need to remove the symlinks: `azcopy sync` does not yet support symlinks, and we use `--no-follow-symlinks` for `aws s3 sync`
# Perform a copy with dereferenced symlinks (object storage does not support symlinks)
rm -rf ./www3/ # Cleanup

rm -rf ./www-content/ ./www-redirections/ # Cleanup

# Prepare www-content, a copy of $www2_dir dedicated to the mirrorbits service, excluding every .htaccess file
rsync --archive --verbose \
--copy-links `# dereference symlinks` \
--safe-links `# ignore symlinks outside of copied tree` \
--exclude='updates' `# Exclude ALL 'updates' directories, not only the root /updates (because symlink dereferencing creates additional directories)` \
./www2/ ./www3/

# Add File Share sync to the tasks
tasks+=('azsync')

# Add each R2 bucket sync to the tasks
updates_r2_bucket_and_endpoint_pairs=("westeurope-updates-jenkins-io|https://8d1838a43923148c5cee18ccc356a594.r2.cloudflarestorage.com")
for r2_bucket_and_endpoint_pair in "${updates_r2_bucket_and_endpoint_pairs[@]}"
do
tasks+=("s3sync${r2_bucket_and_endpoint_pair}")
done
fi

echo '----------------------- Launch synchronisation(s) -----------------------'
parallel --halt-on-error now,fail=1 parallelfunction ::: "${tasks[@]}"

# Wait for all deferred tasks
echo '============================ all done ============================'
--prune-empty-dirs `# Do not copy empty directories` \
--exclude='updates/' `# Exclude ALL 'updates' directories, not only the root /updates (because symlink dereferencing creates additional directories)` \
--exclude='.htaccess' `# Exclude every .htaccess file` \
"${www2_dir}"/ ./www-content/

# Trigger a mirror scan on mirrorbits if the flag is set
if [[ ${OPT_IN_SYNC_FS_R2} == 'optin' ]]
then
# Prepare www-redirections, a copy of $www2_dir dedicated to httpd service, including only .htaccess files (TODO: and html for plugin versions listing?)
rsync --archive --verbose \
--copy-links `# dereference symlinks` \
--safe-links `# ignore symlinks outside of copied tree` \
--prune-empty-dirs `# Do not copy empty directories` \
--include "*/" `# Includes all directories in the filtering` \
--include=".htaccess" `# Includes all elements named '.htaccess' in the filtering - redirections logic` \
--exclude="*" `# Exclude all elements found in the source that do not match the patterns above (must be the last filter flag)` \
"${www2_dir}"/ ./www-redirections/

# Append the httpd -> mirrorbits redirection as fallback (end of htaccess file) for www-redirections only
mirrorbits_hostname='mirrors.updates.jenkins.io'
{
echo ''
echo "## Fallback: if not rules match then redirect to ${mirrorbits_hostname}"
echo "RewriteRule ^.* https://${mirrorbits_hostname}%{REQUEST_URI}? [NC,L,R=307]"
} >> ./www-redirections/.htaccess

echo '----------------------- Launch synchronisation(s) -----------------------'
parallel --halt-on-error now,fail=1 parallelfunction ::: "${sync_uc_tasks[@]}"

# Wait for all deferred tasks
echo '============================ all parallel sync tasks done ============================'

# Trigger a mirror scan on mirrorbits once all synchronizations have finished
echo '== Triggering a mirror scan on mirrorbits...'
# Kubernetes namespace of mirrorbits
mirrorbits_namespace='updates-jenkins-io'

# Requires a valid kubernetes credential file at $KUBECONFIG or $HOME/.kube/config by default
pod_name=$(kubectl --namespace="${mirrorbits_namespace}" --no-headers=true get pod --output=name | grep mirrorbits | head -n1)
pod_name="$(kubectl --namespace="${mirrorbits_namespace}" --no-headers=true get pod --output=name | grep mirrorbits | head -n1)"
kubectl --namespace="${mirrorbits_namespace}" --container=mirrorbits exec "${pod_name}" -- mirrorbits scan -all -enable -timeout=120
fi
