From 21ea6470a13b4d6619551d76457f547099416c95 Mon Sep 17 00:00:00 2001 From: Narthana Epa Date: Sat, 2 Sep 2023 09:06:43 +1000 Subject: [PATCH 1/4] Disable debug logging and add manual logging elastic stack startup scripts --- packer/linux/conf/bin/bk-configure-docker.sh | 58 ++++- .../conf/bin/bk-install-elastic-stack.sh | 229 ++++++++++++------ .../conf/bin/bk-mount-instance-storage.sh | 71 ++++-- 3 files changed, 263 insertions(+), 95 deletions(-) diff --git a/packer/linux/conf/bin/bk-configure-docker.sh b/packer/linux/conf/bin/bk-configure-docker.sh index 2c4dd035c..17062f538 100755 --- a/packer/linux/conf/bin/bk-configure-docker.sh +++ b/packer/linux/conf/bin/bk-configure-docker.sh @@ -1,46 +1,79 @@ -#!/bin/bash +#!/usr/bin/env bash # shellcheck disable=SC2094 -set -euxo pipefail -## Configures docker before system starts +set -Eeuo pipefail + +on_error() { + local exit_code="$?" + local error_line="$1" + + echo "${BASH_SOURCE[0]} errored with exit code ${exit_code} on line ${error_line}." + exit "$exit_code" +} + +trap 'on_error $LINENO' ERR + +on_exit() { + echo "${BASH_SOURCE[0]} completed successfully." >&2 +} + +trap on_exit EXIT + +## Configure docker before system starts # Write to system console and to our log file # See https://alestic.com/2010/12/ec2-user-data-output/ exec > >(tee -a /var/log/elastic-stack.log | logger -t user-data -s 2>/dev/console) 2>&1 +echo Reading variables from AMI creation... >&2 # shellcheck disable=SC1091 source /usr/local/lib/bk-configure-docker.sh -# Set user namespace remapping in config if [[ "${DOCKER_USERNS_REMAP:-false}" == "true" ]]; then + echo Configuring user namespace remapping... >&2 + cat <<< "$(jq '."userns-remap"="buildkite-agent"' /etc/docker/daemon.json)" > /etc/docker/daemon.json - cat < /etc/subuid + + echo Writing subuid... >&2 + cat < /etc/subgid + + echo Writing subgid... 
>&2 + cat <&2 fi -# Set experimental in config if [[ "${DOCKER_EXPERIMENTAL:-false}" == "true" ]]; then + echo Configuring experiment flag for docker daemon... >&2 cat <<< "$(jq '.experimental=true' /etc/docker/daemon.json)" > /etc/docker/daemon.json +else + echo Experiment flag for docker daemon not configured. >&2 fi -# Move docker root to the ephemeral device if [[ "${BUILDKITE_ENABLE_INSTANCE_STORAGE:-false}" == "true" ]]; then + echo Creating docker root directory in instance storage... >&2 mkdir -p /mnt/ephemeral/docker + echo Configuring docker root directory to be in instance storage... >&2 cat <<< "$(jq '."data-root"="/mnt/ephemeral/docker"' /etc/docker/daemon.json)" > /etc/docker/daemon.json +else + echo Instance storage not configured. >&2 fi -# Customise address pools -cat <<<"$(jq '."default-address-pools"=[{"base":"172.17.0.0/12","size":20},{"base":"192.168.0.0/16","size":24}]' /etc/docker/daemon.json)" >/etc/docker/daemon.json +echo Customising docker IP address pools... >&2 +cat <<<"$(jq \ + '."default-address-pools"=[{"base":"172.17.0.0/12","size":20},{"base":"192.168.0.0/16","size":24}]' \ + /etc/docker/daemon.json \ +)" >/etc/docker/daemon.json # See https://docs.docker.com/build/building/multi-platform/ -echo Installing qemu binfmt for multiarch... +echo Installing qemu binfmt for multiarch... >&2 docker run \ --privileged \ --userns=host \ @@ -48,5 +81,8 @@ docker run \ "tonistiigi/binfmt:${QEMU_BINFMT_TAG}" \ --install all +echo Cleaning up docker images... >&2 systemctl start docker-low-disk-gc.service + +echo Restarting docker daemon... 
>&2 systemctl restart docker diff --git a/packer/linux/conf/bin/bk-install-elastic-stack.sh b/packer/linux/conf/bin/bk-install-elastic-stack.sh index 8c053d928..5e2f08332 100755 --- a/packer/linux/conf/bin/bk-install-elastic-stack.sh +++ b/packer/linux/conf/bin/bk-install-elastic-stack.sh @@ -1,47 +1,70 @@ -#!/bin/bash -set -euxo pipefail +#!/usr/bin/env bash -## Installs the Buildkite Agent, run from the CloudFormation template +set -Eeuo pipefail -# Write to system console and to our log file -# See https://alestic.com/2010/12/ec2-user-data-output/ -exec > >(tee -a /var/log/elastic-stack.log | logger -t user-data -s 2>/dev/console) 2>&1 +## Installs the Buildkite Agent, run from the CloudFormation template on_error() { - local exitCode="$?" - local errorLine="$1" + local exit_code="$?" + local error_line="$1" + + echo "${BASH_SOURCE[0]} errored with exit code ${exit_code} on line ${error_line}." - if [[ $exitCode != 0 ]]; then - aws autoscaling set-instance-health --instance-id "$INSTANCE_ID" --health-status Unhealthy || true + if [[ $exit_code != 0 ]]; then + if ! aws autoscaling set-instance-health \ + --instance-id "$INSTANCE_ID" \ + --health-status Unhealthy + then + echo Failed to set instance health to unhealthy >&2 + fi fi cfn-signal \ --region "$AWS_REGION" \ --stack "$BUILDKITE_STACK_NAME" \ - --reason "Error on line $errorLine: $(tail -n 1 /var/log/elastic-stack.log)" \ + --reason "Error on line $error_line: $(tail -n 1 /var/log/elastic-stack.log)" \ --resource "AgentAutoScaleGroup" \ - --exit-code "$exitCode" + --exit-code "$exit_code" + + exit "$exit_code" } trap 'on_error $LINENO' ERR +on_exit() { + echo "${BASH_SOURCE[0]} completed successfully." 
>&2 +} + +trap on_exit EXIT + +# Write to system console and to our log file +# See https://alestic.com/2010/12/ec2-user-data-output/ +exec > >(tee -a /var/log/elastic-stack.log | logger -t user-data -s 2>/dev/console) 2>&1 + +# This needs to happen first so that the error reporting works +token=$(curl -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 60" --fail --silent --show-error --location http://169.254.169.254/latest/api/token) +INSTANCE_ID=$(curl -H "X-aws-ec2-metadata-token: $token" --fail --silent --show-error --location http://169.254.169.254/latest/meta-data/instance-id) +echo "Detected INSTANCE_ID=$INSTANCE_ID" >&2 + # This script is run on every boot so that we can gracefully recover from hard failures (eg. kernel panics) during # any previous attempts. If a previous run is detected as started but not complete then we will fail this run and mark # the instance as unhealthy. STATUS_FILE=/var/log/elastic-stack-bootstrap-status check_status() { - if [[ -f ${STATUS_FILE} ]] ; then - if [[ "$(< ${STATUS_FILE})" == "Completed" ]] ; then - echo "Bootstrap already completed successfully" + echo "Checking status file $STATUS_FILE..." >&2 + + if [[ -f "$STATUS_FILE" ]]; then + if [[ "$(<"$STATUS_FILE")" == "Completed" ]]; then + echo Bootstrap already completed successfully. exit 0 else - echo "Bootstrap previously failed, will not continue from unknown state" + echo Bootstrap previously failed, will not continue from unknown state. 
return 1 fi fi - echo "Started" > ${STATUS_FILE} + echo "Started" >"$STATUS_FILE" } check_status @@ -51,21 +74,20 @@ case $(uname -m) in aarch64) ARCH=arm64;; *) ARCH=unknown;; esac - -# even though the token is only vaild for 60s, let's not leak it into the logs -set +x -token=$(curl -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 60" --fail --silent --show-error --location "http://169.254.169.254/latest/api/token") -INSTANCE_ID=$(curl -H "X-aws-ec2-metadata-token: $token" --fail --silent --show-error --location "http://169.254.169.254/latest/meta-data/instance-id") -set -x +echo "Detected ARCH=$ARCH" >&2 DOCKER_VERSION=$(docker --version | cut -f3 -d' ' | sed 's/,//') +echo "Detected DOCKER_VERSION=$DOCKER_VERSION" >&2 PLUGINS_ENABLED=() [[ $SECRETS_PLUGIN_ENABLED == "true" ]] && PLUGINS_ENABLED+=("secrets") [[ $ECR_PLUGIN_ENABLED == "true" ]] && PLUGINS_ENABLED+=("ecr") [[ $DOCKER_LOGIN_PLUGIN_ENABLED == "true" ]] && PLUGINS_ENABLED+=("docker-login") +echo "The following plugins will be enabled: ${PLUGINS_ENABLED[*]-}" >&2 # cfn-env is sourced by the environment hook in builds +# DO NOT PUT SECRETES IN HERE, they will appear in both the cloudwatch and +# build logs, and the agent's log redactor will not be able to redact them. # We will create it in two steps so that we don't need to go crazy with quoting and escaping. The # first sets up a helper function, the second populates the default values for some environment @@ -73,7 +95,8 @@ PLUGINS_ENABLED=() # Step 1: Helper function. Note that we clobber the target file and DO NOT apply variable # substitution, this is controlled by the double-quoted "EOF". -cat <<-"EOF" > /var/lib/buildkite-agent/cfn-env +echo Writing Phase 1/2 for /var/lib/buildkite-agent/cfn-env helper function... >&2 +cat <<-"EOF" >/var/lib/buildkite-agent/cfn-env # The Buildkite agent sets a number of variables such as AWS_DEFAULT_REGION to fixed values which # are determined at AMI-build-time. 
However, sometimes a user might want to override such variables # using an env: block in their pipeline.yml. This little helper is sets the environment variables @@ -103,7 +126,8 @@ EOF # Step 2: Populate the default variable values. This time, we append to the file, and allow # variable substitution. -cat <> /var/lib/buildkite-agent/cfn-env +echo Writing Phase 2/2 for /var/lib/buildkite-agent/cfn-env helper function... >&2 +cat <>/var/lib/buildkite-agent/cfn-env set_always "BUILDKITE_AGENTS_PER_INSTANCE" "$BUILDKITE_AGENTS_PER_INSTANCE" set_always "BUILDKITE_ECR_POLICY" "${BUILDKITE_ECR_POLICY:-none}" @@ -118,21 +142,34 @@ set_unless_present "AWS_DEFAULT_REGION" "$AWS_REGION" set_unless_present "AWS_REGION" "$AWS_REGION" EOF +# We warned about not putting secrets in this file +echo Wrote to /var/lib/buildkite-agent/cfn-env: >&2 +cat /var/lib/buildkite-agent/cfn-env >&2 +echo + if [[ "${BUILDKITE_AGENT_RELEASE}" == "edge" ]]; then - echo "Downloading buildkite-agent edge..." + echo Downloading buildkite-agent edge... >&2 curl -Lsf -o /usr/bin/buildkite-agent-edge \ "https://download.buildkite.com/agent/experimental/latest/buildkite-agent-linux-${ARCH}" chmod +x /usr/bin/buildkite-agent-edge buildkite-agent-edge --version +else + echo Not using buildkite-agent edge. >&2 fi if [[ "${BUILDKITE_ADDITIONAL_SUDO_PERMISSIONS}" != "" ]]; then - echo "buildkite-agent ALL=NOPASSWD: ${BUILDKITE_ADDITIONAL_SUDO_PERMISSIONS}" > /etc/sudoers.d/buildkite-agent-additional + echo "buildkite-agent ALL=NOPASSWD: ${BUILDKITE_ADDITIONAL_SUDO_PERMISSIONS}" \ + >/etc/sudoers.d/buildkite-agent-additional chmod 440 /etc/sudoers.d/buildkite-agent-additional + + echo Wrote to /etc/sudoers.d/buildkite-agent-additional... >&2 + cat /etc/sudoers.d/buildkite-agent-additional >&2 +else + echo No additional sudo permissions. 
>&2 fi # Choose the right agent binary -ln -sf "/usr/bin/buildkite-agent-${BUILDKITE_AGENT_RELEASE}" /usr/bin/buildkite-agent +ln -sf "/usr/bin/buildkite-agent-$BUILDKITE_AGENT_RELEASE" /usr/bin/buildkite-agent agent_metadata=( "queue=${BUILDKITE_QUEUE}" @@ -141,54 +178,92 @@ agent_metadata=( "buildkite-aws-stack=${BUILDKITE_STACK_VERSION}" ) -# Split on commas +echo "Initial agent metadata: ${agent_metadata[*]-}" >&2 if [[ -n "${BUILDKITE_AGENT_TAGS:-}" ]]; then - IFS=',' read -r -a extra_agent_metadata <<< "${BUILDKITE_AGENT_TAGS:-}" + IFS=',' read -r -a extra_agent_metadata <<<"${BUILDKITE_AGENT_TAGS:-}" agent_metadata=("${agent_metadata[@]}" "${extra_agent_metadata[@]}") fi +echo "Agent metadata after splitting commas: ${agent_metadata[*]-}" >&2 # Enable git-mirrors BUILDKITE_AGENT_GIT_MIRRORS_PATH="" if [[ "${BUILDKITE_AGENT_ENABLE_GIT_MIRRORS:-false}" == "true" ]]; then - BUILDKITE_AGENT_GIT_MIRRORS_PATH="/var/lib/buildkite-agent/git-mirrors" + BUILDKITE_AGENT_GIT_MIRRORS_PATH=/var/lib/buildkite-agent/git-mirrors + echo "git-mirrors enabled at $BUILDKITE_AGENT_GIT_MIRRORS_PATH" >&2 mkdir -p "${BUILDKITE_AGENT_GIT_MIRRORS_PATH}" if [[ "${BUILDKITE_ENABLE_INSTANCE_STORAGE:-false}" == "true" ]]; then + echo Mounting git-mirrors to instance storage... >&2 + EPHEMERAL_GIT_MIRRORS_PATH="/mnt/ephemeral/git-mirrors" + echo "Creating ephemeral git-mirrors direcotry at $EPHEMERAL_GIT_MIRRORS_PATH" >&2 mkdir -p "${EPHEMERAL_GIT_MIRRORS_PATH}" + echo Bind mounting ephemeral git-mirror directory to git-mirrors path... >&2 mount -o bind "${EPHEMERAL_GIT_MIRRORS_PATH}" "${BUILDKITE_AGENT_GIT_MIRRORS_PATH}" + + echo Writing bind mount to fstab... >&2 echo "${EPHEMERAL_GIT_MIRRORS_PATH} ${BUILDKITE_AGENT_GIT_MIRRORS_PATH} none defaults,bind 0 0" >>/etc/fstab + + echo fstab is now: >&2 + cat /etc/fstab >&2 + echo + else + echo Not mounting git-mirrors to instance storage as instance storage is disabled. 
>&2 fi - chown buildkite-agent: "${BUILDKITE_AGENT_GIT_MIRRORS_PATH}" + echo Setting ownership of git-mirrors directory to buildkite-agent... >&2 + chown buildkite-agent: "$BUILDKITE_AGENT_GIT_MIRRORS_PATH" +else + echo git-mirrors disabled. >&2 fi +echo "BUILDKITE_AGENT_GIT_MIRRORS_PATH is $BUILDKITE_AGENT_GIT_MIRRORS_PATH" >&2 BUILDKITE_AGENT_BUILD_PATH="/var/lib/buildkite-agent/builds" mkdir -p "${BUILDKITE_AGENT_BUILD_PATH}" -if [ "${BUILDKITE_ENABLE_INSTANCE_STORAGE:-false}" == "true" ]; then +if [[ "${BUILDKITE_ENABLE_INSTANCE_STORAGE:-false}" == "true" ]]; then + echo Bind mounting build path to instance storage... >&2 + EPHEMERAL_BUILD_PATH="/mnt/ephemeral/builds" mkdir -p "${EPHEMERAL_BUILD_PATH}" + mount -o bind "${EPHEMERAL_BUILD_PATH}" "${BUILDKITE_AGENT_BUILD_PATH}" echo "${EPHEMERAL_BUILD_PATH} ${BUILDKITE_AGENT_BUILD_PATH} none defaults,bind 0 0" >>/etc/fstab + + echo fstab is now: >&2 + cat /etc/fstab >&2 +else + echo Not mounting build path to instance storage as instance storage is disabled. >&2 fi -chown buildkite-agent: "${BUILDKITE_AGENT_BUILD_PATH}" + +echo Setting ownership of build path to buildkite-agent. >&2 +chown buildkite-agent: "$BUILDKITE_AGENT_BUILD_PATH" # Either you can have timestamp-lines xor ansi-timestamps. 
-# There's no technical reason you can't have both, its a pragmatic decision to +# There's no technical reason you can't have both, it's a pragmatic decision to # simplify the avaliable parameters on the stack -if [[ "$BUILDKITE_AGENT_TIMESTAMP_LINES" == "true" ]]; then +if [[ ${BUILDKITE_AGENT_TIMESTAMP_LINES:-"false"} == "true" ]]; then + BUILDKITE_AGENT_TIMESTAMPS_LINES="true" BUILDKITE_AGENT_NO_ANSI_TIMESTAMPS="true" else + BUILDKITE_AGENT_TIMESTAMPS_LINES="false" BUILDKITE_AGENT_NO_ANSI_TIMESTAMPS="false" fi - -set +x # Don't leak the agent token into logs -echo "Setting \$BUILDKITE_AGENT_TOKEN to the value stored in the SSM Parameter $BUILDKITE_AGENT_TOKEN_PATH" -BUILDKITE_AGENT_TOKEN="$(aws ssm get-parameter --name "${BUILDKITE_AGENT_TOKEN_PATH}" --with-decryption --query Parameter.Value --output text)" -set -x - -cat < /etc/buildkite-agent/buildkite-agent.cfg +echo Set \$BUILDKITE_AGENT_NO_ANSI_TIMESTAMPS to \$BUILDKITE_AGENT_TIMESTAMP_LINES >&2 +echo "BUILDKITE_AGENT_TIMESTAMP_LINES is $BUILDKITE_AGENT_TIMESTAMPS_LINES" >&2 +echo "BUILDKITE_AGENT_NO_ANSI_TIMESTAMPS is $BUILDKITE_AGENT_NO_ANSI_TIMESTAMPS" >&2 + +echo "Setting \$BUILDKITE_AGENT_TOKEN from SSM Parameter $BUILDKITE_AGENT_TOKEN_PATH" >&2 +BUILDKITE_AGENT_TOKEN="$( + aws ssm get-parameter \ + --name "$BUILDKITE_AGENT_TOKEN_PATH" \ + --with-decryption \ + --query Parameter.Value \ + --output text +)" + +# DO NOT write this file to logs. It contains secrets. +cat </etc/buildkite-agent/buildkite-agent.cfg name="${BUILDKITE_STACK_NAME}-${INSTANCE_ID}-%spawn" token="${BUILDKITE_AGENT_TOKEN}" tags=$(IFS=, ; echo "${agent_metadata[*]}") @@ -210,68 +285,86 @@ cancel-grace-period=60 EOF if [[ "${BUILDKITE_ENV_FILE_URL}" != "" ]]; then + echo "Fetching env file from ${BUILDKITE_ENV_FILE_URL}..." >&2 /usr/local/bin/bk-fetch.sh "${BUILDKITE_ENV_FILE_URL}" /var/lib/buildkite-agent/env +else + echo No env file to fetch. 
>&2 fi +echo Setting ownership of /etc/buildkite-agent/buildkite-agent.cfg to buildkite-agent... >&2 chown buildkite-agent: /etc/buildkite-agent/buildkite-agent.cfg -if [[ -n "${BUILDKITE_AUTHORIZED_USERS_URL}" ]]; then - cat <<-EOF > /usr/local/bin/refresh_authorized_keys - /usr/local/bin/bk-fetch.sh "${BUILDKITE_AUTHORIZED_USERS_URL}" /tmp/authorized_keys +if [[ -n "$BUILDKITE_AUTHORIZED_USERS_URL" ]]; then + echo Writing authorized user fetching script... >&2 + cat <<-EOF | tee /usr/local/bin/refresh_authorized_keys + /usr/local/bin/bk-fetch.sh "$BUILDKITE_AUTHORIZED_USERS_URL" /tmp/authorized_keys mv /tmp/authorized_keys /home/ec2-user/.ssh/authorized_keys chmod 600 /home/ec2-user/.ssh/authorized_keys chown ec2-user: /home/ec2-user/.ssh/authorized_keys EOF + + echo Setting ownership of /usr/local/bin/refresh_authorized_keys to root... >&2 chmod +x /usr/local/bin/refresh_authorized_keys + + echo Running authorized user fetching script... >&2 /usr/local/bin/refresh_authorized_keys + + echo Enabling authorized user fetching timer... >&2 systemctl enable refresh_authorized_keys.timer +else + echo No authorized users to fetch >&2 fi -# Finish git lfs install +echo Installing git-lfs for buildkite-agent user... >&2 su buildkite-agent -l -c 'git lfs install' -if [[ -n "${BUILDKITE_ELASTIC_BOOTSTRAP_SCRIPT}" ]]; then - /usr/local/bin/bk-fetch.sh "${BUILDKITE_ELASTIC_BOOTSTRAP_SCRIPT}" /tmp/elastic_bootstrap - bash < /tmp/elastic_bootstrap +if [[ -n "$BUILDKITE_ELASTIC_BOOTSTRAP_SCRIPT" ]]; then + echo "Running bootstrap script from $BUILDKITE_ELASTIC_BOOTSTRAP_SCRIPT..." >&2 + /usr/local/bin/bk-fetch.sh "$BUILDKITE_ELASTIC_BOOTSTRAP_SCRIPT" /tmp/elastic_bootstrap + bash &2 fi -cat < /etc/lifecycled -AWS_REGION=${AWS_REGION} +echo Writing lifecycled configuration... 
>&2 +cat <&2 systemctl enable --now lifecycled.service -# wait for docker to start -next_wait_time=0 -until docker ps || [ $next_wait_time -eq 5 ]; do - sleep $(( next_wait_time++ )) -done - +echo Waiting for docker to start... >&2 check_docker() { - if ! docker ps ; then - echo "Failed to contact docker" + if ! docker ps >/dev/null; then + echo "Failed to contact docker." return 1 fi } +next_wait_time=0 +until check_docker || [[ $next_wait_time -eq 5 ]]; do + sleep $((next_wait_time++)) +done + +echo "Waited $next_wait_time times for docker to start." >&2 +echo We will exit if it still has not started. >&2 check_docker -# start buildkite-agent +echo Starting buildkite-agent... >&2 systemctl enable --now buildkite-agent -# let the stack know that this host has been initialized successfully +echo Signaling success to CloudFormation... >&2 +# This will fail if the stack has already completed, for instance if there is a min size +# of 1 and this is the 2nd instance. This is ok, so we just ignore the error cfn-signal \ --region "$AWS_REGION" \ --stack "$BUILDKITE_STACK_NAME" \ --resource "AgentAutoScaleGroup" \ - --exit-code 0 || ( - # This will fail if the stack has already completed, for instance if there is a min size - # of 1 and this is the 2nd instance. This is ok, so we just ignore the erro - echo "Signal failed" - ) + --exit-code 0 || echo Signal failed # Record bootstrap as complete (this should be the last step in this file) -echo "Completed" > ${STATUS_FILE} +echo "Completed" >"$STATUS_FILE" diff --git a/packer/linux/conf/bin/bk-mount-instance-storage.sh b/packer/linux/conf/bin/bk-mount-instance-storage.sh index 350c61249..3f72366e7 100755 --- a/packer/linux/conf/bin/bk-mount-instance-storage.sh +++ b/packer/linux/conf/bin/bk-mount-instance-storage.sh @@ -1,5 +1,22 @@ -#!/bin/bash -set -euxo pipefail +#!/usr/bin/env bash + +set -Eeuo pipefail + +on_error() { + local exit_code="$?" 
+ local error_line="$1" + + echo "${BASH_SOURCE[0]} errored with exit code ${exit_code} on line ${error_line}." + exit "$exit_code" +} + +trap 'on_error $LINENO' ERR + +on_exit() { + echo "${BASH_SOURCE[0]} completed successfully." >&2 +} + +trap on_exit EXIT # Write to system console and to our log file # See https://alestic.com/2010/12/ec2-user-data-output/ @@ -9,24 +26,27 @@ exec > >(tee -a /var/log/elastic-stack.log | logger -t user-data -s 2>/dev/conso # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html if [[ "${BUILDKITE_ENABLE_INSTANCE_STORAGE:-false}" != "true" ]]; then - echo Skipping mounting instance storage >&2 + echo Skipping mounting instance storage. >&2 exit 0 fi +echo Mounting instance storage... >&2 + #shellcheck disable=SC2207 devices=($(nvme list | grep "Amazon EC2 NVMe Instance Storage" | cut -f1 -d' ' || true)) - if [[ -z "${devices[*]}" ]]; then echo No NVMe drives to mount. >&2 - echo Please check that your instance type supports instance storage >&2 + echo Please check that your instance type supports instance storage. >&2 exit 0 fi +echo "Found NVMe devices: ${devices[*]}." >&2 + if [[ "${#devices[@]}" -eq 1 ]]; then - echo "Mounting instance storage device directly" >&2 + echo Mounting instance storage device directly... >&2 logicalname="${devices[0]}" elif [[ "${#devices[@]}" -gt 1 ]]; then - echo "Mounting instance storage devices using software RAID" >&2 + echo Mounting instance storage devices using software RAID... >&2 logicalname=/dev/md0 mdadm \ @@ -34,29 +54,48 @@ elif [[ "${#devices[@]}" -gt 1 ]]; then --level=0 \ -c256 \ --raid-devices="${#devices[@]}" "${devices[@]}" + echo "Mounted ${devices[*]} to $logicalname." >&2 echo "DEVICE ${devices[*]}" > /etc/mdadm.conf + echo Created /etc/mdadm.conf: >&2 + cat /etc/mdadm.conf >&2 mdadm --detail --scan >> /etc/mdadm.conf + echo Updated /etc/mdadm.conf: >&2 + cat /etc/mdadm.conf >&2 + + echo Setting readahead to 64k... 
>&2 blockdev --setra 65536 "$logicalname" +else + echo Expected at least once nvme device, found: "${devices[*]}" >&2 + echo + echo This error is unexpected. Please contact support@buildkite.com >&2 + exit 1 fi -# Make an ext4 file system, [-F]orce creation, don’t TRIM at fs creation time -# (-E nodiscard) -echo "Formatting $logicalname as ext4" >&2 +echo "Formatting $logicalname as ext4..." >&2 +# Make an ext4 file system, [-F]orce creation, don’t TRIM at fs creation time (-E nodiscard) mkfs.ext4 -F -E nodiscard "$logicalname" > /dev/null devicemount=/mnt/ephemeral - -echo "Mounting $logicalname to $devicemount" >&2 - +echo "Mounting $logicalname to $devicemount..." >&2 fs_type="ext4" mount_options="defaults,noatime" mkdir -p "$devicemount" -mount -t "${fs_type}" -o "${mount_options}" "$logicalname" "$devicemount" +mount -t "$fs_type" -o "$mount_options" "$logicalname" "$devicemount" -if [ ! -f /etc/fstab.backup ]; then +if [[ ! -f /etc/fstab.backup ]]; then + echo Backing up /etc/fstab to /etc/fstab.backup... >&2 cp -rP /etc/fstab /etc/fstab.backup - echo "$logicalname $devicemount ${fs_type} ${mount_options} 0 0" >> /etc/fstab + + fstab_line="$logicalname $devicemount ${fs_type} ${mount_options} 0 0" + echo "Appending $fstab_line to /etc/fstab..." >&2 + echo "$fstab_line" >> /etc/fstab + + echo Appened to /etc/fstab: >&2 + cat /etc/fstab >&2 +else + echo /etc/fstab.backup already exists. Not mofidying /etc/fstab: >&2 + cat /etc/fstab >&2 fi From e8184d8ff077abdce8cdd0ee2336105196762aed Mon Sep 17 00:00:00 2001 From: Narthana Epa Date: Mon, 2 Oct 2023 17:42:14 +1100 Subject: [PATCH 2/4] Remove redirecting echos and cats to stderr The command that sends the scripts output streams to the loggers merges stdout and stderr anyway, so there is no point in writing to stderr in the script. This is the logging command: ``` exec > >(tee -a /var/log/elastic-stack.log | logger -t user-data -s 2>/dev/console) 2>&1 ``` it is present in each of these scripts. 
--- packer/linux/conf/bin/bk-configure-docker.sh | 30 ++--- .../conf/bin/bk-install-elastic-stack.sh | 113 +++++++++--------- .../conf/bin/bk-mount-instance-storage.sh | 48 ++++---- 3 files changed, 95 insertions(+), 96 deletions(-) diff --git a/packer/linux/conf/bin/bk-configure-docker.sh b/packer/linux/conf/bin/bk-configure-docker.sh index 17062f538..cd73c07df 100755 --- a/packer/linux/conf/bin/bk-configure-docker.sh +++ b/packer/linux/conf/bin/bk-configure-docker.sh @@ -14,7 +14,7 @@ on_error() { trap 'on_error $LINENO' ERR on_exit() { - echo "${BASH_SOURCE[0]} completed successfully." >&2 + echo "${BASH_SOURCE[0]} completed successfully." } trap on_exit EXIT @@ -26,54 +26,54 @@ trap on_exit EXIT exec > >(tee -a /var/log/elastic-stack.log | logger -t user-data -s 2>/dev/console) 2>&1 -echo Reading variables from AMI creation... >&2 +echo Reading variables from AMI creation... # shellcheck disable=SC1091 source /usr/local/lib/bk-configure-docker.sh if [[ "${DOCKER_USERNS_REMAP:-false}" == "true" ]]; then - echo Configuring user namespace remapping... >&2 + echo Configuring user namespace remapping... cat <<< "$(jq '."userns-remap"="buildkite-agent"' /etc/docker/daemon.json)" > /etc/docker/daemon.json - echo Writing subuid... >&2 + echo Writing subuid... cat <&2 + echo Writing subgid... cat <&2 + echo User namespace remapping not configured. fi if [[ "${DOCKER_EXPERIMENTAL:-false}" == "true" ]]; then - echo Configuring experiment flag for docker daemon... >&2 + echo Configuring experiment flag for docker daemon... cat <<< "$(jq '.experimental=true' /etc/docker/daemon.json)" > /etc/docker/daemon.json else - echo Experiment flag for docker daemon not configured. >&2 + echo Experiment flag for docker daemon not configured. fi if [[ "${BUILDKITE_ENABLE_INSTANCE_STORAGE:-false}" == "true" ]]; then - echo Creating docker root directory in instance storage... >&2 + echo Creating docker root directory in instance storage... 
mkdir -p /mnt/ephemeral/docker - echo Configuring docker root directory to be in instance storage... >&2 + echo Configuring docker root directory to be in instance storage... cat <<< "$(jq '."data-root"="/mnt/ephemeral/docker"' /etc/docker/daemon.json)" > /etc/docker/daemon.json else - echo Instance storage not configured. >&2 + echo Instance storage not configured. fi -echo Customising docker IP address pools... >&2 +echo Customising docker IP address pools... cat <<<"$(jq \ '."default-address-pools"=[{"base":"172.17.0.0/12","size":20},{"base":"192.168.0.0/16","size":24}]' \ /etc/docker/daemon.json \ )" >/etc/docker/daemon.json # See https://docs.docker.com/build/building/multi-platform/ -echo Installing qemu binfmt for multiarch... >&2 +echo Installing qemu binfmt for multiarch... docker run \ --privileged \ --userns=host \ @@ -81,8 +81,8 @@ docker run \ "tonistiigi/binfmt:${QEMU_BINFMT_TAG}" \ --install all -echo Cleaning up docker images... >&2 +echo Cleaning up docker images... systemctl start docker-low-disk-gc.service -echo Restarting docker daemon... >&2 +echo Restarting docker daemon... systemctl restart docker diff --git a/packer/linux/conf/bin/bk-install-elastic-stack.sh b/packer/linux/conf/bin/bk-install-elastic-stack.sh index 5e2f08332..23f62e5a0 100755 --- a/packer/linux/conf/bin/bk-install-elastic-stack.sh +++ b/packer/linux/conf/bin/bk-install-elastic-stack.sh @@ -15,7 +15,7 @@ on_error() { --instance-id "$INSTANCE_ID" \ --health-status Unhealthy then - echo Failed to set instance health to unhealthy >&2 + echo Failed to set instance health to unhealthy. fi fi @@ -32,7 +32,7 @@ on_error() { trap 'on_error $LINENO' ERR on_exit() { - echo "${BASH_SOURCE[0]} completed successfully." >&2 + echo "${BASH_SOURCE[0]} completed successfully." 
} trap on_exit EXIT @@ -44,7 +44,7 @@ exec > >(tee -a /var/log/elastic-stack.log | logger -t user-data -s 2>/dev/conso # This needs to happen first so that the error reporting works token=$(curl -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 60" --fail --silent --show-error --location http://169.254.169.254/latest/api/token) INSTANCE_ID=$(curl -H "X-aws-ec2-metadata-token: $token" --fail --silent --show-error --location http://169.254.169.254/latest/meta-data/instance-id) -echo "Detected INSTANCE_ID=$INSTANCE_ID" >&2 +echo "Detected INSTANCE_ID=$INSTANCE_ID" # This script is run on every boot so that we can gracefully recover from hard failures (eg. kernel panics) during # any previous attempts. If a previous run is detected as started but not complete then we will fail this run and mark @@ -52,7 +52,7 @@ echo "Detected INSTANCE_ID=$INSTANCE_ID" >&2 STATUS_FILE=/var/log/elastic-stack-bootstrap-status check_status() { - echo "Checking status file $STATUS_FILE..." >&2 + echo "Checking status file $STATUS_FILE..." 
if [[ -f "$STATUS_FILE" ]]; then if [[ "$(<"$STATUS_FILE")" == "Completed" ]]; then @@ -74,16 +74,16 @@ case $(uname -m) in aarch64) ARCH=arm64;; *) ARCH=unknown;; esac -echo "Detected ARCH=$ARCH" >&2 +echo "Detected ARCH=$ARCH" DOCKER_VERSION=$(docker --version | cut -f3 -d' ' | sed 's/,//') -echo "Detected DOCKER_VERSION=$DOCKER_VERSION" >&2 +echo "Detected DOCKER_VERSION=$DOCKER_VERSION" PLUGINS_ENABLED=() [[ $SECRETS_PLUGIN_ENABLED == "true" ]] && PLUGINS_ENABLED+=("secrets") [[ $ECR_PLUGIN_ENABLED == "true" ]] && PLUGINS_ENABLED+=("ecr") [[ $DOCKER_LOGIN_PLUGIN_ENABLED == "true" ]] && PLUGINS_ENABLED+=("docker-login") -echo "The following plugins will be enabled: ${PLUGINS_ENABLED[*]-}" >&2 +echo "The following plugins will be enabled: ${PLUGINS_ENABLED[*]-}" # cfn-env is sourced by the environment hook in builds # DO NOT PUT SECRETES IN HERE, they will appear in both the cloudwatch and @@ -95,7 +95,7 @@ echo "The following plugins will be enabled: ${PLUGINS_ENABLED[*]-}" >&2 # Step 1: Helper function. Note that we clobber the target file and DO NOT apply variable # substitution, this is controlled by the double-quoted "EOF". -echo Writing Phase 1/2 for /var/lib/buildkite-agent/cfn-env helper function... >&2 +echo Writing Phase 1/2 for /var/lib/buildkite-agent/cfn-env helper function... cat <<-"EOF" >/var/lib/buildkite-agent/cfn-env # The Buildkite agent sets a number of variables such as AWS_DEFAULT_REGION to fixed values which # are determined at AMI-build-time. However, sometimes a user might want to override such variables @@ -126,7 +126,7 @@ EOF # Step 2: Populate the default variable values. This time, we append to the file, and allow # variable substitution. -echo Writing Phase 2/2 for /var/lib/buildkite-agent/cfn-env helper function... >&2 +echo Writing Phase 2/2 for /var/lib/buildkite-agent/cfn-env helper function... 
cat <>/var/lib/buildkite-agent/cfn-env set_always "BUILDKITE_AGENTS_PER_INSTANCE" "$BUILDKITE_AGENTS_PER_INSTANCE" @@ -143,18 +143,18 @@ set_unless_present "AWS_REGION" "$AWS_REGION" EOF # We warned about not putting secrets in this file -echo Wrote to /var/lib/buildkite-agent/cfn-env: >&2 -cat /var/lib/buildkite-agent/cfn-env >&2 +echo Wrote to /var/lib/buildkite-agent/cfn-env: +cat /var/lib/buildkite-agent/cfn-env echo if [[ "${BUILDKITE_AGENT_RELEASE}" == "edge" ]]; then - echo Downloading buildkite-agent edge... >&2 + echo Downloading buildkite-agent edge... curl -Lsf -o /usr/bin/buildkite-agent-edge \ "https://download.buildkite.com/agent/experimental/latest/buildkite-agent-linux-${ARCH}" chmod +x /usr/bin/buildkite-agent-edge buildkite-agent-edge --version else - echo Not using buildkite-agent edge. >&2 + echo Not using buildkite-agent edge. fi if [[ "${BUILDKITE_ADDITIONAL_SUDO_PERMISSIONS}" != "" ]]; then @@ -162,10 +162,10 @@ if [[ "${BUILDKITE_ADDITIONAL_SUDO_PERMISSIONS}" != "" ]]; then >/etc/sudoers.d/buildkite-agent-additional chmod 440 /etc/sudoers.d/buildkite-agent-additional - echo Wrote to /etc/sudoers.d/buildkite-agent-additional... >&2 - cat /etc/sudoers.d/buildkite-agent-additional >&2 + echo Wrote to /etc/sudoers.d/buildkite-agent-additional... + cat /etc/sudoers.d/buildkite-agent-additional else - echo No additional sudo permissions. >&2 + echo No additional sudo permissions. 
fi # Choose the right agent binary @@ -178,51 +178,51 @@ agent_metadata=( "buildkite-aws-stack=${BUILDKITE_STACK_VERSION}" ) -echo "Initial agent metadata: ${agent_metadata[*]-}" >&2 +echo "Initial agent metadata: ${agent_metadata[*]-}" if [[ -n "${BUILDKITE_AGENT_TAGS:-}" ]]; then IFS=',' read -r -a extra_agent_metadata <<<"${BUILDKITE_AGENT_TAGS:-}" agent_metadata=("${agent_metadata[@]}" "${extra_agent_metadata[@]}") fi -echo "Agent metadata after splitting commas: ${agent_metadata[*]-}" >&2 +echo "Agent metadata after splitting commas: ${agent_metadata[*]-}" # Enable git-mirrors BUILDKITE_AGENT_GIT_MIRRORS_PATH="" if [[ "${BUILDKITE_AGENT_ENABLE_GIT_MIRRORS:-false}" == "true" ]]; then BUILDKITE_AGENT_GIT_MIRRORS_PATH=/var/lib/buildkite-agent/git-mirrors - echo "git-mirrors enabled at $BUILDKITE_AGENT_GIT_MIRRORS_PATH" >&2 + echo "git-mirrors enabled at $BUILDKITE_AGENT_GIT_MIRRORS_PATH" mkdir -p "${BUILDKITE_AGENT_GIT_MIRRORS_PATH}" if [[ "${BUILDKITE_ENABLE_INSTANCE_STORAGE:-false}" == "true" ]]; then - echo Mounting git-mirrors to instance storage... >&2 + echo Mounting git-mirrors to instance storage... EPHEMERAL_GIT_MIRRORS_PATH="/mnt/ephemeral/git-mirrors" - echo "Creating ephemeral git-mirrors direcotry at $EPHEMERAL_GIT_MIRRORS_PATH" >&2 + echo "Creating ephemeral git-mirrors direcotry at $EPHEMERAL_GIT_MIRRORS_PATH" mkdir -p "${EPHEMERAL_GIT_MIRRORS_PATH}" - echo Bind mounting ephemeral git-mirror directory to git-mirrors path... >&2 + echo Bind mounting ephemeral git-mirror directory to git-mirrors path... mount -o bind "${EPHEMERAL_GIT_MIRRORS_PATH}" "${BUILDKITE_AGENT_GIT_MIRRORS_PATH}" - echo Writing bind mount to fstab... >&2 + echo Writing bind mount to fstab... echo "${EPHEMERAL_GIT_MIRRORS_PATH} ${BUILDKITE_AGENT_GIT_MIRRORS_PATH} none defaults,bind 0 0" >>/etc/fstab - echo fstab is now: >&2 - cat /etc/fstab >&2 + echo fstab is now: + cat /etc/fstab echo else - echo Not mounting git-mirrors to instance storage as instance storage is disabled. 
>&2 + echo Not mounting git-mirrors to instance storage as instance storage is disabled. fi - echo Setting ownership of git-mirrors directory to buildkite-agent... >&2 + echo Setting ownership of git-mirrors directory to buildkite-agent... chown buildkite-agent: "$BUILDKITE_AGENT_GIT_MIRRORS_PATH" else - echo git-mirrors disabled. >&2 + echo git-mirrors disabled. fi -echo "BUILDKITE_AGENT_GIT_MIRRORS_PATH is $BUILDKITE_AGENT_GIT_MIRRORS_PATH" >&2 +echo "BUILDKITE_AGENT_GIT_MIRRORS_PATH is $BUILDKITE_AGENT_GIT_MIRRORS_PATH" BUILDKITE_AGENT_BUILD_PATH="/var/lib/buildkite-agent/builds" mkdir -p "${BUILDKITE_AGENT_BUILD_PATH}" if [[ "${BUILDKITE_ENABLE_INSTANCE_STORAGE:-false}" == "true" ]]; then - echo Bind mounting build path to instance storage... >&2 + echo Bind mounting build path to instance storage... EPHEMERAL_BUILD_PATH="/mnt/ephemeral/builds" mkdir -p "${EPHEMERAL_BUILD_PATH}" @@ -230,13 +230,13 @@ if [[ "${BUILDKITE_ENABLE_INSTANCE_STORAGE:-false}" == "true" ]]; then mount -o bind "${EPHEMERAL_BUILD_PATH}" "${BUILDKITE_AGENT_BUILD_PATH}" echo "${EPHEMERAL_BUILD_PATH} ${BUILDKITE_AGENT_BUILD_PATH} none defaults,bind 0 0" >>/etc/fstab - echo fstab is now: >&2 - cat /etc/fstab >&2 + echo fstab is now: + cat /etc/fstab else - echo Not mounting build path to instance storage as instance storage is disabled. >&2 + echo Not mounting build path to instance storage as instance storage is disabled. fi -echo Setting ownership of build path to buildkite-agent. >&2 +echo Setting ownership of build path to buildkite-agent. chown buildkite-agent: "$BUILDKITE_AGENT_BUILD_PATH" # Either you can have timestamp-lines xor ansi-timestamps. 
@@ -249,11 +249,11 @@ else BUILDKITE_AGENT_TIMESTAMPS_LINES="false" BUILDKITE_AGENT_NO_ANSI_TIMESTAMPS="false" fi -echo Set \$BUILDKITE_AGENT_NO_ANSI_TIMESTAMPS to \$BUILDKITE_AGENT_TIMESTAMP_LINES >&2 -echo "BUILDKITE_AGENT_TIMESTAMP_LINES is $BUILDKITE_AGENT_TIMESTAMPS_LINES" >&2 -echo "BUILDKITE_AGENT_NO_ANSI_TIMESTAMPS is $BUILDKITE_AGENT_NO_ANSI_TIMESTAMPS" >&2 +echo Setting \$BUILDKITE_AGENT_NO_ANSI_TIMESTAMPS to \$BUILDKITE_AGENT_TIMESTAMP_LINES +echo "BUILDKITE_AGENT_TIMESTAMP_LINES is $BUILDKITE_AGENT_TIMESTAMPS_LINES" +echo "BUILDKITE_AGENT_NO_ANSI_TIMESTAMPS is $BUILDKITE_AGENT_NO_ANSI_TIMESTAMPS" -echo "Setting \$BUILDKITE_AGENT_TOKEN from SSM Parameter $BUILDKITE_AGENT_TOKEN_PATH" >&2 +echo "Setting \$BUILDKITE_AGENT_TOKEN from SSM Parameter $BUILDKITE_AGENT_TOKEN_PATH" BUILDKITE_AGENT_TOKEN="$( aws ssm get-parameter \ --name "$BUILDKITE_AGENT_TOKEN_PATH" \ @@ -285,17 +285,17 @@ cancel-grace-period=60 EOF if [[ "${BUILDKITE_ENV_FILE_URL}" != "" ]]; then - echo "Fetching env file from ${BUILDKITE_ENV_FILE_URL}..." >&2 + echo "Fetching env file from ${BUILDKITE_ENV_FILE_URL}..." /usr/local/bin/bk-fetch.sh "${BUILDKITE_ENV_FILE_URL}" /var/lib/buildkite-agent/env else - echo No env file to fetch. >&2 + echo No env file to fetch. fi -echo Setting ownership of /etc/buildkite-agent/buildkite-agent.cfg to buildkite-agent... >&2 +echo Setting ownership of /etc/buildkite-agent/buildkite-agent.cfg to buildkite-agent... chown buildkite-agent: /etc/buildkite-agent/buildkite-agent.cfg if [[ -n "$BUILDKITE_AUTHORIZED_USERS_URL" ]]; then - echo Writing authorized user fetching script... >&2 + echo Writing authorized user fetching script... 
cat <<-EOF | tee /usr/local/bin/refresh_authorized_keys /usr/local/bin/bk-fetch.sh "$BUILDKITE_AUTHORIZED_USERS_URL" /tmp/authorized_keys mv /tmp/authorized_keys /home/ec2-user/.ssh/authorized_keys @@ -303,44 +303,44 @@ if [[ -n "$BUILDKITE_AUTHORIZED_USERS_URL" ]]; then chown ec2-user: /home/ec2-user/.ssh/authorized_keys EOF - echo Setting ownership of /usr/local/bin/refresh_authorized_keys to root... >&2 + echo Setting ownership of /usr/local/bin/refresh_authorized_keys to root... chmod +x /usr/local/bin/refresh_authorized_keys - echo Running authorized user fetching script... >&2 + echo Running authorized user fetching script... /usr/local/bin/refresh_authorized_keys - echo Enabling authorized user fetching timer... >&2 + echo Enabling authorized user fetching timer... systemctl enable refresh_authorized_keys.timer else - echo No authorized users to fetch >&2 + echo No authorized users to fetch. fi -echo Installing git-lfs for buildkite-agent user... >&2 +echo Installing git-lfs for buildkite-agent user... su buildkite-agent -l -c 'git lfs install' if [[ -n "$BUILDKITE_ELASTIC_BOOTSTRAP_SCRIPT" ]]; then - echo "Running bootstrap script from $BUILDKITE_ELASTIC_BOOTSTRAP_SCRIPT..." >&2 + echo "Running bootstrap script from $BUILDKITE_ELASTIC_BOOTSTRAP_SCRIPT..." /usr/local/bin/bk-fetch.sh "$BUILDKITE_ELASTIC_BOOTSTRAP_SCRIPT" /tmp/elastic_bootstrap bash &2 + echo No bootstrap script to run. fi -echo Writing lifecycled configuration... >&2 +echo Writing lifecycled configuration... cat <&2 +echo Starting lifecycled... systemctl enable --now lifecycled.service -echo Waiting for docker to start... >&2 +echo Waiting for docker to start... check_docker() { if ! docker ps >/dev/null; then - echo "Failed to contact docker." + echo Failed to contact docker. return 1 fi } @@ -350,14 +350,13 @@ until check_docker || [[ $next_wait_time -eq 5 ]]; do sleep $((next_wait_time++)) done -echo "Waited $next_wait_time times for docker to start." 
>&2 -echo We will exit if it still has not started. >&2 +echo "Waited $next_wait_time times for docker to start. We will exit if it still has not started." check_docker -echo Starting buildkite-agent... >&2 +echo Starting buildkite-agent... systemctl enable --now buildkite-agent -echo Signaling success to CloudFormation... >&2 +echo Signaling success to CloudFormation... # This will fail if the stack has already completed, for instance if there is a min size # of 1 and this is the 2nd instance. This is ok, so we just ignore the error cfn-signal \ diff --git a/packer/linux/conf/bin/bk-mount-instance-storage.sh b/packer/linux/conf/bin/bk-mount-instance-storage.sh index 3f72366e7..37507afcb 100755 --- a/packer/linux/conf/bin/bk-mount-instance-storage.sh +++ b/packer/linux/conf/bin/bk-mount-instance-storage.sh @@ -13,7 +13,7 @@ on_error() { trap 'on_error $LINENO' ERR on_exit() { - echo "${BASH_SOURCE[0]} completed successfully." >&2 + echo "${BASH_SOURCE[0]} completed successfully." } trap on_exit EXIT @@ -26,27 +26,27 @@ exec > >(tee -a /var/log/elastic-stack.log | logger -t user-data -s 2>/dev/conso # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html if [[ "${BUILDKITE_ENABLE_INSTANCE_STORAGE:-false}" != "true" ]]; then - echo Skipping mounting instance storage. >&2 + echo Skipped mounting instance storage. exit 0 fi -echo Mounting instance storage... >&2 +echo Mounting instance storage... #shellcheck disable=SC2207 devices=($(nvme list | grep "Amazon EC2 NVMe Instance Storage" | cut -f1 -d' ' || true)) if [[ -z "${devices[*]}" ]]; then - echo No NVMe drives to mount. >&2 - echo Please check that your instance type supports instance storage. >&2 + echo No NVMe drives to mount. + echo Please check that your instance type supports instance storage. exit 0 fi -echo "Found NVMe devices: ${devices[*]}." >&2 +echo "Found NVMe devices: ${devices[*]}." if [[ "${#devices[@]}" -eq 1 ]]; then - echo Mounting instance storage device directly... 
>&2 + echo Mounting instance storage device directly... logicalname="${devices[0]}" elif [[ "${#devices[@]}" -gt 1 ]]; then - echo Mounting instance storage devices using software RAID... >&2 + echo Mounting instance storage devices using software RAID... logicalname=/dev/md0 mdadm \ @@ -54,31 +54,31 @@ elif [[ "${#devices[@]}" -gt 1 ]]; then --level=0 \ -c256 \ --raid-devices="${#devices[@]}" "${devices[@]}" - echo "Mounted ${devices[*]} to $logicalname." >&2 + echo "Mounted ${devices[*]} to $logicalname." echo "DEVICE ${devices[*]}" > /etc/mdadm.conf - echo Created /etc/mdadm.conf: >&2 - cat /etc/mdadm.conf >&2 + echo Created /etc/mdadm.conf: + cat /etc/mdadm.conf mdadm --detail --scan >> /etc/mdadm.conf - echo Updated /etc/mdadm.conf: >&2 - cat /etc/mdadm.conf >&2 + echo Updated /etc/mdadm.conf: + cat /etc/mdadm.conf - echo Setting readahead to 64k... >&2 + echo Setting readahead to 64k... blockdev --setra 65536 "$logicalname" else - echo Expected at least once nvme device, found: "${devices[*]}" >&2 + echo Expected at least once nvme device, found: "${devices[*]}" echo - echo This error is unexpected. Please contact support@buildkite.com >&2 + echo This error is unexpected. Please contact support@buildkite.com exit 1 fi -echo "Formatting $logicalname as ext4..." >&2 +echo "Formatting $logicalname as ext4..." # Make an ext4 file system, [-F]orce creation, don’t TRIM at fs creation time (-E nodiscard) mkfs.ext4 -F -E nodiscard "$logicalname" > /dev/null devicemount=/mnt/ephemeral -echo "Mounting $logicalname to $devicemount..." >&2 +echo "Mounting $logicalname to $devicemount..." fs_type="ext4" mount_options="defaults,noatime" @@ -86,16 +86,16 @@ mkdir -p "$devicemount" mount -t "$fs_type" -o "$mount_options" "$logicalname" "$devicemount" if [[ ! -f /etc/fstab.backup ]]; then - echo Backing up /etc/fstab to /etc/fstab.backup... >&2 + echo Backing up /etc/fstab to /etc/fstab.backup... 
cp -rP /etc/fstab /etc/fstab.backup fstab_line="$logicalname $devicemount ${fs_type} ${mount_options} 0 0" - echo "Appending $fstab_line to /etc/fstab..." >&2 + echo "Appending $fstab_line to /etc/fstab..." echo "$fstab_line" >> /etc/fstab - echo Appened to /etc/fstab: >&2 - cat /etc/fstab >&2 + echo Appened to /etc/fstab: + cat /etc/fstab else - echo /etc/fstab.backup already exists. Not mofidying /etc/fstab: >&2 - cat /etc/fstab >&2 + echo /etc/fstab.backup already exists. Not mofidying /etc/fstab: + cat /etc/fstab fi From 3f8bdf9feeb84ca9355e301d571b6a54e2c02686 Mon Sep 17 00:00:00 2001 From: Narthana Epa Date: Mon, 2 Oct 2023 20:41:57 +1100 Subject: [PATCH 3/4] Tweak comments and logs --- packer/linux/conf/bin/bk-install-elastic-stack.sh | 2 +- packer/linux/conf/bin/bk-mount-instance-storage.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packer/linux/conf/bin/bk-install-elastic-stack.sh b/packer/linux/conf/bin/bk-install-elastic-stack.sh index 23f62e5a0..c7f657a31 100755 --- a/packer/linux/conf/bin/bk-install-elastic-stack.sh +++ b/packer/linux/conf/bin/bk-install-elastic-stack.sh @@ -185,7 +185,7 @@ if [[ -n "${BUILDKITE_AGENT_TAGS:-}" ]]; then fi echo "Agent metadata after splitting commas: ${agent_metadata[*]-}" -# Enable git-mirrors +# Enable git-mirrors if a git mirrors path is provided BUILDKITE_AGENT_GIT_MIRRORS_PATH="" if [[ "${BUILDKITE_AGENT_ENABLE_GIT_MIRRORS:-false}" == "true" ]]; then BUILDKITE_AGENT_GIT_MIRRORS_PATH=/var/lib/buildkite-agent/git-mirrors diff --git a/packer/linux/conf/bin/bk-mount-instance-storage.sh b/packer/linux/conf/bin/bk-mount-instance-storage.sh index 37507afcb..837f51bce 100755 --- a/packer/linux/conf/bin/bk-mount-instance-storage.sh +++ b/packer/linux/conf/bin/bk-mount-instance-storage.sh @@ -67,9 +67,9 @@ elif [[ "${#devices[@]}" -gt 1 ]]; then echo Setting readahead to 64k... 
 blockdev --setra 65536 "$logicalname" else - echo Expected at least once nvme device, found: "${devices[*]}" + echo Expected at least one nvme device, found: "${devices[*]}" echo - echo This error is unexpected. Please contact support@buildkite.com + echo This error is unexpected. Please contact support@buildkite.com. exit 1 fi From 19b8bbaa7500f312cda98db483d78fb1880a7022 Mon Sep 17 00:00:00 2001 From: Narthana Epa Date: Mon, 2 Oct 2023 22:59:49 +1100 Subject: [PATCH 4/4] Fix scripts printing success when erroring due to unbound variables --- packer/linux/conf/bin/bk-configure-docker.sh | 2 +- packer/linux/conf/bin/bk-install-elastic-stack.sh | 2 +- packer/linux/conf/bin/bk-mount-instance-storage.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packer/linux/conf/bin/bk-configure-docker.sh b/packer/linux/conf/bin/bk-configure-docker.sh index cd73c07df..434ca941c 100755 --- a/packer/linux/conf/bin/bk-configure-docker.sh +++ b/packer/linux/conf/bin/bk-configure-docker.sh @@ -17,7 +17,7 @@ on_exit() { echo "${BASH_SOURCE[0]} completed successfully." } -trap on_exit EXIT +trap '[[ $? = 0 ]] && on_exit' EXIT ## Configure docker before system starts diff --git a/packer/linux/conf/bin/bk-install-elastic-stack.sh b/packer/linux/conf/bin/bk-install-elastic-stack.sh index c7f657a31..35ce5ff9a 100755 --- a/packer/linux/conf/bin/bk-install-elastic-stack.sh +++ b/packer/linux/conf/bin/bk-install-elastic-stack.sh @@ -35,7 +35,7 @@ on_exit() { echo "${BASH_SOURCE[0]} completed successfully." } -trap on_exit EXIT +trap '[[ $? 
= 0 ]] && on_exit' EXIT # Write to system console and to our log file # See https://alestic.com/2010/12/ec2-user-data-output/ diff --git a/packer/linux/conf/bin/bk-mount-instance-storage.sh b/packer/linux/conf/bin/bk-mount-instance-storage.sh index 837f51bce..20ae79808 100755 --- a/packer/linux/conf/bin/bk-mount-instance-storage.sh +++ b/packer/linux/conf/bin/bk-mount-instance-storage.sh @@ -16,7 +16,7 @@ on_exit() { echo "${BASH_SOURCE[0]} completed successfully." } -trap on_exit EXIT +trap '[[ $? = 0 ]] && on_exit' EXIT # Write to system console and to our log file # See https://alestic.com/2010/12/ec2-user-data-output/