diff --git a/deploy/jiniaslog/monolith/redis/redis.yml b/deploy/jiniaslog/monolith/redis/redis.yml new file mode 100644 index 00000000..b51033c6 --- /dev/null +++ b/deploy/jiniaslog/monolith/redis/redis.yml @@ -0,0 +1,1216 @@ +--- +# Source: redis-ha/templates/redis-ha-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: jinias-redis-ha + namespace: "default" + labels: + release: jinias + app: jinias-redis-ha +--- +# Source: redis-ha/templates/redis-ha-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: jinias-redis-ha-configmap + namespace: "default" + labels: + release: jinias + chart: redis-ha-4.29.4 + app: jinias-redis-ha +data: + redis.conf: | + dir "/data" + port 6379 + rename-command FLUSHDB "" + rename-command FLUSHALL "" + maxmemory 1gb + maxmemory-policy volatile-lru + min-replicas-max-lag 5 + min-replicas-to-write 1 + rdbchecksum yes + rdbcompression yes + repl-diskless-sync yes + save 900 1 + + sentinel.conf: | + dir "/data" + port 26379 + sentinel down-after-milliseconds mymaster 10000 + sentinel failover-timeout mymaster 180000 + maxclients 10000 + sentinel parallel-syncs mymaster 5 + + init.sh: | + echo "$(date) Start..." + HOSTNAME="$(hostname)" + INDEX="${HOSTNAME##*-}" + SENTINEL_PORT=26379 + ANNOUNCE_IP='' + MASTER='' + MASTER_GROUP="mymaster" + QUORUM="2" + REDIS_CONF=/data/conf/redis.conf + REDIS_PORT=6379 + REDIS_TLS_PORT= + SENTINEL_CONF=/data/conf/sentinel.conf + SENTINEL_TLS_PORT= + SERVICE=jinias-redis-ha + SENTINEL_TLS_REPLICATION_ENABLED=false + REDIS_TLS_REPLICATION_ENABLED=false + + set -eu + sentinel_get_master() { + set +e + if [ "$SENTINEL_PORT" -eq 0 ]; then + redis-cli -h "${SERVICE}" -p "${SENTINEL_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key sentinel get-master-addr-by-name "${MASTER_GROUP}" |\ + grep -E '((^\s*((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))\s*$)|(^\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?s*$))' + else + redis-cli -h "${SERVICE}" -p "${SENTINEL_PORT}" sentinel get-master-addr-by-name "${MASTER_GROUP}" |\ + grep -E '((^\s*((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))\s*$)|(^\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?s*$))' + fi + set -e + } + + sentinel_get_master_retry() { + master='' + retry=${1} + sleep=3 + for i in $(seq 1 "${retry}"); do + master=$(sentinel_get_master) + if [ -n "${master}" ]; then + break + fi + sleep $((sleep + i)) + done + echo "${master}" + } + + identify_master() { + echo "Identifying redis master (get-master-addr-by-name).." + echo " using sentinel (jinias-redis-ha), sentinel group name (mymaster)" + MASTER="$(sentinel_get_master_retry 3)" + if [ -n "${MASTER}" ]; then + echo " $(date) Found redis master (${MASTER})" + else + echo " $(date) Did not find redis master (${MASTER})" + fi + } + + sentinel_update() { + echo "Updating sentinel config.." + echo " evaluating sentinel id (\${SENTINEL_ID_${INDEX}})" + eval MY_SENTINEL_ID="\$SENTINEL_ID_${INDEX}" + echo " sentinel id (${MY_SENTINEL_ID}), sentinel grp (${MASTER_GROUP}), quorum (${QUORUM})" + sed -i "1s/^/sentinel myid ${MY_SENTINEL_ID}\\n/" "${SENTINEL_CONF}" + if [ "$SENTINEL_TLS_REPLICATION_ENABLED" = true ]; then + echo " redis master (${1}:${REDIS_TLS_PORT})" + sed -i "2s/^/sentinel monitor ${MASTER_GROUP} ${1} ${REDIS_TLS_PORT} ${QUORUM} \\n/" "${SENTINEL_CONF}" + else + echo " redis master (${1}:${REDIS_PORT})" + sed -i "2s/^/sentinel monitor ${MASTER_GROUP} ${1} ${REDIS_PORT} ${QUORUM} \\n/" "${SENTINEL_CONF}" + fi + echo "sentinel announce-ip ${ANNOUNCE_IP}" >> ${SENTINEL_CONF} + if [ "$SENTINEL_PORT" -eq 0 ]; then + echo " announce (${ANNOUNCE_IP}:${SENTINEL_TLS_PORT})" + echo "sentinel announce-port ${SENTINEL_TLS_PORT}" >> ${SENTINEL_CONF} + else + echo " announce (${ANNOUNCE_IP}:${SENTINEL_PORT})" + echo "sentinel announce-port ${SENTINEL_PORT}" >> ${SENTINEL_CONF} + fi + } + + redis_update() { + echo "Updating redis config.." + if [ "$REDIS_TLS_REPLICATION_ENABLED" = true ]; then + echo " we are slave of redis master (${1}:${REDIS_TLS_PORT})" + echo "slaveof ${1} ${REDIS_TLS_PORT}" >> "${REDIS_CONF}" + echo "slave-announce-port ${REDIS_TLS_PORT}" >> ${REDIS_CONF} + else + echo " we are slave of redis master (${1}:${REDIS_PORT})" + echo "slaveof ${1} ${REDIS_PORT}" >> "${REDIS_CONF}" + echo "slave-announce-port ${REDIS_PORT}" >> ${REDIS_CONF} + fi + echo "slave-announce-ip ${ANNOUNCE_IP}" >> ${REDIS_CONF} + } + + copy_config() { + echo "Copying default redis config.." + echo " to '${REDIS_CONF}'" + cp /readonly-config/redis.conf "${REDIS_CONF}" + echo "Copying default sentinel config.." + echo " to '${SENTINEL_CONF}'" + cp /readonly-config/sentinel.conf "${SENTINEL_CONF}" + } + + setup_defaults() { + echo "Setting up defaults.." + echo " using statefulset index (${INDEX})" + if [ "${INDEX}" = "0" ]; then + echo "Setting this pod as master for redis and sentinel.." + echo " using announce (${ANNOUNCE_IP})" + redis_update "${ANNOUNCE_IP}" + sentinel_update "${ANNOUNCE_IP}" + echo " make sure ${ANNOUNCE_IP} is not a slave (slaveof no one)" + sed -i "s/^.*slaveof.*//" "${REDIS_CONF}" + else + echo "Getting redis master ip.." + echo " blindly assuming (${SERVICE}-announce-0) or (${SERVICE}-server-0) are master" + DEFAULT_MASTER="$(getent_hosts 0 | awk '{ print $1 }')" + if [ -z "${DEFAULT_MASTER}" ]; then + echo "Error: Unable to resolve redis master (getent hosts)." + exit 1 + fi + echo " identified redis (may be redis master) ip (${DEFAULT_MASTER})" + echo "Setting default slave config for redis and sentinel.." + echo " using master ip (${DEFAULT_MASTER})" + redis_update "${DEFAULT_MASTER}" + sentinel_update "${DEFAULT_MASTER}" + fi + } + + redis_ping() { + set +e + if [ "$REDIS_PORT" -eq 0 ]; then + redis-cli -h "${MASTER}" -p "${REDIS_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key ping + else + redis-cli -h "${MASTER}" -p "${REDIS_PORT}" ping + fi + set -e + } + + redis_ping_retry() { + ping='' + retry=${1} + sleep=3 + for i in $(seq 1 "${retry}"); do + if [ "$(redis_ping)" = "PONG" ]; then + ping='PONG' + break + fi + sleep $((sleep + i)) + MASTER=$(sentinel_get_master) + done + echo "${ping}" + } + + find_master() { + echo "Verifying redis master.." + if [ "$REDIS_PORT" -eq 0 ]; then + echo " ping (${MASTER}:${REDIS_TLS_PORT})" + else + echo " ping (${MASTER}:${REDIS_PORT})" + fi + if [ "$(redis_ping_retry 3)" != "PONG" ]; then + echo " $(date) Can't ping redis master (${MASTER})" + echo "Attempting to force failover (sentinel failover).." + + if [ "$SENTINEL_PORT" -eq 0 ]; then + echo " on sentinel (${SERVICE}:${SENTINEL_TLS_PORT}), sentinel grp (${MASTER_GROUP})" + if redis-cli -h "${SERVICE}" -p "${SENTINEL_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key sentinel failover "${MASTER_GROUP}" | grep -q 'NOGOODSLAVE' ; then + echo " $(date) Failover returned with 'NOGOODSLAVE'" + echo "Setting defaults for this pod.." + setup_defaults + return 0 + fi + else + echo " on sentinel (${SERVICE}:${SENTINEL_PORT}), sentinel grp (${MASTER_GROUP})" + if redis-cli -h "${SERVICE}" -p "${SENTINEL_PORT}" sentinel failover "${MASTER_GROUP}" | grep -q 'NOGOODSLAVE' ; then + echo " $(date) Failover returned with 'NOGOODSLAVE'" + echo "Setting defaults for this pod.." + setup_defaults + return 0 + fi + fi + + echo "Hold on for 10sec" + sleep 10 + echo "We should get redis master's ip now. Asking (get-master-addr-by-name).." + if [ "$SENTINEL_PORT" -eq 0 ]; then + echo " sentinel (${SERVICE}:${SENTINEL_TLS_PORT}), sentinel grp (${MASTER_GROUP})" + else + echo " sentinel (${SERVICE}:${SENTINEL_PORT}), sentinel grp (${MASTER_GROUP})" + fi + MASTER="$(sentinel_get_master)" + if [ "${MASTER}" ]; then + echo " $(date) Found redis master (${MASTER})" + echo "Updating redis and sentinel config.." + sentinel_update "${MASTER}" + redis_update "${MASTER}" + else + echo "$(date) Error: Could not failover, exiting..." + exit 1 + fi + else + echo " $(date) Found reachable redis master (${MASTER})" + echo "Updating redis and sentinel config.." + sentinel_update "${MASTER}" + redis_update "${MASTER}" + fi + } + + redis_ro_update() { + echo "Updating read-only redis config.." + echo " redis.conf set 'replica-priority 0'" + echo "replica-priority 0" >> ${REDIS_CONF} + } + + getent_hosts() { + index=${1:-${INDEX}} + service="${SERVICE}-announce-${index}" + host=$(getent hosts "${service}") + echo "${host}" + } + + identify_announce_ip() { + echo "Identify announce ip for this pod.." + echo " using (${SERVICE}-announce-${INDEX}) or (${SERVICE}-server-${INDEX})" + ANNOUNCE_IP=$(getent_hosts | awk '{ print $1 }') + echo " identified announce (${ANNOUNCE_IP})" + } + + mkdir -p /data/conf/ + + echo "Initializing config.." + copy_config + + # where is redis master + identify_master + + identify_announce_ip + + if [ -z "${ANNOUNCE_IP}" ]; then + "Error: Could not resolve the announce ip for this pod." + exit 1 + elif [ "${MASTER}" ]; then + find_master + else + setup_defaults + fi + + if [ "${AUTH:-}" ]; then + echo "Setting redis auth values.." + ESCAPED_AUTH=$(echo "${AUTH}" | sed -e 's/[\/&]/\\&/g'); + sed -i "s/replace-default-auth/${ESCAPED_AUTH}/" "${REDIS_CONF}" "${SENTINEL_CONF}" + fi + + if [ "${SENTINELAUTH:-}" ]; then + echo "Setting sentinel auth values" + ESCAPED_AUTH_SENTINEL=$(echo "$SENTINELAUTH" | sed -e 's/[\/&]/\\&/g'); + sed -i "s/replace-default-sentinel-auth/${ESCAPED_AUTH_SENTINEL}/" "$SENTINEL_CONF" + fi + + echo "$(date) Ready..." + + fix-split-brain.sh: | + HOSTNAME="$(hostname)" + INDEX="${HOSTNAME##*-}" + SENTINEL_PORT=26379 + ANNOUNCE_IP='' + MASTER='' + MASTER_GROUP="mymaster" + QUORUM="2" + REDIS_CONF=/data/conf/redis.conf + REDIS_PORT=6379 + REDIS_TLS_PORT= + SENTINEL_CONF=/data/conf/sentinel.conf + SENTINEL_TLS_PORT= + SERVICE=jinias-redis-ha + SENTINEL_TLS_REPLICATION_ENABLED=false + REDIS_TLS_REPLICATION_ENABLED=false + + ROLE='' + REDIS_MASTER='' + + set -eu + sentinel_get_master() { + set +e + if [ "$SENTINEL_PORT" -eq 0 ]; then + redis-cli -h "${SERVICE}" -p "${SENTINEL_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key sentinel get-master-addr-by-name "${MASTER_GROUP}" |\ + grep -E '((^\s*((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))\s*$)|(^\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?s*$))' + else + redis-cli -h "${SERVICE}" -p "${SENTINEL_PORT}" sentinel get-master-addr-by-name "${MASTER_GROUP}" |\ + grep -E '((^\s*((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))\s*$)|(^\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?s*$))' + fi + set -e + } + + sentinel_get_master_retry() { + master='' + retry=${1} + sleep=3 + for i in $(seq 1 "${retry}"); do + master=$(sentinel_get_master) + if [ -n "${master}" ]; then + break + fi + sleep $((sleep + i)) + done + echo "${master}" + } + + identify_master() { + echo "Identifying redis master (get-master-addr-by-name).." + echo " using sentinel (jinias-redis-ha), sentinel group name (mymaster)" + MASTER="$(sentinel_get_master_retry 3)" + if [ -n "${MASTER}" ]; then + echo " $(date) Found redis master (${MASTER})" + else + echo " $(date) Did not find redis master (${MASTER})" + fi + } + + sentinel_update() { + echo "Updating sentinel config.." + echo " evaluating sentinel id (\${SENTINEL_ID_${INDEX}})" + eval MY_SENTINEL_ID="\$SENTINEL_ID_${INDEX}" + echo " sentinel id (${MY_SENTINEL_ID}), sentinel grp (${MASTER_GROUP}), quorum (${QUORUM})" + sed -i "1s/^/sentinel myid ${MY_SENTINEL_ID}\\n/" "${SENTINEL_CONF}" + if [ "$SENTINEL_TLS_REPLICATION_ENABLED" = true ]; then + echo " redis master (${1}:${REDIS_TLS_PORT})" + sed -i "2s/^/sentinel monitor ${MASTER_GROUP} ${1} ${REDIS_TLS_PORT} ${QUORUM} \\n/" "${SENTINEL_CONF}" + else + echo " redis master (${1}:${REDIS_PORT})" + sed -i "2s/^/sentinel monitor ${MASTER_GROUP} ${1} ${REDIS_PORT} ${QUORUM} \\n/" "${SENTINEL_CONF}" + fi + echo "sentinel announce-ip ${ANNOUNCE_IP}" >> ${SENTINEL_CONF} + if [ "$SENTINEL_PORT" -eq 0 ]; then + echo " announce (${ANNOUNCE_IP}:${SENTINEL_TLS_PORT})" + echo "sentinel announce-port ${SENTINEL_TLS_PORT}" >> ${SENTINEL_CONF} + else + echo " announce (${ANNOUNCE_IP}:${SENTINEL_PORT})" + echo "sentinel announce-port ${SENTINEL_PORT}" >> ${SENTINEL_CONF} + fi + } + + redis_update() { + echo "Updating redis config.." + if [ "$REDIS_TLS_REPLICATION_ENABLED" = true ]; then + echo " we are slave of redis master (${1}:${REDIS_TLS_PORT})" + echo "slaveof ${1} ${REDIS_TLS_PORT}" >> "${REDIS_CONF}" + echo "slave-announce-port ${REDIS_TLS_PORT}" >> ${REDIS_CONF} + else + echo " we are slave of redis master (${1}:${REDIS_PORT})" + echo "slaveof ${1} ${REDIS_PORT}" >> "${REDIS_CONF}" + echo "slave-announce-port ${REDIS_PORT}" >> ${REDIS_CONF} + fi + echo "slave-announce-ip ${ANNOUNCE_IP}" >> ${REDIS_CONF} + } + + copy_config() { + echo "Copying default redis config.." + echo " to '${REDIS_CONF}'" + cp /readonly-config/redis.conf "${REDIS_CONF}" + echo "Copying default sentinel config.." + echo " to '${SENTINEL_CONF}'" + cp /readonly-config/sentinel.conf "${SENTINEL_CONF}" + } + + setup_defaults() { + echo "Setting up defaults.." + echo " using statefulset index (${INDEX})" + if [ "${INDEX}" = "0" ]; then + echo "Setting this pod as master for redis and sentinel.." + echo " using announce (${ANNOUNCE_IP})" + redis_update "${ANNOUNCE_IP}" + sentinel_update "${ANNOUNCE_IP}" + echo " make sure ${ANNOUNCE_IP} is not a slave (slaveof no one)" + sed -i "s/^.*slaveof.*//" "${REDIS_CONF}" + else + echo "Getting redis master ip.." + echo " blindly assuming (${SERVICE}-announce-0) or (${SERVICE}-server-0) are master" + DEFAULT_MASTER="$(getent_hosts 0 | awk '{ print $1 }')" + if [ -z "${DEFAULT_MASTER}" ]; then + echo "Error: Unable to resolve redis master (getent hosts)." + exit 1 + fi + echo " identified redis (may be redis master) ip (${DEFAULT_MASTER})" + echo "Setting default slave config for redis and sentinel.." + echo " using master ip (${DEFAULT_MASTER})" + redis_update "${DEFAULT_MASTER}" + sentinel_update "${DEFAULT_MASTER}" + fi + } + + redis_ping() { + set +e + if [ "$REDIS_PORT" -eq 0 ]; then + redis-cli -h "${MASTER}" -p "${REDIS_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key ping + else + redis-cli -h "${MASTER}" -p "${REDIS_PORT}" ping + fi + set -e + } + + redis_ping_retry() { + ping='' + retry=${1} + sleep=3 + for i in $(seq 1 "${retry}"); do + if [ "$(redis_ping)" = "PONG" ]; then + ping='PONG' + break + fi + sleep $((sleep + i)) + MASTER=$(sentinel_get_master) + done + echo "${ping}" + } + + find_master() { + echo "Verifying redis master.." + if [ "$REDIS_PORT" -eq 0 ]; then + echo " ping (${MASTER}:${REDIS_TLS_PORT})" + else + echo " ping (${MASTER}:${REDIS_PORT})" + fi + if [ "$(redis_ping_retry 3)" != "PONG" ]; then + echo " $(date) Can't ping redis master (${MASTER})" + echo "Attempting to force failover (sentinel failover).." + + if [ "$SENTINEL_PORT" -eq 0 ]; then + echo " on sentinel (${SERVICE}:${SENTINEL_TLS_PORT}), sentinel grp (${MASTER_GROUP})" + if redis-cli -h "${SERVICE}" -p "${SENTINEL_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key sentinel failover "${MASTER_GROUP}" | grep -q 'NOGOODSLAVE' ; then + echo " $(date) Failover returned with 'NOGOODSLAVE'" + echo "Setting defaults for this pod.." + setup_defaults + return 0 + fi + else + echo " on sentinel (${SERVICE}:${SENTINEL_PORT}), sentinel grp (${MASTER_GROUP})" + if redis-cli -h "${SERVICE}" -p "${SENTINEL_PORT}" sentinel failover "${MASTER_GROUP}" | grep -q 'NOGOODSLAVE' ; then + echo " $(date) Failover returned with 'NOGOODSLAVE'" + echo "Setting defaults for this pod.." + setup_defaults + return 0 + fi + fi + + echo "Hold on for 10sec" + sleep 10 + echo "We should get redis master's ip now. Asking (get-master-addr-by-name).." + if [ "$SENTINEL_PORT" -eq 0 ]; then + echo " sentinel (${SERVICE}:${SENTINEL_TLS_PORT}), sentinel grp (${MASTER_GROUP})" + else + echo " sentinel (${SERVICE}:${SENTINEL_PORT}), sentinel grp (${MASTER_GROUP})" + fi + MASTER="$(sentinel_get_master)" + if [ "${MASTER}" ]; then + echo " $(date) Found redis master (${MASTER})" + echo "Updating redis and sentinel config.." + sentinel_update "${MASTER}" + redis_update "${MASTER}" + else + echo "$(date) Error: Could not failover, exiting..." + exit 1 + fi + else + echo " $(date) Found reachable redis master (${MASTER})" + echo "Updating redis and sentinel config.." + sentinel_update "${MASTER}" + redis_update "${MASTER}" + fi + } + + redis_ro_update() { + echo "Updating read-only redis config.." + echo " redis.conf set 'replica-priority 0'" + echo "replica-priority 0" >> ${REDIS_CONF} + } + + getent_hosts() { + index=${1:-${INDEX}} + service="${SERVICE}-announce-${index}" + host=$(getent hosts "${service}") + echo "${host}" + } + + identify_announce_ip() { + echo "Identify announce ip for this pod.." + echo " using (${SERVICE}-announce-${INDEX}) or (${SERVICE}-server-${INDEX})" + ANNOUNCE_IP=$(getent_hosts | awk '{ print $1 }') + echo " identified announce (${ANNOUNCE_IP})" + } + + redis_role() { + set +e + if [ "$REDIS_PORT" -eq 0 ]; then + ROLE=$(redis-cli -p "${REDIS_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key info | grep role | sed 's/role://' | sed 's/\r//') + else + ROLE=$(redis-cli -p "${REDIS_PORT}" info | grep role | sed 's/role://' | sed 's/\r//') + fi + set -e + } + + identify_redis_master() { + set +e + if [ "$REDIS_PORT" -eq 0 ]; then + REDIS_MASTER=$(redis-cli -p "${REDIS_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key info | grep master_host | sed 's/master_host://' | sed 's/\r//') + else + REDIS_MASTER=$(redis-cli -p "${REDIS_PORT}" info | grep master_host | sed 's/master_host://' | sed 's/\r//') + fi + set -e + } + + reinit() { + set +e + sh /readonly-config/init.sh + + if [ "$REDIS_PORT" -eq 0 ]; then + echo "shutdown" | redis-cli -p "${REDIS_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key + else + echo "shutdown" | redis-cli -p "${REDIS_PORT}" + fi + set -e + } + + identify_announce_ip + + while [ -z "${ANNOUNCE_IP}" ]; do + echo "Error: Could not resolve the announce ip for this pod." + sleep 30 + identify_announce_ip + done + + trap "exit 0" TERM + while true; do + sleep 60 + + # where is redis master + identify_master + + if [ "$MASTER" = "$ANNOUNCE_IP" ]; then + redis_role + if [ "$ROLE" != "master" ]; then + reinit + fi + elif [ "${MASTER}" ]; then + identify_redis_master + if [ "$REDIS_MASTER" != "$MASTER" ]; then + reinit + fi + fi + done + + + haproxy_init.sh: | + HAPROXY_CONF=/data/haproxy.cfg + cp /readonly/haproxy.cfg "$HAPROXY_CONF" + for loop in $(seq 1 10); do + getent hosts jinias-redis-ha-announce-0 && break + echo "Waiting for service jinias-redis-ha-announce-0 to be ready ($loop) ..." && sleep 1 + done + ANNOUNCE_IP0=$(getent hosts "jinias-redis-ha-announce-0" | awk '{ print $1 }') + if [ -z "$ANNOUNCE_IP0" ]; then + echo "Could not resolve the announce ip for jinias-redis-ha-announce-0" + exit 1 + fi + sed -i "s/REPLACE_ANNOUNCE0/$ANNOUNCE_IP0/" "$HAPROXY_CONF" + for loop in $(seq 1 10); do + getent hosts jinias-redis-ha-announce-1 && break + echo "Waiting for service jinias-redis-ha-announce-1 to be ready ($loop) ..." && sleep 1 + done + ANNOUNCE_IP1=$(getent hosts "jinias-redis-ha-announce-1" | awk '{ print $1 }') + if [ -z "$ANNOUNCE_IP1" ]; then + echo "Could not resolve the announce ip for jinias-redis-ha-announce-1" + exit 1 + fi + sed -i "s/REPLACE_ANNOUNCE1/$ANNOUNCE_IP1/" "$HAPROXY_CONF" + for loop in $(seq 1 10); do + getent hosts jinias-redis-ha-announce-2 && break + echo "Waiting for service jinias-redis-ha-announce-2 to be ready ($loop) ..." && sleep 1 + done + ANNOUNCE_IP2=$(getent hosts "jinias-redis-ha-announce-2" | awk '{ print $1 }') + if [ -z "$ANNOUNCE_IP2" ]; then + echo "Could not resolve the announce ip for jinias-redis-ha-announce-2" + exit 1 + fi + sed -i "s/REPLACE_ANNOUNCE2/$ANNOUNCE_IP2/" "$HAPROXY_CONF" + trigger-failover-if-master.sh: | + get_redis_role() { + is_master=$( + redis-cli \ + -h localhost \ + -p 6379 \ + info | grep -c 'role:master' || true + ) + } + get_redis_role + if [[ "$is_master" -eq 1 ]]; then + echo "This node is currently master, we trigger a failover." + response=$( + redis-cli \ + -h localhost \ + -p 26379 \ + SENTINEL failover mymaster + ) + if [[ "$response" != "OK" ]] ; then + echo "$response" + exit 1 + fi + timeout=30 + while [[ "$is_master" -eq 1 && $timeout -gt 0 ]]; do + sleep 1 + get_redis_role + timeout=$((timeout - 1)) + done + echo "Failover successful" + fi +--- +# Source: redis-ha/templates/redis-ha-health-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: jinias-redis-ha-health-configmap + namespace: "default" + labels: + release: jinias + chart: redis-ha-4.29.4 + app: jinias-redis-ha +data: + redis_liveness.sh: | + response=$( + redis-cli \ + -h localhost \ + -p 6379 \ + ping + ) + echo "response=$response" + case $response in + PONG|LOADING*) ;; + *) exit 1 ;; + esac + exit 0 + redis_readiness.sh: | + response=$( + redis-cli \ + -h localhost \ + -p 6379 \ + ping + ) + if [ "$response" != "PONG" ] ; then + echo "ping=$response" + exit 1 + fi + + response=$( + redis-cli \ + -h localhost \ + -p 6379 \ + role + ) + role=$( echo "$response" | sed "1!d" ) + if [ "$role" = "master" ]; then + echo "role=$role" + exit 0 + elif [ "$role" = "slave" ]; then + repl=$( echo "$response" | sed "4!d" ) + echo "role=$role; repl=$repl" + if [ "$repl" = "connected" ]; then + exit 0 + else + exit 1 + fi + else + echo "role=$role" + exit 1 + fi + sentinel_liveness.sh: | + response=$( + redis-cli \ + -h localhost \ + -p 26379 \ + ping + ) + if [ "$response" != "PONG" ]; then + echo "$response" + exit 1 + fi + echo "response=$response" +--- +# Source: redis-ha/templates/redis-ha-role.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: jinias-redis-ha + namespace: "default" + labels: + app: redis-ha + release: "jinias" + chart: redis-ha-4.29.4 +rules: +- apiGroups: + - "" + resources: + - endpoints + verbs: + - get +--- +# Source: redis-ha/templates/redis-ha-rolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: jinias-redis-ha + namespace: "default" + labels: + app: redis-ha + release: "jinias" + chart: redis-ha-4.29.4 +subjects: +- kind: ServiceAccount + name: jinias-redis-ha +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: jinias-redis-ha +--- +# Source: redis-ha/templates/redis-ha-announce-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: jinias-redis-ha-announce-0 + namespace: "default" + labels: + app: redis-ha + release: "jinias" + chart: redis-ha-4.29.4 +spec: + publishNotReadyAddresses: true + type: ClusterIP + ports: + - name: tcp-server + port: 6379 + protocol: TCP + targetPort: redis + - name: tcp-sentinel + port: 26379 + protocol: TCP + targetPort: sentinel + - name: metrics + port: 9121 + protocol: TCP + targetPort: 9121 + selector: + release: jinias + app: redis-ha + "statefulset.kubernetes.io/pod-name": jinias-redis-ha-server-0 +--- +# Source: redis-ha/templates/redis-ha-announce-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: jinias-redis-ha-announce-1 + namespace: "default" + labels: + app: redis-ha + release: "jinias" + chart: redis-ha-4.29.4 +spec: + publishNotReadyAddresses: true + type: ClusterIP + ports: + - name: tcp-server + port: 6379 + protocol: TCP + targetPort: redis + - name: tcp-sentinel + port: 26379 + protocol: TCP + targetPort: sentinel + selector: + release: jinias + app: redis-ha + "statefulset.kubernetes.io/pod-name": jinias-redis-ha-server-1 +--- +# Source: redis-ha/templates/redis-ha-announce-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: jinias-redis-ha-announce-2 + namespace: "default" + labels: + app: redis-ha + release: "jinias" + chart: redis-ha-4.29.4 +spec: + publishNotReadyAddresses: true + type: ClusterIP + ports: + - name: tcp-server + port: 6379 + protocol: TCP + targetPort: redis + - name: tcp-sentinel + port: 26379 + protocol: TCP + targetPort: sentinel + selector: + release: jinias + app: redis-ha + "statefulset.kubernetes.io/pod-name": jinias-redis-ha-server-2 +--- +# Source: redis-ha/templates/redis-ha-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: jinias-redis-ha + namespace: "default" + labels: + app: redis-ha + release: "jinias" + chart: redis-ha-4.29.4 +spec: + type: ClusterIP + clusterIP: None + ports: + - name: tcp-server + port: 6379 + protocol: TCP + targetPort: redis + - name: tcp-sentinel + port: 26379 + protocol: TCP + targetPort: sentinel + selector: + release: jinias + app: redis-ha +--- +# Source: redis-ha/templates/redis-ha-statefulset.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: jinias-redis-ha-server + namespace: "default" + labels: + jinias-redis-ha: replica + app: redis-ha + release: "jinias" + chart: redis-ha-4.29.4 + annotations: + {} +spec: + selector: + matchLabels: + release: jinias + app: redis-ha + serviceName: jinias-redis-ha + replicas: 3 + podManagementPolicy: OrderedReady + updateStrategy: + type: RollingUpdate + template: + metadata: + annotations: + checksum/init-config: 35476e45d4cc022feb5f56366eec7a3f71e1f3a10d86c29d8dca4c122db291fe + prometheus.io/scrape: "true" + prometheus.io/port: "9121" + + labels: + release: jinias + app: redis-ha + jinias-redis-ha: replica + spec: + terminationGracePeriodSeconds: 60 + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: redis-ha + release: jinias + jinias-redis-ha: replica + topologyKey: kubernetes.io/hostname + securityContext: + fsGroup: 1000 + runAsNonRoot: true + runAsUser: 1000 + serviceAccountName: jinias-redis-ha + automountServiceAccountToken: false + initContainers: + - name: config-init + image: public.ecr.aws/docker/library/redis:7.2.4-alpine + imagePullPolicy: IfNotPresent + resources: + {} + command: + - sh + args: + - /readonly-config/init.sh + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + env: + - name: SENTINEL_ID_0 + value: 9942b44090ff59a3a1b9fb7b43a411ed4104d734 + - name: SENTINEL_ID_1 + value: 17c89c97289988e3fff73b7950ccdea44fc3c6df + - name: SENTINEL_ID_2 + value: 7409cb0eb3a6d3ac137cf151172e810f3c237307 + volumeMounts: + - name: config + mountPath: /readonly-config + readOnly: true + - name: data + mountPath: /data + containers: + - name: redis + image: public.ecr.aws/docker/library/redis:7.2.4-alpine + imagePullPolicy: IfNotPresent + command: + - redis-server + args: + - /data/conf/redis.conf + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + livenessProbe: + initialDelaySeconds: 30 + periodSeconds: 15 + timeoutSeconds: 15 + successThreshold: 1 + failureThreshold: 5 + exec: + command: + - sh + - -c + - /health/redis_liveness.sh + readinessProbe: + initialDelaySeconds: 30 + periodSeconds: 15 + timeoutSeconds: 15 + successThreshold: 1 + failureThreshold: 5 + exec: + command: + - sh + - -c + - /health/redis_readiness.sh + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 15 + successThreshold: 1 + failureThreshold: 3 + exec: + command: + - sh + - -c + - /health/redis_readiness.sh + resources: + requests: + cpu: "500m" + memory: "512Mi" + limits: + cpu: "1000m" + memory: "1Gi" + ports: + - name: redis + containerPort: 6379 + volumeMounts: + - name: config + mountPath: /readonly-config + readOnly: true + - mountPath: /data + name: data + - mountPath: /health + name: health + lifecycle: + preStop: + exec: + command: + - /bin/sh + - /readonly-config/trigger-failover-if-master.sh + - name: sentinel + image: public.ecr.aws/docker/library/redis:7.2.4-alpine + imagePullPolicy: IfNotPresent + command: + - redis-sentinel + args: + - /data/conf/sentinel.conf + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + livenessProbe: + initialDelaySeconds: 30 + periodSeconds: 15 + timeoutSeconds: 15 + successThreshold: 1 + failureThreshold: 5 + exec: + command: + - sh + - -c + - /health/sentinel_liveness.sh + readinessProbe: + initialDelaySeconds: 30 + periodSeconds: 15 + timeoutSeconds: 15 + successThreshold: 3 + failureThreshold: 5 + exec: + command: + - sh + - -c + - /health/sentinel_liveness.sh + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 15 + successThreshold: 1 + failureThreshold: 3 + exec: + command: + - sh + - -c + - /health/sentinel_liveness.sh + resources: + {} + ports: + - name: sentinel + containerPort: 26379 + volumeMounts: + - mountPath: /data + name: data + - mountPath: /health + name: health + lifecycle: + {} + + - name: redis-exporter + image: oliver006/redis_exporter:v1.66.0 + imagePullPolicy: IfNotPresent + env: + - name: REDIS_ADDR + value: "redis://localhost:6379" + ports: + - name: exporter-port + containerPort: 9121 + livenessProbe: + httpGet: + path: / + port: exporter-port + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + httpGet: + path: / + port: exporter-port + initialDelaySeconds: 5 + periodSeconds: 10 + + - name: split-brain-fix + image: public.ecr.aws/docker/library/redis:7.2.4-alpine + imagePullPolicy: IfNotPresent + command: + - sh + args: + - /readonly-config/fix-split-brain.sh + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + env: + - name: SENTINEL_ID_0 + value: 9942b44090ff59a3a1b9fb7b43a411ed4104d734 + - name: SENTINEL_ID_1 + value: 17c89c97289988e3fff73b7950ccdea44fc3c6df + - name: SENTINEL_ID_2 + value: 7409cb0eb3a6d3ac137cf151172e810f3c237307 + resources: + {} + volumeMounts: + - name: config + mountPath: /readonly-config + readOnly: true + - mountPath: /data + name: data + volumes: + - name: config + configMap: + name: jinias-redis-ha-configmap + - name: health + configMap: + name: jinias-redis-ha-health-configmap + defaultMode: 0755 + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: data + labels: + {} + + spec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: "1Gi" +--- +# Source: redis-ha/templates/tests/test-redis-ha-configmap.yaml +apiVersion: v1 +kind: Pod +metadata: + name: jinias-redis-ha-configmap-test + namespace: "default" + labels: + app: redis-ha + release: "jinias" + chart: redis-ha-4.29.4 + annotations: + "helm.sh/hook": test-success +spec: + nodeSelector: + {} + tolerations: + null + containers: + - name: check-init + image: koalaman/shellcheck:v0.5.0 + args: + - --shell=sh + - /readonly-config/init.sh + volumeMounts: + - name: config + mountPath: /readonly-config + readOnly: true + resources: + {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + restartPolicy: Never + volumes: + - name: config + configMap: + name: jinias-redis-ha-configmap diff --git a/deploy/o11y/grafana.yml b/deploy/o11y/grafana.yml new file mode 100644 index 00000000..ac1684a0 --- /dev/null +++ b/deploy/o11y/grafana.yml @@ -0,0 +1,72 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: grafana + namespace: o11y +spec: + replicas: 1 + selector: + matchLabels: + app: grafana + template: + metadata: + labels: + app: grafana + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "3000" + + spec: + containers: + - name: grafana + image: grafana/grafana:latest + env: + - name: GF_AUTH_ANONYMOUS_ENABLED + value: "true" + - name: GF_AUTH_ANONYMOUS_ORG_ROLE + value: "Admin" + - name: GF_AUTH_DISABLE_LOGIN_FORM + value: "true" + - name: GF_FEATURE_TOGGLES_ENABLE + value: "traceqlEditor" + ports: + - containerPort: 3000 + resources: + requests: + memory: "256Mi" + cpu: "250m" + limits: + memory: "512Mi" + cpu: "500m" + volumeMounts: + - name: grafana-storage + mountPath: /var/lib/grafana + - name: config-volume + mountPath: /etc/grafana/provisioning/datasources/datasources.yml + subPath: grafana-datasources.yml + volumes: + - name: config-volume + configMap: + name: grafana-datasources-config + volumeClaimTemplates: + - metadata: + name: grafana-storage + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 1Gi +--- +apiVersion: v1 +kind: Service +metadata: + name: grafana +spec: + type: NodePort + ports: + - port: 3000 + protocol: TCP + targetPort: 3000 + nodePort: 31272 + selector: + app: grafana \ No newline at end of file diff --git a/deploy/o11y/kube-state-metrics.yml b/deploy/o11y/kube-state-metrics.yml new file mode 100644 index 00000000..c64156e1 --- /dev/null +++ b/deploy/o11y/kube-state-metrics.yml @@ -0,0 +1,302 @@ +apiVersion: v1 +kind: ServiceAccount +automountServiceAccountToken: true +metadata: + labels: + helm.sh/chart: kube-state-metrics-5.26.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: my-kube-state-metrics + app.kubernetes.io/version: "2.13.0" + name: my-kube-state-metrics + namespace: o11y +--- +# Source: kube-state-metrics/templates/role.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + helm.sh/chart: kube-state-metrics-5.26.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: my-kube-state-metrics + app.kubernetes.io/version: "2.13.0" + name: my-kube-state-metrics +rules: + + - apiGroups: ["certificates.k8s.io"] + resources: + - certificatesigningrequests + verbs: ["list", "watch"] + + - apiGroups: [""] + resources: + - configmaps + verbs: ["list", "watch"] + + - apiGroups: ["batch"] + resources: + - cronjobs + verbs: ["list", "watch"] + + - apiGroups: ["extensions", "apps"] + resources: + - daemonsets + verbs: ["list", "watch"] + + - apiGroups: ["extensions", "apps"] + resources: + - deployments + verbs: ["list", "watch"] + + - apiGroups: [""] + resources: + - endpoints + verbs: ["list", "watch"] + + - apiGroups: ["autoscaling"] + resources: + - horizontalpodautoscalers + verbs: ["list", "watch"] + + - apiGroups: ["extensions", "networking.k8s.io"] + resources: + - ingresses + verbs: ["list", "watch"] + + - apiGroups: ["batch"] + resources: + - jobs + verbs: ["list", "watch"] + + - apiGroups: ["coordination.k8s.io"] + resources: + - leases + verbs: ["list", "watch"] + + - apiGroups: [""] + resources: + - limitranges + verbs: ["list", "watch"] + + - apiGroups: ["admissionregistration.k8s.io"] + resources: + - mutatingwebhookconfigurations + verbs: ["list", "watch"] + + - apiGroups: [""] + resources: + - namespaces + verbs: ["list", "watch"] + + - apiGroups: ["networking.k8s.io"] + resources: + - networkpolicies + verbs: ["list", "watch"] + + - apiGroups: [""] + resources: + - nodes + verbs: ["list", "watch"] + + - apiGroups: [""] + resources: + - persistentvolumeclaims + verbs: ["list", "watch"] + + - apiGroups: [""] + resources: + - persistentvolumes + verbs: ["list", "watch"] + + - apiGroups: ["policy"] + resources: + - poddisruptionbudgets + verbs: ["list", "watch"] + + - apiGroups: [""] + resources: + - pods + verbs: ["list", "watch"] + + - apiGroups: ["extensions", "apps"] + resources: + - replicasets + verbs: ["list", "watch"] + + - apiGroups: [""] + resources: + - replicationcontrollers + verbs: ["list", "watch"] + + - apiGroups: [""] + resources: + - resourcequotas + verbs: ["list", "watch"] + + - apiGroups: [""] + resources: + - secrets + verbs: ["list", "watch"] + + - apiGroups: [""] + resources: + - services + verbs: ["list", "watch"] + + - apiGroups: ["apps"] + resources: + - statefulsets + verbs: ["list", "watch"] + + - apiGroups: ["storage.k8s.io"] + resources: + - storageclasses + verbs: ["list", "watch"] + + - apiGroups: ["admissionregistration.k8s.io"] + resources: + - validatingwebhookconfigurations + verbs: ["list", "watch"] + + - apiGroups: ["storage.k8s.io"] + resources: + - volumeattachments + verbs: ["list", "watch"] +--- +# Source: kube-state-metrics/templates/clusterrolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + helm.sh/chart: kube-state-metrics-5.26.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: my-kube-state-metrics + app.kubernetes.io/version: "2.13.0" + name: my-kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: my-kube-state-metrics +subjects: + - kind: ServiceAccount + name: my-kube-state-metrics + namespace: o11y +--- +# Source: kube-state-metrics/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: my-kube-state-metrics + namespace: o11y + labels: + helm.sh/chart: kube-state-metrics-5.26.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/version: "2.13.0" + annotations: + prometheus.io/scrape: 'true' +spec: + type: "ClusterIP" + ports: + - name: "http" + protocol: TCP + port: 8080 + targetPort: 8080 + + selector: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: my-kube-state-metrics +--- +# Source: kube-state-metrics/templates/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: my-kube-state-metrics + namespace: o11y + labels: + helm.sh/chart: kube-state-metrics-5.26.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: my-kube-state-metrics + app.kubernetes.io/version: "2.13.0" +spec: + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: my-kube-state-metrics + replicas: 1 + strategy: + type: RollingUpdate + revisionHistoryLimit: 10 + template: + metadata: + labels: + helm.sh/chart: kube-state-metrics-5.26.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: my-kube-state-metrics + app.kubernetes.io/version: "2.13.0" + spec: + automountServiceAccountToken: true + hostNetwork: false + serviceAccountName: my-kube-state-metrics + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + containers: + - name: kube-state-metrics + args: + - --port=8080 + - --resources=certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments + imagePullPolicy: IfNotPresent + image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.13.0 + ports: + - containerPort: 8080 + name: "http" + livenessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: + path: /livez + port: 8080 + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + readinessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: + path: /readyz + port: 8080 + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + resources: + {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true diff --git a/deploy/o11y/loki.yml b/deploy/o11y/loki.yml new file mode 100644 index 00000000..7e7e45ae --- /dev/null +++ b/deploy/o11y/loki.yml @@ -0,0 +1,55 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: loki +spec: + replicas: 1 + selector: + matchLabels: + app: loki + template: + metadata: + labels: + app: loki + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "3100" + prometheus.io/path: "/metrics" + spec: + containers: + - name: loki + image: grafana/loki:latest + args: + - "-config.file=/etc/loki/local-config.yaml" + ports: + - containerPort: 3100 + volumeMounts: + - name: loki-storage + mountPath: /loki + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 1000m + memory: 1Gi + volumeClaimTemplates: + - metadata: + name: loki-storage + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 20Gi +--- +apiVersion: v1 +kind: Service +metadata: + name: loki +spec: + ports: + - protocol: TCP + port: 3100 + targetPort: 3100 + selector: + app: loki \ No newline at end of file diff --git a/deploy/o11y/node-exporter.yml b/deploy/o11y/node-exporter.yml new file mode 100644 index 00000000..e3e59ff8 --- /dev/null +++ b/deploy/o11y/node-exporter.yml @@ -0,0 +1,38 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-exporter + namespace: o11y + labels: + app: node-exporter +spec: + selector: + matchLabels: + app: node-exporter + template: + metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/path: /metrics + prometheus.io/port: "9100" + labels: + app: node-exporter + spec: + containers: + - name: node-exporter + image: prom/node-exporter:latest + ports: + - containerPort: 9100 + name: metrics + resources: + limits: + cpu: 40m + memory: 50Mi + requests: + cpu: 20m + memory: 30Mi + securityContext: + privileged: true + hostNetwork: true + hostPID: true + hostIPC: true \ No newline at end of file diff --git a/deploy/o11y/otel-collector-scraper.yml b/deploy/o11y/otel-collector-scraper.yml new file mode 100644 index 00000000..abdf4133 --- /dev/null +++ b/deploy/o11y/otel-collector-scraper.yml @@ -0,0 +1,66 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: otel-collector-scraper + namespace: o11y +spec: + replicas: 1 + selector: + matchLabels: + app: otel-collector + template: + metadata: + labels: + app: otel-collector + spec: + serviceAccountName: otel-collector # 새로 만든 ServiceAccount 설정 + automountServiceAccountToken: true # 토큰 자동 마운트 설정 + containers: + - name: otel-collector + image: otel/opentelemetry-collector-contrib:latest + args: ["--config=/etc/otel-collector.yml"] + ports: + - containerPort: 4318 + - containerPort: 8888 + - containerPort: 8889 + - containerPort: 13133 + volumeMounts: + - name: config-volume + mountPath: /etc/otel-collector.yml + subPath: otel-collector-config.yml + resources: + requests: + cpu: 500m + memory: 256Mi + limits: + cpu: 1000m + memory: 512Mi + volumes: + - name: config-volume + configMap: + name: otel-collector-config +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-collector +spec: + ports: + - name: otlp-http + protocol: TCP + port: 4318 + targetPort: 4318 + - name: otlp-prometheus + protocol: TCP + port: 8888 + targetPort: 8888 + - name: prometheus-exporter + protocol: TCP + port: 8889 + targetPort: 8889 + - name: health-check + protocol: TCP + port: 13133 + targetPort: 13133 + selector: + app: otel-collector \ No newline at end of file diff --git a/deploy/o11y/prometheus.yml b/deploy/o11y/prometheus.yml new file mode 100644 index 00000000..0f074d74 --- /dev/null +++ b/deploy/o11y/prometheus.yml @@ -0,0 +1,71 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: prometheus + namespace: o11y +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus + template: + metadata: + labels: + app: prometheus + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + + spec: + containers: + - name: prometheus + image: prom/prometheus:latest + args: + - --config.file=/etc/prometheus.yml + - --storage.tsdb.path=/prometheus + - --storage.tsdb.retention.time=14d + - --web.enable-remote-write-receiver + - --enable-feature=exemplar-storage + ports: + - containerPort: 9090 + volumeMounts: + - name: config-volume + mountPath: /etc/prometheus.yml + subPath: prometheus-config.yml + - name: data-volume + mountPath: /prometheus + resources: + requests: + cpu: 250m + memory: 1Gi + limits: + cpu: 1500m + memory: 2Gi + + volumes: + - name: config-volume + configMap: + name: prometheus-config + volumeClaimTemplates: + - metadata: + name: data-volume + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 20Gi +--- +apiVersion: v1 +kind: Service +metadata: + name: prometheus + labels: + operated-prometheus: "true" + self-monitor: "true" +spec: + ports: + - protocol: TCP + port: 9090 + targetPort: 9090 + selector: + app: prometheus \ No newline at end of file diff --git a/deploy/o11y/rpi-exporter.yml b/deploy/o11y/rpi-exporter.yml new file mode 100644 index 00000000..55534bf2 --- /dev/null +++ b/deploy/o11y/rpi-exporter.yml @@ -0,0 +1,34 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: rpi-exporter +spec: + selector: + matchLabels: + app: rpi-exporter + template: + metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/path: /metrics + prometheus.io/port: "9110" + labels: + app: rpi-exporter + spec: + containers: + - name: rpi-exporter + image: d3vilh/rpi_exporter-arm64:latest + ports: + - containerPort: 9110 + securityContext: + privileged: true + resources: + limits: + cpu: 15m + memory: 20Mi + requests: + cpu: 10m + memory: 10Mi + hostNetwork: true + hostPID: true + hostIPC: true \ No newline at end of file diff --git a/deploy/o11y/tempo.yml b/deploy/o11y/tempo.yml new file mode 100644 index 00000000..9bf5d8fc --- /dev/null +++ b/deploy/o11y/tempo.yml @@ -0,0 +1,66 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: tempo + namespace: o11y +spec: + replicas: 1 + selector: + matchLabels: + app: tempo + template: + metadata: + labels: + app: tempo + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "3200" + + spec: + containers: + - name: tempo + image: grafana/tempo:latest + args: ["-config.file=/etc/tempo.yml"] + ports: + - containerPort: 3200 + - containerPort: 4318 + volumeMounts: + - name: config-volume + mountPath: /etc/tempo.yml + subPath: tempo-config.yml + resources: + requests: + cpu: 10m + memory: 512Mi + limits: + cpu: 500m + memory: 1Gi + volumes: + - name: config-volume + configMap: + name: tempo-config + volumeClaimTemplates: + - metadata: + name: tempo-storage + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 20Gi +--- +apiVersion: v1 +kind: Service +metadata: + name: tempo +spec: + ports: + - name: http-tempo + protocol: TCP + port: 3200 + targetPort: 3200 + - name: otlp-http + protocol: TCP + port: 4318 + targetPort: 4318 + selector: + app: tempo \ No newline at end of file