Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate Device Health into SONiC #40

Merged
merged 17 commits into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,6 @@
[submodule "src/dhcpmon"]
path = src/dhcpmon
url = https://github.com/sonic-net/sonic-dhcpmon.git
[submodule "src/sonic-device-health"]
path = src/sonic-device-health
url = https://github.com/renukamanavalan/sonic-device-health.git
2 changes: 2 additions & 0 deletions Makefile.work
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# through http.
# * ENABLE_ZTP: Enables zero touch provisioning.
# * SHUTDOWN_BGP_ON_START: Sets admin-down state for all bgp peerings after restart.
# * INCLUDE_DEVICE_HEALTH: Allows including device-health feature
# * INCLUDE_KUBERNETES: Allows including Kubernetes
# * INCLUDE_KUBERNETES_MASTER: Allows including Kubernetes master
# * INCLUDE_MUX: Include MUX feature/services for TOR switch.
Expand Down Expand Up @@ -508,6 +509,7 @@ SONIC_BUILD_INSTRUCTION := $(MAKE) \
ENABLE_ZTP=$(ENABLE_ZTP) \
INCLUDE_PDE=$(INCLUDE_PDE) \
SHUTDOWN_BGP_ON_START=$(SHUTDOWN_BGP_ON_START) \
INCLUDE_DEVICE_HEALTH=$(INCLUDE_DEVICE_HEALTH) \
INCLUDE_KUBERNETES=$(INCLUDE_KUBERNETES) \
KUBERNETES_VERSION=$(KUBERNETES_VERSION) \
KUBERNETES_CNI_VERSION=$(KUBERNETES_CNI_VERSION) \
Expand Down
8 changes: 8 additions & 0 deletions build_debian.sh
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,14 @@ else
echo '[INFO] Skipping Install kubernetes'
fi

# Device-health: create its shared directory inside the image root filesystem
# when the feature is selected for this build; otherwise just report the skip.
case "$INCLUDE_DEVICE_HEALTH" in
    y)
        sudo mkdir -p $FILESYSTEM_ROOT/usr/share/device_health/
        ;;
    *)
        echo '[INFO] Skipping Install of device-health'
        ;;
esac


if [ "$INCLUDE_KUBERNETES_MASTER" == "y" ]
then
## Install Kubernetes master
Expand Down
44 changes: 44 additions & 0 deletions dockers/docker-device-health/Dockerfile.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{% from "dockers/dockerfile-macros.j2" import install_debian_packages, install_python_wheels, copy_files %}
# Image definition for the device-health (LoM) container. It is layered on the
# bullseye config-engine image and installs the Debian packages listed in
# docker_device_health_debs.
# NOTE(review): install_python_wheels is imported above but not used in this file.
FROM docker-config-engine-bullseye-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}}

# Build-time arguments. docker_container_name is declared but not referenced
# again in this file; image_version is exported below as IMAGE_VERSION.
ARG docker_container_name
ARG image_version

# Make apt-get non-interactive
ENV DEBIAN_FRONTEND=noninteractive

# Pass the image_version to container
# (start.sh forwards ${IMAGE_VERSION} to container_startup.py via -v)
ENV IMAGE_VERSION=$image_version

# Pass the Config location to procs in container
# (docker_init.sh reads procs.conf.json from this same directory)
ENV LOM_CONF_LOCATION=/usr/share/lom/

# Set the binaries to run in Prod mode
ENV LOM_RUN_MODE=PROD

# Enable core dump
# (GOTRACEBACK is the Go runtime's traceback setting)
ENV GOTRACEBACK=crash

# Update apt's cache of available packages
RUN apt-get update

{% if docker_device_health_debs.strip() -%}
# Copy built Debian packages
{{ copy_files("debs/", docker_device_health_debs.split(' '), "/debs/") }}

# Install built Debian packages and implicitly install their dependencies
{{ install_debian_packages(docker_device_health_debs.split(' ')) }}
{%- endif %}

# Clean up
# Drops apt caches and no-longer-needed packages, and removes the /debs
# directory that received the copied packages above.
RUN apt-get clean -y && \
apt-get autoclean -y && \
apt-get autoremove -y && \
rm -rf /debs

# Entrypoint and start-up scripts.
COPY ["docker_init.sh", "start.sh", "/usr/bin/"]
# supervisord template rendered by docker_init.sh when the container starts.
COPY ["lom.supervisord.conf.j2", "/usr/share/sonic/templates/"]
# Event listener referenced from the supervisord template.
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
# List of critical processes for this container.
COPY ["critical_processes", "/etc/supervisor"]

# docker_init.sh generates the supervisord config and then execs supervisord.
ENTRYPOINT ["/usr/bin/docker_init.sh"]
1 change: 1 addition & 0 deletions dockers/docker-device-health/critical_processes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
program:LoMEngine
16 changes: 16 additions & 0 deletions dockers/docker-device-health/docker_init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env bash
#
# Container entrypoint for the device-health container.
# Renders the supervisord configuration for the LoM processes from its Jinja2
# template, then replaces this shell with supervisord.

# Directory that receives the generated supervisord configuration.
mkdir -p /etc/supervisor/conf.d/

# Template, its JSON data source, and the rendered output file.
TEMPLATE_FILE="/usr/share/sonic/templates/lom.supervisord.conf.j2"
PROCS_JSON_FILE="/usr/share/lom/procs.conf.json"
SUPERVISORD_FILE="/etc/supervisor/conf.d/LoMSupervisord.conf"

# Render the template with the JSON file as input. Stop here on failure rather
# than starting supervisord with a missing or partially written configuration.
if ! j2 -f json -o "${SUPERVISORD_FILE}" "${TEMPLATE_FILE}" "${PROCS_JSON_FILE}"; then
    echo "docker_init.sh: failed to render ${TEMPLATE_FILE} with ${PROCS_JSON_FILE}" >&2
    exit 1
fi

# The docker container should start this script as PID 1, so now that supervisord is
# properly configured, we exec /usr/local/bin/supervisord so that it runs as PID 1 for the
# duration of the container's lifetime
exec /usr/local/bin/supervisord
65 changes: 65 additions & 0 deletions dockers/docker-device-health/lom.supervisord.conf.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
; supervisord configuration template for the device-health container.
; docker_init.sh renders it with procs.conf.json as input; the "procs" value
; from that file drives the LoMPluginMgr program sections at the bottom.
; Start-up order: rsyslogd -> start -> LoMEngine -> LoMPluginMgr-<proc>.

[supervisord]
logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true

; Starts the programs below that set dependent_startup=true, honouring their
; dependent_startup_wait_for conditions.
[eventlistener:dependent-startup]
command=python3 -m supervisord_dependent_startup
autostart=true
autorestart=unexpected
startretries=0
exitcodes=0,3
events=PROCESS_STATE
buffer_size=1024

; Listens for process exited/running events. The container name passed here
; is "device-health", the same name used for the FEATURE entry in
; init_cfg.json.j2 and for the container in rules/docker-device-health.mk.
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name device-health
events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING
autostart=true
autorestart=unexpected
buffer_size=1024

[program:rsyslogd]
command=/usr/sbin/rsyslogd -n -iNONE
priority=1
autostart=false
autorestart=unexpected
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true

; One-shot start-up script; runs once rsyslogd is running.
[program:start]
command=/usr/bin/start.sh
priority=2
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=rsyslogd:running


; Launched after the start script has exited.
[program:LoMEngine]
command=/usr/bin/LoMEngine
priority=3
autostart=false
autorestart=false
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=start:exited

; One plugin manager per entry in "procs", each started once LoMEngine is running.
{% for proc in procs %}
[program:LoMPluginMgr-{{ proc }}]
command=/usr/bin/LoMPluginMgr -proc_id={{ proc }}
priority=3
autostart=false
autorestart=false
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=LoMEngine:running
{% endfor %}


12 changes: 12 additions & 0 deletions dockers/docker-device-health/start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash
#
# One-shot start-up hook for the device-health container.
# Runs the container start-up script, when it is present, with the feature
# name, the runtime owner and the image version.
#
# Environment:
#   RUNTIME_OWNER  - owner passed with -o; defaults to "kube" when empty/unset
#   IMAGE_VERSION  - version passed with -v

# Fall back to "kube" when no owner was provided.
if [ -z "${RUNTIME_OWNER}" ]; then
    RUNTIME_OWNER="kube"
fi

CTR_SCRIPT="/usr/share/sonic/scripts/container_startup.py"
if [ -f "${CTR_SCRIPT}" ]; then
    # The feature name is "device-health", matching the FEATURE entry added in
    # init_cfg.json.j2 and the container name in rules/docker-device-health.mk.
    "${CTR_SCRIPT}" -f device-health -o "${RUNTIME_OWNER}" -v "${IMAGE_VERSION}"
fi

17 changes: 17 additions & 0 deletions files/build_templates/device-health.service.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# systemd unit template for the device-health container. Start, wait and stop
# are all delegated to the per-container wrapper script under /usr/bin.
[Unit]
Description=Device-health container
# Pull in eventd.service and order this unit after it.
Requires=eventd.service
After=eventd.service
# Tie this unit's lifetime to sonic.target and order it after the target.
BindsTo=sonic.target
After=sonic.target
# Start rate limiting: at most 3 starts within a 1200 second window.
StartLimitIntervalSec=1200
StartLimitBurst=3

[Service]
# "start" runs before the main command, "wait" is the main command, and
# "stop" is run when the unit is stopped.
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
ExecStart=/usr/bin/{{docker_container_name}}.sh wait
ExecStop=/usr/bin/{{docker_container_name}}.sh stop
# NOTE(review): no Restart= directive appears in this template, and RestartSec
# only matters when a restart policy is in effect - confirm where it is set.
RestartSec=30

[Install]
WantedBy=sonic.target
3 changes: 2 additions & 1 deletion files/build_templates/init_cfg.json.j2
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
{%- if include_sflow == "y" %}{% do features.append(("sflow", "disabled", false, "enabled")) %}{% endif %}
{%- if include_macsec == "y" %}{% do features.append(("macsec", "{% if 'type' in DEVICE_METADATA['localhost'] and DEVICE_METADATA['localhost']['type'] == 'SpineRouter' and DEVICE_RUNTIME_METADATA['MACSEC_SUPPORTED'] %}enabled{% else %}disabled{% endif %}", false, "enabled")) %}{% endif %}
{%- if include_system_telemetry == "y" %}{% do features.append(("telemetry", "enabled", true, "enabled")) %}{% endif %}
{%- if include_device_health == "y" %}{% do features.append(("device-health", "enabled", true, "enabled")) %}{% endif %}
"FEATURE": {
{# delayed field if set, will start the feature systemd .timer unit instead of .service unit #}
{%- for feature, state, delayed, autorestart in features %}
Expand All @@ -75,7 +76,7 @@
"check_up_status" : "false",
{%- endif %}
{%- if include_kubernetes == "y" %}
{%- if feature in ["lldp", "pmon", "radv", "eventd", "snmp", "telemetry"] %}
{%- if feature in ["lldp", "pmon", "radv", "eventd", "snmp", "telemetry", "device-health"] %}
"set_owner": "kube", {% else %}
"set_owner": "local", {% endif %} {% endif %}
"high_mem_alert": "disabled"
Expand Down
3 changes: 3 additions & 0 deletions files/build_templates/rsyslog_plugin.conf.j2
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,6 @@ if re_match($programname, "{{ proc.name }}") then {
template="prog_msg")
}
{% endfor %}


$IncludeConfig /usr/share/device_health/*.conf
4 changes: 4 additions & 0 deletions files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -952,3 +952,7 @@ sudo rm -rf $FILESYSTEM_ROOT/tmp/mask_disabled_services.py


sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y install python3-dbus

{% if include_device_health == 'y' %}
# Create the device-health directory inside the image root filesystem, as the
# build_debian.sh step does, rather than on the build host. The directory is
# bind-mounted into the device-health container and included by rsyslog.
sudo mkdir -p $FILESYSTEM_ROOT/usr/share/device_health
{%- endif %}
3 changes: 3 additions & 0 deletions rules/config
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,9 @@ INCLUDE_TEAMD ?= y
# INCLUDE_ROUTER_ADVERTISER - build docker-router-advertiser for router advertisements support
INCLUDE_ROUTER_ADVERTISER ?= y

# INCLUDE_DEVICE_HEALTH - if set to y installs Device health
INCLUDE_DEVICE_HEALTH ?= y

# INCLUDE_KUBERNETES - if set to y kubernetes packages are installed to be able to
# run as worker node in kubernetes cluster.
INCLUDE_KUBERNETES ?= n
Expand Down
11 changes: 11 additions & 0 deletions rules/device-health.dep
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#DPKG FRK
# Build-cache dependency description for the sonic-device-health package.
# DEP_FILES lists the files tracked by this repository that affect the package
# (common build files plus this package's rule files). The package sources
# live in the src/sonic-device-health git submodule, so they are listed
# separately in SMDEP_FILES instead of overwriting DEP_FILES.
SPATH       := $($(SONIC_DEVICE_HEALTH)_SRC_PATH)
DEP_FILES   := $(SONIC_COMMON_FILES_LIST) rules/device-health.mk rules/device-health.dep
DEP_FILES   += $(SONIC_COMMON_BASE_FILES_LIST)
SMDEP_FILES := $(addprefix $(SPATH)/,$(shell cd $(SPATH) && git ls-files))


$(SONIC_DEVICE_HEALTH)_CACHE_MODE  := GIT_CONTENT_SHA
$(SONIC_DEVICE_HEALTH)_DEP_FLAGS   := $(SONIC_COMMON_FLAGS_LIST)
$(SONIC_DEVICE_HEALTH)_DEP_FILES   := $(DEP_FILES)
$(SONIC_DEVICE_HEALTH)_SMDEP_FILES := $(SMDEP_FILES)
$(SONIC_DEVICE_HEALTH)_SMDEP_PATHS := $(SPATH)

16 changes: 16 additions & 0 deletions rules/device-health.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# SONiC device-health package
# Declares the Debian package built from src/sonic-device-health.

SONIC_DEVICE_HEALTH_VERSION = 1.0.0-0
SONIC_DEVICE_HEALTH_PKG_NAME = device-health

# File name of the resulting .deb: sonic-<pkg name>_<version>_<arch>.deb
SONIC_DEVICE_HEALTH = sonic-$(SONIC_DEVICE_HEALTH_PKG_NAME)_$(SONIC_DEVICE_HEALTH_VERSION)_$(CONFIGURED_ARCH).deb
$(SONIC_DEVICE_HEALTH)_SRC_PATH = $(SRC_PATH)/sonic-device-health
# Packages this one depends on: the swss-common library and its -dev package.
$(SONIC_DEVICE_HEALTH)_DEPENDS = $(LIBSWSSCOMMON_DEV) $(LIBSWSSCOMMON)

SONIC_DPKG_DEBS += $(SONIC_DEVICE_HEALTH)

# Source files under src/<each entry of DBG_SRC_ARCHIVE> are archived into
# the debug image to facilitate debugging. The file patterns that get
# collected are chosen by scripts/dbg_files.sh.
#
DBG_SRC_ARCHIVE += sonic-device-health

11 changes: 11 additions & 0 deletions rules/docker-device-health.dep
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

# Build-cache dependency description for the docker-device-health image.
# DEP_FILES collects the common build files, this image's rule files, and
# every git-tracked file under the image's docker directory.
DPATH := $($(DOCKER_DEVICE_HEALTH)_PATH)
DEP_FILES := $(SONIC_COMMON_FILES_LIST) rules/docker-device-health.mk rules/docker-device-health.dep
DEP_FILES += $(SONIC_COMMON_BASE_FILES_LIST)
DEP_FILES += $(shell git ls-files $(DPATH))

$(DOCKER_DEVICE_HEALTH)_CACHE_MODE := GIT_CONTENT_SHA
$(DOCKER_DEVICE_HEALTH)_DEP_FLAGS := $(SONIC_COMMON_FLAGS_LIST)
$(DOCKER_DEVICE_HEALTH)_DEP_FILES := $(DEP_FILES)

# Associate the debug image with the regular image.
$(eval $(call add_dbg_docker,$(DOCKER_DEVICE_HEALTH),$(DOCKER_DEVICE_HEALTH_DBG)))
43 changes: 43 additions & 0 deletions rules/docker-device-health.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# docker image for device-health agent
# Declares the regular and debug images, what they depend on, whether they are
# installed into the SONiC image, and the options the container runs with.

# Image file names: <stem>.gz and <stem>-<debug mark>.gz
DOCKER_DEVICE_HEALTH_STEM = docker-device-health
DOCKER_DEVICE_HEALTH = $(DOCKER_DEVICE_HEALTH_STEM).gz
DOCKER_DEVICE_HEALTH_DBG = $(DOCKER_DEVICE_HEALTH_STEM)-$(DBG_IMAGE_MARK).gz

# The image needs the sonic-device-health Debian package.
$(DOCKER_DEVICE_HEALTH)_DEPENDS += $(SONIC_DEVICE_HEALTH)

# Debug dependencies: those of the bullseye config-engine base image plus
# this image's own.
# NOTE(review): SONIC_DEVICE_HEALTH_DBG is not defined in rules/device-health.mk
# as shown in this change, so it may expand to nothing - confirm.
$(DOCKER_DEVICE_HEALTH)_DBG_DEPENDS = $($(DOCKER_CONFIG_ENGINE_BULLSEYE)_DBG_DEPENDS)
$(DOCKER_DEVICE_HEALTH)_DBG_DEPENDS += $(SONIC_DEVICE_HEALTH_DBG) $(LIBSWSSCOMMON_DBG)

$(DOCKER_DEVICE_HEALTH)_DBG_IMAGE_PACKAGES = $($(DOCKER_CONFIG_ENGINE_BULLSEYE)_DBG_IMAGE_PACKAGES)

# Base image that the Dockerfile's FROM line refers to.
$(DOCKER_DEVICE_HEALTH)_LOAD_DOCKERS += $(DOCKER_CONFIG_ENGINE_BULLSEYE)

# Directory holding the Dockerfile template and container scripts.
$(DOCKER_DEVICE_HEALTH)_PATH = $(DOCKERS_PATH)/$(DOCKER_DEVICE_HEALTH_STEM)

$(DOCKER_DEVICE_HEALTH)_INSTALL_PYTHON_WHEELS = $(SONIC_UTILITIES_PY3)
$(DOCKER_DEVICE_HEALTH)_INSTALL_DEBS = $(PYTHON3_SWSSCOMMON)

$(DOCKER_DEVICE_HEALTH)_VERSION = 1.0.0
$(DOCKER_DEVICE_HEALTH)_PACKAGE_NAME = device-health

# The images are always added to the build lists, but only added to the
# install lists when INCLUDE_DEVICE_HEALTH is y.
SONIC_DOCKER_IMAGES += $(DOCKER_DEVICE_HEALTH)
ifeq ($(INCLUDE_DEVICE_HEALTH), y)
SONIC_INSTALL_DOCKER_IMAGES += $(DOCKER_DEVICE_HEALTH)
endif

SONIC_DOCKER_DBG_IMAGES += $(DOCKER_DEVICE_HEALTH_DBG)
ifeq ($(INCLUDE_DEVICE_HEALTH), y)
SONIC_INSTALL_DOCKER_DBG_IMAGES += $(DOCKER_DEVICE_HEALTH_DBG)
endif

# Container name and run options: privileged with a TTY, read-only mounts of
# /etc/sonic and the shared scripts directory, and read-write mounts of the
# device_health directory and the D-Bus runtime directory.
$(DOCKER_DEVICE_HEALTH)_CONTAINER_NAME = device-health
$(DOCKER_DEVICE_HEALTH)_RUN_OPT += --privileged -t
$(DOCKER_DEVICE_HEALTH)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
$(DOCKER_DEVICE_HEALTH)_RUN_OPT += -v /usr/share/sonic/scripts:/usr/share/sonic/scripts:ro
$(DOCKER_DEVICE_HEALTH)_RUN_OPT += -v /usr/share/device_health:/usr/share/device_health:rw
$(DOCKER_DEVICE_HEALTH)_RUN_OPT += -v /var/run/dbus:/var/run/dbus:rw

SONIC_BULLSEYE_DOCKERS += $(DOCKER_DEVICE_HEALTH)
SONIC_BULLSEYE_DBG_DOCKERS += $(DOCKER_DEVICE_HEALTH_DBG)
# Extra file made available to the image build; the Dockerfile copies
# files/supervisor-proc-exit-listener into /usr/bin.
$(DOCKER_DEVICE_HEALTH)_FILES = $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)
2 changes: 1 addition & 1 deletion scripts/dbg_files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ if [ "$DEBUG_IMG" == "y" ]
then
for i in $DEBUG_SRC_ARCHIVE_DIRS
do
find src/$i/ -name "*.c" -o -name "*.cpp" -o -name "*.h" -o -name "*.hpp" -type f
# Group the -name alternatives so that -type f applies to every pattern;
# without the parentheses it would only bind to the last -name test.
find "src/$i/" -type f \( -name "*.c" -o -name "*.cpp" -o -name "*.h" -o -name "*.hpp" -o -name "*.go" -o -name "*.json" \)
done | tar -czf $DEBUG_SRC_ARCHIVE_FILE -T -
fi

6 changes: 6 additions & 0 deletions slave.mk
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,10 @@ ifeq ($(SONIC_INCLUDE_SYSTEM_TELEMETRY),y)
INCLUDE_SYSTEM_TELEMETRY = y
endif

# NOTE(review): this block tests INCLUDE_DEVICE_HEALTH for "y" and then assigns
# "y" to the same variable, so it does not change the value. The neighbouring
# blocks map a SONIC_INCLUDE_* variable onto its INCLUDE_* counterpart -
# confirm whether a similar mapping was intended here.
ifeq ($(INCLUDE_DEVICE_HEALTH),y)
INCLUDE_DEVICE_HEALTH = y
endif

ifeq ($(SONIC_INCLUDE_RESTAPI),y)
INCLUDE_RESTAPI = y
endif
Expand Down Expand Up @@ -413,6 +417,7 @@ $(info "VS_PREPARE_MEM" : "$(VS_PREPARE_MEM)")
$(info "INCLUDE_MGMT_FRAMEWORK" : "$(INCLUDE_MGMT_FRAMEWORK)")
$(info "INCLUDE_ICCPD" : "$(INCLUDE_ICCPD)")
$(info "INCLUDE_SYSTEM_TELEMETRY" : "$(INCLUDE_SYSTEM_TELEMETRY)")
$(info "INCLUDE_DEVICE_HEALTH" : "$(INCLUDE_DEVICE_HEALTH)")
$(info "ENABLE_HOST_SERVICE_ON_START" : "$(ENABLE_HOST_SERVICE_ON_START)")
$(info "INCLUDE_RESTAPI" : "$(INCLUDE_RESTAPI)")
$(info "INCLUDE_SFLOW" : "$(INCLUDE_SFLOW)")
Expand Down Expand Up @@ -1287,6 +1292,7 @@ $(addprefix $(TARGET_PATH)/, $(SONIC_INSTALLERS)) : $(TARGET_PATH)/% : \
export sonic_su_mode="$(SECURE_UPGRADE_MODE)"
export sonic_su_prod_signing_tool="/sonic/scripts/$(shell basename -- $(SECURE_UPGRADE_PROD_SIGNING_TOOL))"
export include_system_telemetry="$(INCLUDE_SYSTEM_TELEMETRY)"
export include_device_health="$(INCLUDE_DEVICE_HEALTH)"
export include_restapi="$(INCLUDE_RESTAPI)"
export include_nat="$(INCLUDE_NAT)"
export include_p4rt="$(INCLUDE_P4RT)"
Expand Down
6 changes: 6 additions & 0 deletions sonic-slave-bullseye/Dockerfile.j2
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ RUN apt-get update && apt-get install -y \
libzmq5 \
libzmq3-dev \
uuid-dev \
uuid-runtime \
jq \
cron \
# For quagga build
Expand Down Expand Up @@ -472,6 +473,11 @@ RUN wget -O golang-go.deb 'https://sonicstorage.blob.core.windows.net/public/fip
&& rm golang-go.deb golang-src.deb
{%- endif %}

# Install the Go 1.20.3 toolchain under /usr/local/go1.20.3.
# Download, unpack and delete the tarball within one RUN instruction so the
# archive is not kept in an intermediate image layer.
# NOTE(review): the URL is the linux-amd64 build - confirm this is what is
# wanted when the slave image is built for another architecture.
RUN wget -O golang-go.tar.gz https://go.dev/dl/go1.20.3.linux-amd64.tar.gz \
 && mkdir -p /usr/local/go1.20.3 \
 && tar -C /usr/local/go1.20.3 -xzf golang-go.tar.gz \
 && rm -f golang-go.tar.gz

RUN pip3 install --upgrade pip
RUN apt-get purge -y python3-pip python3-yaml

Expand Down
1 change: 1 addition & 0 deletions src/sonic-device-health
Submodule sonic-device-health added at 4394f1