From eec2e0ab7a53848316e84ac3454afd01579f59d8 Mon Sep 17 00:00:00 2001
From: Antoine Eiche
Date: Sat, 17 Aug 2024 20:09:54 +0200
Subject: [PATCH] Detect if the host needs to be rebooted

The host is considered to need a reboot when the kernel of the current
system is not the kernel of the booted system.

And expose it via Prometheus:

    # HELP comin_host_info Info of the host.
    # TYPE comin_host_info gauge
    comin_host_info{need_to_reboot="1"} 1
---
 internal/manager/manager.go       |  6 ++++++
 internal/prometheus/prometheus.go | 21 ++++++++++++++++++++-
 internal/utils/reboot.go          | 29 +++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 internal/utils/reboot.go

diff --git a/internal/manager/manager.go b/internal/manager/manager.go
index 713a8f7..ca890e7 100644
--- a/internal/manager/manager.go
+++ b/internal/manager/manager.go
@@ -44,6 +44,7 @@ type Manager struct {
 	// for a first iteration: this needs to be removed
 	isRunning               bool
 	needToBeRestarted       bool
+	needToReboot            bool
 	cominServiceRestartFunc func() error
 
 	evalFunc generation.EvalFunc
@@ -153,6 +154,8 @@ func (m Manager) onDeployment(ctx context.Context, deploymentResult deployment.D
 	if getsEvicted && evicted.ProfilePath != "" {
 		profile.RemoveProfilePath(evicted.ProfilePath)
 	}
+	m.needToReboot = utils.NeedToReboot()
+	m.prometheus.SetHostInfo(m.needToReboot)
 	return m
 }
 
@@ -211,6 +214,9 @@ func (m Manager) Run() {
 	logrus.Infof(" machineId = %s", m.machineId)
 	logrus.Infof(" repositoryPath = %s", m.repositoryPath)
 
+	m.needToReboot = utils.NeedToReboot()
+	m.prometheus.SetHostInfo(m.needToReboot)
+
 	for {
 		select {
 		case <-m.stateRequestCh:
diff --git a/internal/prometheus/prometheus.go b/internal/prometheus/prometheus.go
index d4f2297..a7a6305 100644
--- a/internal/prometheus/prometheus.go
+++ b/internal/prometheus/prometheus.go
@@ -12,6 +12,7 @@ type Prometheus struct {
 	buildInfo      *prometheus.GaugeVec
 	deploymentInfo *prometheus.GaugeVec
 	fetchCounter   *prometheus.CounterVec
+	hostInfo       *prometheus.GaugeVec
 }
 
 func New() Prometheus {
@@ -19,7 +20,7 @@ func New() Prometheus {
 	buildInfo := prometheus.NewGaugeVec(prometheus.GaugeOpts{
 		Name: "comin_build_info",
 		Help: "Build info for comin.",
-	}, []string{"version"})
+	}, []string{"version"})
 	deploymentInfo := prometheus.NewGaugeVec(prometheus.GaugeOpts{
 		Name: "comin_deployment_info",
 		Help: "Info of the last deployment.",
@@ -28,14 +29,20 @@ func New() Prometheus {
 		Name: "comin_fetch_count",
 		Help: "Number of fetches per status",
 	}, []string{"remote_name", "status"})
+	hostInfo := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "comin_host_info",
+		Help: "Info of the host.",
+	}, []string{"need_to_reboot"})
 	promReg.MustRegister(buildInfo)
 	promReg.MustRegister(deploymentInfo)
 	promReg.MustRegister(fetchCounter)
+	promReg.MustRegister(hostInfo)
 	return Prometheus{
 		promRegistry:   promReg,
 		buildInfo:      buildInfo,
 		deploymentInfo: deploymentInfo,
 		fetchCounter:   fetchCounter,
+		hostInfo:       hostInfo,
 	}
 }
 
@@ -60,3 +67,15 @@ func (m Prometheus) SetDeploymentInfo(commitId, status string) {
 	m.deploymentInfo.Reset()
 	m.deploymentInfo.With(prometheus.Labels{"commit_id": commitId, "status": status}).Set(1)
 }
+
+// SetHostInfo resets the comin_host_info gauge vector and then sets a
+// single series to 1. Its need_to_reboot label is "1" when needToReboot
+// is true and "0" otherwise.
+func (m Prometheus) SetHostInfo(needToReboot bool) {
+	m.hostInfo.Reset()
+	value := "0"
+	if needToReboot {
+		value = "1"
+	}
+	m.hostInfo.With(prometheus.Labels{"need_to_reboot": value}).Set(1)
+}
diff --git a/internal/utils/reboot.go b/internal/utils/reboot.go
new file mode 100644
index 0000000..1727ec1
--- /dev/null
+++ b/internal/utils/reboot.go
@@ -0,0 +1,29 @@
+package utils
+
+import (
+	"os"
+
+	"github.com/sirupsen/logrus"
+)
+
+// NeedToReboot returns true when the currently deployed kernel is not
+// the booted kernel, that is, when the /run/current-system/kernel and
+// /run/booted-system/kernel symlinks point to different targets. When
+// either symlink cannot be read, the error is logged and false is
+// returned.
+//
+// Note: we should implement something smarter, such as described in
+// https://discourse.nixos.org/t/nixos-needsreboot-determine-if-you-need-to-reboot-your-nixos-machine/40790
+func NeedToReboot() bool {
+	current, err := os.Readlink("/run/current-system/kernel")
+	if err != nil {
+		logrus.Errorf("Failed to read the symlink /run/current-system/kernel: %s", err)
+		return false
+	}
+	booted, err := os.Readlink("/run/booted-system/kernel")
+	if err != nil {
+		logrus.Errorf("Failed to read the symlink /run/booted-system/kernel: %s", err)
+		return false
+	}
+	return current != booted
+}