diff --git a/packages/backend/src/routers/healthcheck.js b/packages/backend/src/routers/healthcheck.js
new file mode 100644
index 0000000000..769f9ddb68
--- /dev/null
+++ b/packages/backend/src/routers/healthcheck.js
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2024 Puter Technologies Inc.
+ *
+ * This file is part of Puter.
+ *
+ * Puter is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+"use strict"
+const express = require('express');
+const router = new express.Router();
+
+// -----------------------------------------------------------------------//
+// GET /healthcheck
+// -----------------------------------------------------------------------//
+router.get('/healthcheck', async (req, res) => {
+    const svc_serverHealth = req.services.get('server-health');
+
+    const status = await svc_serverHealth.get_status();
+    res.json(status);
+})
+module.exports = router
diff --git a/packages/backend/src/services/PuterAPIService.js b/packages/backend/src/services/PuterAPIService.js
index e7f1c5a041..1427dab0d0 100644
--- a/packages/backend/src/services/PuterAPIService.js
+++ b/packages/backend/src/services/PuterAPIService.js
@@ -71,6 +71,7 @@ class PuterAPIService extends BaseService {
         app.use(require('../routers/sites'))
         // app.use(require('../routers/filesystem_api/stat'))
         app.use(require('../routers/suggest_apps'))
+        app.use(require('../routers/healthcheck'))
         app.use(require('../routers/test'))
         app.use(require('../routers/update-taskbar-items'))
         require('../routers/whoami')(app);
diff --git a/packages/backend/src/services/database/SqliteDatabaseAccessService.js b/packages/backend/src/services/database/SqliteDatabaseAccessService.js
index 8c7b581ae1..ccf191aad0 100644
--- a/packages/backend/src/services/database/SqliteDatabaseAccessService.js
+++ b/packages/backend/src/services/database/SqliteDatabaseAccessService.js
@@ -126,6 +126,19 @@ class SqliteDatabaseAccessService extends BaseDatabaseAccessService {
                 svc_devConsole.add_widget(this.database_update_notice);
             })();
         }
+
+        const svc_serverHealth = this.services.get('server-health');
+
+        // Fail the health check when the database's user_version
+        // differs from TARGET_VERSION.
+        svc_serverHealth.add_check('sqlite', async () => {
+            const [{ user_version }] = await this._requireRead('PRAGMA user_version');
+            if ( user_version !== TARGET_VERSION ) {
+                throw new Error(
+                    `Database version mismatch: expected ${TARGET_VERSION}, ` +
+                    `got ${user_version}`);
+            }
+        });
     }
 
     async _read (query, params = []) {
diff --git a/packages/backend/src/services/runtime-analysis/ServerHealthService.js b/packages/backend/src/services/runtime-analysis/ServerHealthService.js
index 71fcf644c2..31ca1e9c38 100644
--- a/packages/backend/src/services/runtime-analysis/ServerHealthService.js
+++ b/packages/backend/src/services/runtime-analysis/ServerHealthService.js
@@ -19,14 +19,18 @@
 const BaseService = require("../BaseService");
 const { SECOND } = require("../../util/time");
 const { parse_meminfo } = require("../../util/linux");
-const { asyncSafeSetInterval } = require("../../util/promise");
+const { asyncSafeSetInterval, TeePromise } = require("../../util/promise");
 
 class ServerHealthService extends BaseService {
     static MODULES = {
         fs: require('fs'),
     }
+    _construct () {
+        this.checks_ = [];
+        this.failures_ = [];
+    }
     async _init () {
-        const ram_poll_interval = 10 * SECOND;
+        this.init_service_checks_();
 
         /*
             There's an interesting thread here:
@@ -53,7 +57,7 @@ class ServerHealthService extends BaseService {
             return;
         }
 
-        asyncSafeSetInterval(async () => {
+        this.add_check('ram-usage', async () => {
             const meminfo_text = await this.modules.fs.promises.readFile(
                 '/proc/meminfo', 'utf8'
             );
@@ -69,11 +73,60 @@ class ServerHealthService extends BaseService {
             if ( meminfo.MemAvailable < min_available_KiB ) {
                 svc_alarm.create('low-available-memory', 'Low available memory', alarm_fields);
             }
-        }, ram_poll_interval, null,{
+        });
+    }
+
+    /**
+     * Starts the periodic loop that runs every registered health check,
+     * records which ones are failing, and raises an alarm whenever a
+     * check starts failing.
+     */
+    init_service_checks_ () {
+        const svc_alarm = this.services.get('alarm');
+        asyncSafeSetInterval(async () => {
+            const check_failures = [];
+            for ( const { name, fn } of this.checks_ ) {
+                // Race each check against a timer so that a hung check
+                // is reported as a failure instead of stalling the loop.
+                const p_timeout = new TeePromise();
+                const timeout = setTimeout(() => {
+                    p_timeout.reject(new Error('Health check timed out'));
+                }, 5 * SECOND);
+                try {
+                    await Promise.race([
+                        fn(),
+                        p_timeout,
+                    ]);
+                } catch ( err ) {
+                    // Always record the failure so that get_status() keeps
+                    // reporting it for as long as the check keeps failing.
+                    check_failures.push({ name });
+
+                    // Only raise an alarm when the check was passing on the
+                    // previous run; `continue` (not `return`) so that the
+                    // remaining checks still run and failures_ is updated.
+                    if ( this.failures_.some(v => v.name === name) ) {
+                        continue;
+                    }
+
+                    svc_alarm.create(
+                        'health-check-failure',
+                        `Health check ${name} failed`,
+                        { error: err }
+                    );
+                } finally {
+                    // Clear the timer on every path so that it neither
+                    // lingers nor rejects after the check has settled.
+                    clearTimeout(timeout);
+                }
+            }
+
+            this.failures_ = check_failures;
+        }, 10 * SECOND, null, {
             onBehindSchedule: (drift) => {
                 svc_alarm.create(
-                    'ram-usage-poll-behind-schedule',
-                    'RAM usage poll is behind schedule',
+                    'health-checks-behind-schedule',
+                    'Health checks are behind schedule',
                     { drift }
                 );
             }
@@ -83,6 +136,29 @@ class ServerHealthService extends BaseService {
     async get_stats () {
         return { ...this.stats_ };
     }
+
+    /**
+     * Registers a health check to be run by the periodic check loop.
+     * @param {string} name - identifier used in alarms and status reports
+     * @param {Function} fn - async function that throws (or rejects) when
+     *   the check fails
+     */
+    add_check (name, fn) {
+        this.checks_.push({ name, fn });
+    }
+
+    /**
+     * Reports the outcome of the most recent run of the health checks.
+     * @returns {{ok: boolean, failed?: string[]}} `failed` lists the names
+     *   of failing checks and is only present when there are any
+     */
+    get_status () {
+        const failures = this.failures_.map(v => v.name);
+        return {
+            ok: failures.length === 0,
+            ...(failures.length ? { failed: failures } : {}),
+        };
+    }
 }
 
 module.exports = { ServerHealthService };