From 924e4de1296e348bce2f675ed13b0809930b9755 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sat, 13 Apr 2024 15:07:03 +0200 Subject: [PATCH] Replace global dashboard with activity dashboard The activity.jsonnet dashboard extracts four panels from the jupyterhub.jsonnet dashboard: the running servers panel and the daily/weekly/monthly active users panels. By doing this, we get a dashboard with only those four panels, grouped into a row. This row is then made repeatable over the prometheus datasource variable. With a repeating row, we could end up with very many rows, making any panels below hard to get to. Due to that, it makes sense to use repeating rows in a dedicated dashboard. The global dashboard that is being deleted made use of hardcoded state in the grafana instance it was being deployed to. This then also required regular re-deploys of the dashboard just to update the hardcoded entries. Further, the global dashboard couldn't be pre-rendered and re-used across grafana instances - because it had grafana instance specific state within it. With the new Activity dashboard, we can avoid all this complexity and possibly publish pre-rendered dashboards that any grafana instance can install directly. 
--- .github/workflows/test.yaml | 4 +- .pre-commit-config.yaml | 8 +- dashboards/activity.jsonnet | 138 +++++++++++++++++++ dashboards/jupyterhub.jsonnet | 101 -------------- deploy.py | 15 +- docs/contributing.md | 3 +- docs/tutorials/deploy.md | 15 +- global-dashboards/README.md | 3 - global-dashboards/global-usage-stats.jsonnet | 51 ------- 9 files changed, 154 insertions(+), 184 deletions(-) create mode 100644 dashboards/activity.jsonnet delete mode 100644 global-dashboards/README.md delete mode 100755 global-dashboards/global-usage-stats.jsonnet diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 97ce860..cbb4cc0 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -43,10 +43,10 @@ jobs: - name: Render dashboards run: | mkdir rendered-dashboards - dashboard_folders="dashboards global-dashboards" + dashboard_folders="dashboards" for file in `find $dashboard_folders -name '*.jsonnet'` do - jsonnet -J vendor --tla-code 'datasources=["prometheus-test"]' --output-file rendered-dashboards/`basename $file` $file + jsonnet -J vendor --output-file rendered-dashboards/`basename $file` $file done - name: Install dashboard-linter diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 80e5821..de20fc9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,6 +16,10 @@ repos: - id: jsonnet-format # To workaround https://github.com/google/go-jsonnet/issues/591, we run # the jsonnet-lint hook once per .jsonnet / .libsonnet file. 
+ - id: jsonnet-lint + pass_filenames: false + name: jsonnet-lint cluster.jsonnet + args: [-J, vendor, dashboards/activity.jsonnet] - id: jsonnet-lint pass_filenames: false name: jsonnet-lint cluster.jsonnet @@ -44,10 +48,6 @@ repos: pass_filenames: false name: jsonnet-lint user.jsonnet args: [-J, vendor, dashboards/user.jsonnet] - - id: jsonnet-lint - pass_filenames: false - name: jsonnet-lint global-usage-stats.jsonnet - args: [-J, vendor, global-dashboards/global-usage-stats.jsonnet] # Autoformat: Python code, syntax patterns are modernized - repo: https://github.com/asottile/pyupgrade diff --git a/dashboards/activity.jsonnet b/dashboards/activity.jsonnet new file mode 100644 index 0000000..1706c63 --- /dev/null +++ b/dashboards/activity.jsonnet @@ -0,0 +1,138 @@ +#!/usr/bin/env -S jsonnet -J ../vendor +local grafonnet = import 'grafonnet/main.libsonnet'; +local dashboard = grafonnet.dashboard; +local ts = grafonnet.panel.timeSeries; +local prometheus = grafonnet.query.prometheus; +local row = grafonnet.panel.row; +local var = grafonnet.dashboard.variable; + +local common = import './common.libsonnet'; + +local activeUserTsOptions = + common.tsOptions + + ts.standardOptions.withDecimals(0) + // stacking is used here as the total number of users is as relevant as the + // number of users per hub + + ts.fieldConfig.defaults.custom.stacking.withMode('normal') + // stepAfter is used here as these metrics indicate what has happened the time + // before the metric is read + + ts.fieldConfig.defaults.custom.withLineInterpolation('stepAfter') + + ts.panelOptions.withDescription( + ||| + Number of unique users who were active within the preceding period. 
+ ||| + ) +; + +local runningServers = + common.tsOptions + + ts.new('Running Servers') + + ts.standardOptions.withDecimals(0) + + ts.fieldConfig.defaults.custom.stacking.withMode('normal') + + ts.fieldConfig.defaults.custom.withLineInterpolation('stepBefore') + + ts.panelOptions.withDescription( + ||| + Number of running user servers at any given time. + + Note that a single user could have multiple servers running if the + JupyterHub is configured with `c.JupyterHub.allow_named_servers = True`. + ||| + ) + + ts.queryOptions.withTargets([ + prometheus.new( + '$PROMETHEUS_DS', + ||| + max( + jupyterhub_running_servers{namespace=~"$hub"} + ) by (namespace) + ||| + ) + + prometheus.withLegendFormat('{{ namespace }}'), + ]); + +local dailyActiveUsers = + activeUserTsOptions + + ts.new('Daily Active Users') + + ts.queryOptions.withTargets([ + prometheus.new( + '$PROMETHEUS_DS', + ||| + max( + jupyterhub_active_users{period="24h", namespace=~"$hub"} + ) by (namespace) + ||| + ) + + prometheus.withLegendFormat('{{ namespace }}'), + ]); + +local weeklyActiveUsers = + activeUserTsOptions + + ts.new('Weekly Active Users') + + ts.queryOptions.withTargets([ + prometheus.new( + '$PROMETHEUS_DS', + ||| + max( + jupyterhub_active_users{period="7d", namespace=~"$hub"} + ) by (namespace) + ||| + ) + + prometheus.withLegendFormat('{{ namespace }}'), + ]); + +local monthlyActiveUsers = + activeUserTsOptions + + ts.new('Monthly Active Users') + + ts.queryOptions.withTargets([ + prometheus.new( + '$PROMETHEUS_DS', + ||| + max( + jupyterhub_active_users{period="30d", namespace=~"$hub"} + ) by (namespace) + |||, + ) + + prometheus.withLegendFormat('{{ namespace }}'), + ]); + + +dashboard.new('Activity') ++ dashboard.withTags(['jupyterhub']) ++ dashboard.withUid('jhgd-activity') ++ dashboard.withEditable(true) ++ dashboard.time.withFrom('now-90d') ++ dashboard.withVariables([ + /* + * This dashboard repeats the single row it defines once per datasource, due + * to that we allow multiple 
or all datasources to be selected in this + * dashboard but not in others. This repeating is only usable for repeating + * panels or rows, as individual panels can't repeat queries based on the + * available datasources. + */ + common.variables.prometheus + + var.query.selectionOptions.withMulti() + + var.query.selectionOptions.withIncludeAll(), + /* + * The hub variable will behave weirdly when multiple datasources are selected, + * only showing hubs from one datasource. This is currently an accepted issue. + * Many deployments of these dashboard will only be in a Grafana instance with + * a single prometheus datasource. + */ + common.variables.hub, +]) ++ dashboard.withPanels( + grafonnet.util.grid.makeGrid( + [ + row.new('Activity ($PROMETHEUS_DS)') + + row.withPanels([ + runningServers, + dailyActiveUsers, + weeklyActiveUsers, + monthlyActiveUsers, + ]) + + row.withRepeat('PROMETHEUS_DS'), + ], + panelWidth=6, + panelHeight=8, + ) +) diff --git a/dashboards/jupyterhub.jsonnet b/dashboards/jupyterhub.jsonnet index 7aa7c90..3469723 100755 --- a/dashboards/jupyterhub.jsonnet +++ b/dashboards/jupyterhub.jsonnet @@ -12,100 +12,6 @@ local row = grafonnet.panel.row; local common = import './common.libsonnet'; local jupyterhub = import 'jupyterhub.libsonnet'; -// Hub usage stats -local currentActiveUsers = - common.tsOptions - + ts.new('Currently Active Users') - + ts.standardOptions.withDecimals(0) - + ts.fieldConfig.defaults.custom.stacking.withMode('normal') - + ts.queryOptions.withTargets([ - prometheus.new( - '$PROMETHEUS_DS', - ||| - sum( - group( - kube_pod_status_phase{phase="Running"} - ) by (label_component, pod, namespace) - %s - ) by (namespace) - ||| - % jupyterhub.onComponentLabel('singleuser-server', group_right=''), - ) - + prometheus.withLegendFormat('{{ namespace }}'), - ]); - -local dailyActiveUsers = - common.tsOptions - + ts.new('Daily Active Users') - + ts.panelOptions.withDescription( - ||| - Number of unique users who were active within the 
preceding 24h period. - - Requires JupyterHub 3.1. - |||, - ) - + ts.standardOptions.withDecimals(0) - + ts.fieldConfig.defaults.custom.stacking.withMode('normal') - + ts.queryOptions.withTargets([ - prometheus.new( - '$PROMETHEUS_DS', - ||| - max( - jupyterhub_active_users{period="24h", namespace=~"$hub"} - ) by (namespace) - ||| - ) - + prometheus.withLegendFormat('{{ namespace }}'), - ]); - -local weeklyActiveUsers = - common.tsOptions - + ts.new('Weekly Active Users') - + ts.panelOptions.withDescription( - ||| - Number of unique users who were active within the preceeding 7d period. - - Requires JupyterHub 3.1. - ||| - ) - + ts.standardOptions.withDecimals(0) - + ts.fieldConfig.defaults.custom.stacking.withMode('normal') - + ts.queryOptions.withTargets([ - prometheus.new( - '$PROMETHEUS_DS', - ||| - max( - jupyterhub_active_users{period="7d", namespace=~"$hub"} - ) by (namespace) - ||| - ) - + prometheus.withLegendFormat('{{ namespace }}'), - ]); - -local monthlyActiveUsers = - common.tsOptions - + ts.new('Monthly Active Users') - + ts.panelOptions.withDescription( - ||| - Number of unique users who were active within the preceeding 7d period. - - Requires JupyterHub 3.1. 
- ||| - ) - + ts.standardOptions.withDecimals(0) - + ts.fieldConfig.defaults.custom.stacking.withMode('normal') - + ts.queryOptions.withTargets([ - prometheus.new( - '$PROMETHEUS_DS', - ||| - max( - jupyterhub_active_users{period="30d", namespace=~"$hub"} - ) by (namespace) - |||, - ) - + prometheus.withLegendFormat('{{ namespace }}'), - ]); - local userMemoryDistribution = common.heatmapOptions + heatmap.new('User memory usage distribution') @@ -597,13 +503,6 @@ dashboard.new('JupyterHub Dashboard') + dashboard.withPanels( grafonnet.util.grid.makeGrid( [ - row.new('Hub usage stats') - + row.withPanels([ - currentActiveUsers, - dailyActiveUsers, - weeklyActiveUsers, - monthlyActiveUsers, - ]), row.new('Container Images') + row.withPanels([ notebookImagesUsed, diff --git a/deploy.py b/deploy.py index c9f3d99..1cf8694 100755 --- a/deploy.py +++ b/deploy.py @@ -65,14 +65,6 @@ def build_dashboard(dashboard_path, api): Returns JSON representing a Grafana dashboard by rendering an individual `.jsonnet` dashboard template with `jsonnet`. """ - # global-dashboards/global-usage-stats.json needs to be rendered with - # information about the grafana instance's datasources in order to show info - # about all datasources in a single panel. Due to that, we also ask the - # Grafana instance we are to deploy to about its datasources and then pass - # them to `jsonnet` when rendering via the `--tla-code` flag. 
- datasources = api("/datasources") - datasources_names = [ds["name"] for ds in datasources] - dashboard = json.loads( subprocess.check_output( [ @@ -80,10 +72,9 @@ def build_dashboard(dashboard_path, api): "-J", "vendor", dashboard_path, - "--tla-code", - f"datasources={datasources_names}", - ] - ).decode() + ], + text=True, + ) ) if not dashboard: raise ValueError(f"jsonnet render of {dashboard_path} led to an empty object") diff --git a/docs/contributing.md b/docs/contributing.md index 6763133..6fd7c4c 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -32,8 +32,7 @@ Dashboards are `.json` files generated from `.jsonnet` files using `jsonnet` like this: ```shell -# --tla-code flag is currently only relevant for global-dashboards -jsonnet -J vendor --tla-code 'datasources=["prometheus-test"]' dashboards/cluster.json +jsonnet -J vendor dashboards/cluster.jsonnet ``` To tweak dashboard settings in the `.jsonnet` files can be tricky. One way to do diff --git a/docs/tutorials/deploy.md b/docs/tutorials/deploy.md index 6d749ac..b3502c8 100644 --- a/docs/tutorials/deploy.md +++ b/docs/tutorials/deploy.md @@ -226,21 +226,18 @@ You will likely only need to adjust the `claimName` above to use this example. There's a helper `deploy.py` script that can deploy the dashboards to any grafana installation. ```bash -export GRAFANA_TOKEN=" +# note the leading space in the command below, it makes the +# sensitive command not be stored in your shell history + export GRAFANA_TOKEN=" + ./deploy.py ``` This creates a folder called 'JupyterHub Default Dashboards' in your grafana, and adds a couple of dashboards to it. 
-If your Grafana deployment supports more than one datasource, then apart from the default dashboards in the [`dashboards` directory](https://github.com/jupyterhub/grafana-dashboards/tree/main/dashboards), you should also consider deploying apart the dashboards in [`global-dashboards` directory](https://github.com/jupyterhub/grafana-dashboards/tree/main/global-dashboards). - -```bash -export GRAFANA_TOKEN=" -./deploy.py --dashboards-dir global-dashboards -``` - -The global dashboards will use the list of available dashboards in your Grafana provided to them and will build dashboards across all of them. +The Activity dashboard is unique because it repeats rows of panels for every +prometheus datasource accessible by Grafana. If your Grafana instance uses a self-signed certificate, use the `--no-tls-verify` flag when executing the `deploy.py` script. For example: diff --git a/global-dashboards/README.md b/global-dashboards/README.md deleted file mode 100644 index 7f5064d..0000000 --- a/global-dashboards/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Dashboards across all datasources - -Contains "global" dashboards with useful stats computed across all datasources. 
diff --git a/global-dashboards/global-usage-stats.jsonnet b/global-dashboards/global-usage-stats.jsonnet deleted file mode 100755 index 36f17b1..0000000 --- a/global-dashboards/global-usage-stats.jsonnet +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env -S jsonnet -J ../vendor --tla-code 'datasources=["prometheus-test"]' -// Deploys one dashboard - "Global usage dashboard", -// with useful stats about usage across all datasources -local grafonnet = import 'grafonnet/main.libsonnet'; -local dashboard = grafonnet.dashboard; -local barGauge = grafonnet.panel.barGauge; -local prometheus = grafonnet.query.prometheus; - -function(datasources) - local weeklyActiveUsers = - barGauge.new('Active users (over 7 days)') - + barGauge.standardOptions.color.withMode('fixed') - + barGauge.standardOptions.color.withFixedColor('green') - + barGauge.queryOptions.withInterval('7d') - + barGauge.queryOptions.withTargets([ - prometheus.new( - x, - // Removes any pods caused by stress testing - ||| - count( - sum( - min_over_time( - kube_pod_labels{ - label_app="jupyterhub", - label_component="singleuser-server", - label_hub_jupyter_org_username!~"(service|perf|hubtraf)-", - }[7d] - ) - ) by (pod) - ) - |||, - ) - + prometheus.withLegendFormat(x) - // Create a target for each datasource - for x in datasources - ]); - - dashboard.new('Global Usage Dashboard') - + dashboard.withUid('global-usage-dashboard') - + dashboard.withTags(['jupyterhub', 'global']) - + dashboard.withEditable(true) - + dashboard.time.withFrom('now-7d') - + dashboard.withPanels( - grafonnet.util.grid.makeGrid( - [ - weeklyActiveUsers, - ], - panelWidth=24, - panelHeight=10, - ) - )