From 87184ee54d3ae72f71b7802f2da4571f38b80a3c Mon Sep 17 00:00:00 2001 From: Jake Awe Date: Thu, 29 Feb 2024 11:18:09 -0600 Subject: [PATCH 1/3] add forward merger --- .github/ops-bot.yaml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .github/ops-bot.yaml diff --git a/.github/ops-bot.yaml b/.github/ops-bot.yaml new file mode 100644 index 0000000..734ffeb --- /dev/null +++ b/.github/ops-bot.yaml @@ -0,0 +1,4 @@ +# This file controls which features from the `ops-bot` repository below are enabled. +# - https://github.com/rapidsai/ops-bot + +forward_merger: true From dc8ed787f3ef4d6fe800830c298de2fc2d648dc5 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 27 Mar 2024 17:12:24 -0500 Subject: [PATCH 2/3] Use `conda env create --yes` instead of `--force`. (#189) --- ci/check_style.sh | 2 +- ci/test_python.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/check_style.sh b/ci/check_style.sh index 4b54ba0..32900f6 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -11,7 +11,7 @@ rapids-dependency-file-generator \ --file_key checks \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml -rapids-mamba-retry env create --force -f env.yaml -n checks +rapids-mamba-retry env create --yes -f env.yaml -n checks conda activate checks rapids-logger "Run pre-commit checks - Python backend" diff --git a/ci/test_python.sh b/ci/test_python.sh index 8eedcf9..8558f57 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -11,7 +11,7 @@ rapids-dependency-file-generator \ --file_key test_python \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml -rapids-mamba-retry env create --force -f env.yaml -n test +rapids-mamba-retry env create --yes -f env.yaml -n test # Temporarily allow unbound variables for conda activation. set +u From 05567f726f241a879796e147f2c0ac01d4dfdc01 Mon Sep 17 00:00:00 2001 From: Ajay Thorve Date: Tue, 2 Apr 2024 11:34:29 -0700 Subject: [PATCH 3/3] [Ready for Review] add websockets instead of rest api for better response times (#188) * add websockets instead of rest api * fix pytests * rm unused import * update dependencies * allow-root for jupyterlab server within the ci container * fix testing in ci * remove logs * add SETTINGS text to config button --- ci/test_python.sh | 2 +- conda/environments/all_arch-any.yaml | 2 + dependencies.yaml | 2 + jupyterlab_nvdashboard/apps/cpu.py | 11 +- jupyterlab_nvdashboard/apps/gpu.py | 41 +++--- jupyterlab_nvdashboard/apps/utils.py | 31 +++++ jupyterlab_nvdashboard/handlers.py | 12 +- jupyterlab_nvdashboard/tests/conftest.py | 2 + .../tests/test_cpu_handlers.py | 37 ++++++ .../tests/test_gpu_handlers.py | 80 ++++++++++++ jupyterlab_nvdashboard/tests/test_handlers.py | 62 --------- pyproject.toml | 4 +- schema/config.json | 9 +- src/assets/constants.ts | 1 + src/assets/hooks.ts | 110 +++++++++++++--- src/assets/interfaces.ts | 52 ++++++++ src/charts/GpuMemoryChart.tsx | 72 +++++------ src/charts/GpuResourceChart.tsx | 88 +++++++------ src/charts/GpuUtilizationChart.tsx | 50 +++----- src/charts/MachineResourceChart.tsx | 121 ++++++++++-------- src/charts/NvLinkThroughputChart.tsx | 52 +++----- src/charts/NvLinkTimelineChart.tsx | 86 ++++++++----- src/charts/PciThroughputChart.tsx | 50 +++----- src/handler.ts | 60 ++++----- src/launchWidget.tsx | 9 +- style/base.css | 17 +++ 26 files changed, 640 insertions(+), 423 deletions(-) create mode 100644 jupyterlab_nvdashboard/apps/utils.py create mode 100644 jupyterlab_nvdashboard/tests/conftest.py create mode 100644 jupyterlab_nvdashboard/tests/test_cpu_handlers.py create mode 100644 jupyterlab_nvdashboard/tests/test_gpu_handlers.py delete mode 100644 jupyterlab_nvdashboard/tests/test_handlers.py diff --git a/ci/test_python.sh b/ci/test_python.sh index 8558f57..92f9a7d 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -18,7 +18,7 @@ set +u conda activate test set -u -# rapids-logger "Downloading artifacts from previous jobs" +rapids-logger "Downloading artifacts from previous jobs" PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) rapids-print-env diff --git a/conda/environments/all_arch-any.yaml b/conda/environments/all_arch-any.yaml index 93dd1ab..e96db4f 100644 --- a/conda/environments/all_arch-any.yaml +++ b/conda/environments/all_arch-any.yaml @@ -12,6 +12,8 @@ dependencies: - psutil - pynvml - pytest +- pytest-asyncio - pytest-jupyter[server]>=0.6.0 - python>=3.8 +- websockets name: all_arch-any diff --git a/dependencies.yaml b/dependencies.yaml index d7403c8..59c226a 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -80,3 +80,5 @@ dependencies: packages: - pytest - pytest-jupyter[server]>=0.6.0 + - pytest-asyncio + - websockets diff --git a/jupyterlab_nvdashboard/apps/cpu.py b/jupyterlab_nvdashboard/apps/cpu.py index 0bbf494..ed49f72 100644 --- a/jupyterlab_nvdashboard/apps/cpu.py +++ b/jupyterlab_nvdashboard/apps/cpu.py @@ -1,13 +1,11 @@ import json import psutil import time -import tornado -from jupyter_server.base.handlers import APIHandler +from jupyterlab_nvdashboard.apps.utils import CustomWebSocketHandler -class CPUResourceHandler(APIHandler): - @tornado.web.authenticated - def get(self): +class CPUResourceWebSocketHandler(CustomWebSocketHandler): + def send_data(self): now = time.time() stats = { "time": now * 1000, @@ -18,5 +16,4 @@ def get(self): "network_read": psutil.net_io_counters().bytes_recv, "network_write": psutil.net_io_counters().bytes_sent, } - self.set_header("Content-Type", "application/json") - self.write(json.dumps(stats)) + self.write_message(json.dumps(stats)) diff --git a/jupyterlab_nvdashboard/apps/gpu.py b/jupyterlab_nvdashboard/apps/gpu.py index 4cbaa96..6ba8eee 100644 --- a/jupyterlab_nvdashboard/apps/gpu.py +++ b/jupyterlab_nvdashboard/apps/gpu.py @@ -1,8 +1,7 @@ import json +from jupyterlab_nvdashboard.apps.utils import CustomWebSocketHandler import pynvml import time -import tornado -from jupyter_server.base.handlers import APIHandler try: pynvml.nvmlInit() @@ -41,19 +40,17 @@ pci_gen = None -class GPUUtilizationHandler(APIHandler): - @tornado.web.authenticated - def get(self): +class GPUUtilizationWebSocketHandler(CustomWebSocketHandler): + def send_data(self): gpu_utilization = [ pynvml.nvmlDeviceGetUtilizationRates(gpu_handles[i]).gpu for i in range(ngpus) ] - self.finish(json.dumps({"gpu_utilization": gpu_utilization})) + self.write_message(json.dumps({"gpu_utilization": gpu_utilization})) -class GPUUsageHandler(APIHandler): - @tornado.web.authenticated - def get(self): +class GPUUsageWebSocketHandler(CustomWebSocketHandler): + def send_data(self): memory_usage = [ pynvml.nvmlDeviceGetMemoryInfo(handle).used for handle in gpu_handles @@ -64,16 +61,15 @@ def get(self): for handle in gpu_handles ] - self.finish( + self.write_message( json.dumps( {"memory_usage": memory_usage, "total_memory": total_memory} ) ) -class GPUResourceHandler(APIHandler): - @tornado.web.authenticated - def get(self): +class GPUResourceWebSocketHandler(CustomWebSocketHandler): + def send_data(self): now = time.time() stats = { "time": now * 1000, @@ -118,15 +114,13 @@ def get(self): stats["gpu_memory_total"] = round( (stats["gpu_memory_total"] / gpu_mem_sum) * 100, 2 ) - self.set_header("Content-Type", "application/json") - self.write(json.dumps(stats)) + self.write_message(json.dumps(stats)) -class NVLinkThroughputHandler(APIHandler): +class NVLinkThroughputWebSocketHandler(CustomWebSocketHandler): prev_throughput = None - @tornado.web.authenticated - def get(self): + def send_data(self): throughput = [ pynvml.nvmlDeviceGetFieldValues( handle, @@ -162,9 +156,8 @@ def get(self): # Store the current throughput for the next request self.prev_throughput = throughput - self.set_header("Content-Type", "application/json") # Send the change in throughput as part of the response - self.write( + self.write_message( json.dumps( { "nvlink_rx": [ @@ -191,9 +184,8 @@ def get(self): ) -class PCIStatsHandler(APIHandler): - @tornado.web.authenticated - def get(self): +class PCIStatsWebSocketHandler(CustomWebSocketHandler): + def send_data(self): # Use device-0 to get "upper bound" pci_width = pynvml.nvmlDeviceGetMaxPcieLinkWidth(gpu_handles[0]) pci_bw = { @@ -231,5 +223,4 @@ def get(self): "max_rxtx_tp": max_rxtx_tp, } - self.set_header("Content-Type", "application/json") - self.write(json.dumps(stats)) + self.write_message(json.dumps(stats)) diff --git a/jupyterlab_nvdashboard/apps/utils.py b/jupyterlab_nvdashboard/apps/utils.py new file mode 100644 index 0000000..9392dd1 --- /dev/null +++ b/jupyterlab_nvdashboard/apps/utils.py @@ -0,0 +1,31 @@ +from tornado.websocket import WebSocketHandler +import tornado +import json + + +class CustomWebSocketHandler(WebSocketHandler): + def open(self): + self.write_message(json.dumps({"status": "connected"})) + self.set_nodelay(True) + # Start a periodic callback to send data every 50ms + self.callback = tornado.ioloop.PeriodicCallback(self.send_data, 1000) + self.callback.start() + + def on_message(self, message): + message_data = json.loads(message) + # Update the periodic callback frequency + new_frequency = message_data["updateFrequency"] + if hasattr(self, "callback"): + self.callback.stop() + self.callback = tornado.ioloop.PeriodicCallback( + self.send_data, new_frequency + ) + if not message_data["isPaused"]: + self.callback.start() + + def on_close(self): + if hasattr(self, "callback") and self.callback.is_running(): + self.callback.stop() + + def send_data(self): + pass diff --git a/jupyterlab_nvdashboard/handlers.py b/jupyterlab_nvdashboard/handlers.py index b920b50..49e3343 100644 --- a/jupyterlab_nvdashboard/handlers.py +++ b/jupyterlab_nvdashboard/handlers.py @@ -26,13 +26,13 @@ def setup_handlers(web_app): base_url, URL_PATH, "nvlink_throughput" ) handlers += [ - (route_pattern_gpu_util, apps.gpu.GPUUtilizationHandler), - (route_pattern_gpu_usage, apps.gpu.GPUUsageHandler), - (route_pattern_gpu_resource, apps.gpu.GPUResourceHandler), - (route_pattern_pci_stats, apps.gpu.PCIStatsHandler), + (route_pattern_gpu_util, apps.gpu.GPUUtilizationWebSocketHandler), + (route_pattern_gpu_usage, apps.gpu.GPUUsageWebSocketHandler), + (route_pattern_gpu_resource, apps.gpu.GPUResourceWebSocketHandler), + (route_pattern_pci_stats, apps.gpu.PCIStatsWebSocketHandler), ( route_pattern_nvlink_throughput, - apps.gpu.NVLinkThroughputHandler, + apps.gpu.NVLinkThroughputWebSocketHandler, ), ] @@ -41,7 +41,7 @@ def setup_handlers(web_app): ) handlers += [ - (route_pattern_cpu_resource, apps.cpu.CPUResourceHandler), + (route_pattern_cpu_resource, apps.cpu.CPUResourceWebSocketHandler), ] web_app.add_handlers(host_pattern, handlers) diff --git a/jupyterlab_nvdashboard/tests/conftest.py b/jupyterlab_nvdashboard/tests/conftest.py new file mode 100644 index 0000000..9adaffe --- /dev/null +++ b/jupyterlab_nvdashboard/tests/conftest.py @@ -0,0 +1,2 @@ +def pytest_configure(config): + config.addinivalue_line("markers", "asyncio: mark test as asyncio") diff --git a/jupyterlab_nvdashboard/tests/test_cpu_handlers.py b/jupyterlab_nvdashboard/tests/test_cpu_handlers.py new file mode 100644 index 0000000..53ce9b5 --- /dev/null +++ b/jupyterlab_nvdashboard/tests/test_cpu_handlers.py @@ -0,0 +1,37 @@ +import json +import pytest +from unittest.mock import MagicMock, patch + +from jupyterlab_nvdashboard.apps.cpu import CPUResourceWebSocketHandler + + +@pytest.fixture +def mock_handler(monkeypatch): + mock = MagicMock() + monkeypatch.setattr( + "jupyterlab_nvdashboard.apps.cpu.CustomWebSocketHandler.write_message", + mock, + ) + return mock + + +@pytest.fixture +def handler_args(): + with patch("tornado.web.Application") as mock_application, patch( + "tornado.httputil.HTTPServerRequest" + ) as mock_request: + yield mock_application, mock_request + + +def test_cpu_resource_handler(mock_handler, handler_args): + handler = CPUResourceWebSocketHandler(*handler_args) + handler.send_data() + args, _ = mock_handler.call_args + data = json.loads(args[0]) + assert "time" in data + assert "cpu_utilization" in data + assert "memory_usage" in data + assert "disk_read" in data + assert "disk_write" in data + assert "network_read" in data + assert "network_write" in data diff --git a/jupyterlab_nvdashboard/tests/test_gpu_handlers.py b/jupyterlab_nvdashboard/tests/test_gpu_handlers.py new file mode 100644 index 0000000..c204a2e --- /dev/null +++ b/jupyterlab_nvdashboard/tests/test_gpu_handlers.py @@ -0,0 +1,80 @@ +import json +import pytest +from unittest.mock import MagicMock, patch + +from jupyterlab_nvdashboard.apps.gpu import ( + GPUUtilizationWebSocketHandler, + GPUUsageWebSocketHandler, + GPUResourceWebSocketHandler, + NVLinkThroughputWebSocketHandler, + PCIStatsWebSocketHandler, +) + + +@pytest.fixture +def mock_handler(monkeypatch): + mock = MagicMock() + monkeypatch.setattr( + "jupyterlab_nvdashboard.apps.gpu.CustomWebSocketHandler.write_message", + mock, + ) + return mock + + +@pytest.fixture +def handler_args(): + with patch("tornado.web.Application") as mock_application, patch( + "tornado.httputil.HTTPServerRequest" + ) as mock_request: + yield mock_application, mock_request + + +def test_gpu_utilization_handler(mock_handler, handler_args): + handler = GPUUtilizationWebSocketHandler(*handler_args) + handler.send_data() + args, _ = mock_handler.call_args + data = json.loads(args[0]) + assert "gpu_utilization" in data + + +def test_gpu_usage_handler(mock_handler, handler_args): + handler = GPUUsageWebSocketHandler(*handler_args) + handler.send_data() + args, _ = mock_handler.call_args + data = json.loads(args[0]) + assert "memory_usage" in data + assert "total_memory" in data + + +def test_gpu_resource_handler(mock_handler, handler_args): + handler = GPUResourceWebSocketHandler(*handler_args) + handler.send_data() + args, _ = mock_handler.call_args + data = json.loads(args[0]) + assert "time" in data + assert "gpu_utilization_total" in data + assert "gpu_memory_total" in data + assert "rx_total" in data + assert "tx_total" in data + assert "gpu_memory_individual" in data + assert "gpu_utilization_individual" in data + + +def test_nvlink_throughput_handler(mock_handler, handler_args): + handler = NVLinkThroughputWebSocketHandler(*handler_args) + handler.send_data() + args, _ = mock_handler.call_args + data = json.loads(args[0]) + assert "nvlink_rx" in data + assert "nvlink_tx" in data + assert "max_rxtx_bw" in data + + +def test_pci_stats_handler(mock_handler, handler_args): + handler = PCIStatsWebSocketHandler(*handler_args) + handler.send_data() + args, _ = mock_handler.call_args + data = json.loads(args[0]) + assert "pci_tx" in data + assert "pci_rx" in data + assert "max_rxtx_tp" in data diff --git a/jupyterlab_nvdashboard/tests/test_handlers.py b/jupyterlab_nvdashboard/tests/test_handlers.py deleted file mode 100644 index 26e1d6d..0000000 --- a/jupyterlab_nvdashboard/tests/test_handlers.py +++ /dev/null @@ -1,62 +0,0 @@ -import json - -URL_PATH = "nvdashboard" - - -async def test_gpu_utilization_handler(jp_fetch): - response = await jp_fetch(URL_PATH, "gpu_utilization") - assert response.code == 200 - data = json.loads(response.body.decode()) - assert "gpu_utilization" in data - - -async def test_gpu_usage_handler(jp_fetch): - response = await jp_fetch(URL_PATH, "gpu_usage") - assert response.code == 200 - data = json.loads(response.body.decode()) - assert "memory_usage" in data - assert "total_memory" in data - - -async def test_gpu_resource_handler(jp_fetch): - response = await jp_fetch(URL_PATH, "gpu_resource") - assert response.code == 200 - data = json.loads(response.body.decode()) - assert "time" in data - assert "gpu_utilization_total" in data - assert "gpu_memory_total" in data - assert "rx_total" in data - assert "tx_total" in data - assert "gpu_memory_individual" in data - assert "gpu_utilization_individual" in data - - -async def test_pci_stats_handler(jp_fetch): - response = await jp_fetch(URL_PATH, "pci_stats") - assert response.code == 200 - data = json.loads(response.body.decode()) - assert "pci_tx" in data - assert "pci_rx" in data - assert "max_rxtx_tp" in data - - -async def test_nvlink_throughput_handler(jp_fetch): - response = await jp_fetch(URL_PATH, "nvlink_throughput") - assert response.code == 200 - data = json.loads(response.body.decode()) - assert "nvlink_rx" in data - assert "nvlink_tx" in data - assert "max_rxtx_bw" in data - - -async def test_cpu_handlers(jp_fetch): - response = await jp_fetch(URL_PATH, "cpu_resource") - assert response.code == 200 - data = json.loads(response.body.decode()) - assert "time" in data - assert "cpu_utilization" in data - assert "memory_usage" in data - assert "disk_read" in data - assert "disk_write" in data - assert "network_read" in data - assert "network_write" in data diff --git a/pyproject.toml b/pyproject.toml index 015f35b..dfdf6c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,9 @@ dynamic = ["version", "description", "authors", "urls", "keywords"] [project.optional-dependencies] test = [ "pytest", + "pytest-asyncio", "pytest-jupyter[server]>=0.6.0", + "websockets", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. [tool.hatch.version] @@ -85,7 +87,7 @@ version_cmd = "hatch version" before-build-npm = [ "python -m pip install 'jupyterlab>=4.0.0,<5'", "jlpm", - "jlpm build:prod" + "jlpm build:prod", ] before-build-python = ["jlpm clean:all"] diff --git a/schema/config.json b/schema/config.json index 1f4f84a..e190eea 100644 --- a/schema/config.json +++ b/schema/config.json @@ -4,11 +4,18 @@ "type": "object", "properties": { "updateFrequency": { - "type": "integer", "title": "Frequency of Updates", "description": "The frequency of updates for the GPU Dashboard widgets, in milliseconds.", + "type": "integer", "default": 100, "minimum": 1 + }, + "maxTimeSeriesDataRecords": { + "title": "Maximum Number of Data Records", + "description": "This setting determines the maximum number of data points that can be displayed in each time series chart within Nvdashboard. To apply changes to this setting, please close and reopen the chart window", + "type": "integer", + "default": 1000, + "minimum": 10 } }, "additionalProperties": false diff --git a/src/assets/constants.ts b/src/assets/constants.ts index 743be32..9ef1124 100644 --- a/src/assets/constants.ts +++ b/src/assets/constants.ts @@ -7,3 +7,4 @@ export const WIDGET_TRACKER_NAME = 'gpu-dashboard-widgets'; export const COMMAND_OPEN_SETTINGS = 'settingeditor:open'; export const COMMAND_OPEN_WIDGET = 'gpu-dashboard-widget:open'; export const DEFAULT_UPDATE_FREQUENCY = 100; // ms +export const DEFAULT_MAX_RECORDS_TIMESERIES = 1000; // count diff --git a/src/assets/hooks.ts b/src/assets/hooks.ts index a9ee9bc..24edc18 100644 --- a/src/assets/hooks.ts +++ b/src/assets/hooks.ts @@ -1,35 +1,107 @@ import { ISettingRegistry } from '@jupyterlab/settingregistry'; -import { SetStateAction, useEffect } from 'react'; -import { DEFAULT_UPDATE_FREQUENCY, PLUGIN_ID_CONFIG } from './constants'; +import { SetStateAction, useEffect, useRef } from 'react'; +import { + DEFAULT_MAX_RECORDS_TIMESERIES, + DEFAULT_UPDATE_FREQUENCY, + PLUGIN_ID_CONFIG +} from './constants'; +import { connectToWebSocket } from '../handler'; -function loadSettingRegistry( - settingRegistry: ISettingRegistry, - setUpdateFrequency: { - (value: SetStateAction): void; - (arg0: number): void; +/** + * Updates the settings for update frequency and maximum records for time series charts. + */ +const updateSettings = ( + settings: ISettingRegistry.ISettings, + setUpdateFrequency: (value: SetStateAction) => void, + setMaxRecords?: (value: SetStateAction) => void +) => { + setUpdateFrequency( + (settings.get('updateFrequency').composite as number) || + DEFAULT_UPDATE_FREQUENCY + ); + if (setMaxRecords) { + setMaxRecords( + (settings.get('maxTimeSeriesDataRecords').composite as number) || + DEFAULT_MAX_RECORDS_TIMESERIES + ); } -) { +}; + +/** + * Loads the setting registry and updates the settings accordingly. + */ +export const loadSettingRegistry = ( + settingRegistry: ISettingRegistry, + setUpdateFrequency: (value: SetStateAction) => void, + setIsSettingsLoaded: (value: SetStateAction) => void, + setMaxRecords?: (value: SetStateAction) => void +) => { useEffect(() => { const loadSettings = async () => { try { const settings = await settingRegistry.load(PLUGIN_ID_CONFIG); - const loadedUpdateFrequency = - (settings.get('updateFrequency').composite as number) || - DEFAULT_UPDATE_FREQUENCY; - setUpdateFrequency(loadedUpdateFrequency); - + updateSettings(settings, setUpdateFrequency, setMaxRecords); settings.changed.connect(() => { - setUpdateFrequency( - (settings.get('updateFrequency').composite as number) || - DEFAULT_UPDATE_FREQUENCY - ); + updateSettings(settings, setUpdateFrequency, setMaxRecords); }); + setIsSettingsLoaded(true); } catch (error) { console.error(`An error occurred while loading settings: ${error}`); } }; loadSettings(); }, []); -} +}; + +/** + * Custom hook to establish a WebSocket connection and handle incoming messages. + */ +export const useWebSocket = ( + endpoint: string, + isPaused: boolean, + updateFrequency: number, + processData: (response: T, isPaused: boolean) => void, + isSettingsLoaded: boolean +) => { + const wsRef = useRef(null); + + useEffect(() => { + if (!isSettingsLoaded) { + return; + } + + wsRef.current = connectToWebSocket(endpoint); + const ws = wsRef.current; + + ws.onopen = () => { + console.log('WebSocket connected'); + }; -export default loadSettingRegistry; + ws.onmessage = event => { + const response = JSON.parse(event.data); + if (response.status !== 'connected') { + processData(response, isPaused); + } else { + ws.send(JSON.stringify({ updateFrequency, isPaused })); + } + }; + + ws.onerror = error => { + console.error('WebSocket error:', error); + }; + + ws.onclose = () => { + console.log('WebSocket disconnected'); + }; + + return () => { + ws.close(); + }; + }, [isSettingsLoaded]); + + useEffect(() => { + if (wsRef.current && wsRef.current.readyState === WebSocket.OPEN) { + wsRef.current.send(JSON.stringify({ updateFrequency, isPaused })); + } + }, [isPaused, updateFrequency]); +}; diff --git a/src/assets/interfaces.ts b/src/assets/interfaces.ts index 7f8118b..2886a71 100644 --- a/src/assets/interfaces.ts +++ b/src/assets/interfaces.ts @@ -18,3 +18,55 @@ export interface IWidgetInfo { title: string; instance: MainAreaWidget; } + +export interface IGpuResourceProps { + time: number; + gpu_utilization_total: number; + gpu_memory_total: number; + rx_total: number; + tx_total: number; + gpu_utilization_individual: number[]; + gpu_memory_individual: number[]; +} + +export interface IGpuUtilizationProps { + gpu_utilization: number[]; +} + +export interface IGpuUsageProps { + memory_usage: number[]; + total_memory: number[]; +} + +export interface ICPUResourceProps { + time: number; + cpu_utilization: number; + memory_usage: number; + disk_read: number; + disk_write: number; + network_read: number; + network_write: number; + disk_read_current: number; + disk_write_current: number; + network_read_current: number; + network_write_current: number; +} + +export interface INVLinkThroughputProps { + nvlink_tx: number[]; + nvlink_rx: number[]; + max_rxtx_bw: number; +} + +export interface INVLinkTimeLineProps { + time: number; + nvlink_tx: number[]; + nvlink_rx: number[]; + max_rxtx_bw: number; +} + +export interface IPCIThroughputProps { + pci_tx: number[]; + pci_rx: number[]; + max_rxtx_tp: number; +} diff --git a/src/charts/GpuMemoryChart.tsx b/src/charts/GpuMemoryChart.tsx index baed803..445893b 100644 --- a/src/charts/GpuMemoryChart.tsx +++ b/src/charts/GpuMemoryChart.tsx @@ -1,5 +1,4 @@ -import React, { useEffect, useState } from 'react'; -import { requestAPI } from '../handler'; +import React, { useState } from 'react'; import { ReactWidget } from '@jupyterlab/ui-components'; import { BarChart, @@ -19,55 +18,46 @@ import { import { format } from 'd3-format'; import AutoSizer from 'react-virtualized-auto-sizer'; import { ISettingRegistry } from '@jupyterlab/settingregistry'; -import loadSettingRegistry from '../assets/hooks'; -import { IChartProps } from '../assets/interfaces'; +import { loadSettingRegistry, useWebSocket } from '../assets/hooks'; +import { IChartProps, IGpuUsageProps } from '../assets/interfaces'; +// GpuMemoryChart component displays a bar chart representing GPU memory usage. const GpuMemoryChart: React.FC = ({ settingRegistry }): JSX.Element => { - const [gpuMemory, setGpuMemory] = useState([]); - const [gpuTotalMemory, setGpuTotalMemory] = useState([]); + const [gpuMemory, setGpuMemory] = useState({ + memory_usage: [], + total_memory: [] + }); const [updateFrequency, setUpdateFrequency] = useState( DEFAULT_UPDATE_FREQUENCY ); + const [isSettingsLoaded, setIsSettingsLoaded] = useState(false); - loadSettingRegistry(settingRegistry, setUpdateFrequency); - - useEffect(() => { - async function fetchGPUMemory() { - const response = await requestAPI('gpu_usage'); - setGpuMemory(response.memory_usage); - // set gpuTotalMemory to max of total memory array returned from API - setGpuTotalMemory(response.total_memory); - } - - fetchGPUMemory(); - }, []); - - useEffect(() => { - async function fetchGPUMemory() { - const response = await requestAPI('gpu_usage'); - setGpuMemory(response.memory_usage); - setGpuTotalMemory(response.total_memory); - } - const intervalId = setInterval(() => { - fetchGPUMemory(); - }, updateFrequency); - - return () => clearInterval(intervalId); - }, []); + // Load settings and initialize WebSocket connection + loadSettingRegistry(settingRegistry, setUpdateFrequency, setIsSettingsLoaded); + useWebSocket( + 'gpu_usage', + false, + updateFrequency, + setGpuMemory, + isSettingsLoaded + ); - const data = gpuMemory.map((memory, index) => ({ + // Prepare data for rendering + const data = gpuMemory.memory_usage.map((memory, index) => ({ name: `GPU ${index}`, memory: memory, - totalMemory: gpuTotalMemory[index] + totalMemory: gpuMemory.total_memory[index] })); - // Create a formatter for displaying bytes - + // Create a color scale for the bars const colorScale = scaleLinear().range(BAR_COLOR_LINEAR_RANGE); - const usageSum = data.reduce((sum, data) => sum + data.memory, 0); + // Calculate the sum of GPU memory usage + const usageSum = data?.reduce((sum, data) => sum + data.memory, 0); + + // Formatter for displaying bytes const formatBytes = (value: number): string => { return `${format('.2s')(value)}B`; }; @@ -76,7 +66,7 @@ const GpuMemoryChart: React.FC = ({
{' '} - GPU Memory: {formatBytes(usageSum)} + GPU Memory: {formatBytes(usageSum)}{' '} {({ height, width }: { height: number; width: number }) => ( @@ -89,7 +79,7 @@ const GpuMemoryChart: React.FC = ({ @@ -104,9 +94,7 @@ const GpuMemoryChart: React.FC = ({ {data.map((entry, index) => ( ))} @@ -117,12 +105,14 @@ const GpuMemoryChart: React.FC = ({ ); }; +// GpuMemoryChartWidget is a ReactWidget that renders the GpuMemoryChart component. export class GpuMemoryChartWidget extends ReactWidget { constructor(private settingRegistry: ISettingRegistry) { super(); this.addClass('size-constrained-widgets'); this.settingRegistry = settingRegistry; } + render(): JSX.Element { return ; } diff --git a/src/charts/GpuResourceChart.tsx b/src/charts/GpuResourceChart.tsx index 2d251f2..4844c11 100644 --- a/src/charts/GpuResourceChart.tsx +++ b/src/charts/GpuResourceChart.tsx @@ -1,67 +1,79 @@ -import React, { useState, useEffect } from 'react'; +import React, { useState } from 'react'; import { ReactWidget, Button } from '@jupyterlab/ui-components'; import { Line, XAxis, YAxis, Brush, LineChart } from 'recharts'; import AutoSizer from 'react-virtualized-auto-sizer'; -import { requestAPI } from '../handler'; import { CustomLineChart } from '../components/customLineChart'; import { formatDate, formatBytes } from '../components/formatUtils'; import { scaleLinear } from 'd3-scale'; import { + DEFAULT_MAX_RECORDS_TIMESERIES, DEFAULT_UPDATE_FREQUENCY, GPU_COLOR_CATEGORICAL_RANGE } from '../assets/constants'; import { pauseIcon, playIcon } from '../assets/icons'; -import loadSettingRegistry from '../assets/hooks'; -import { IChartProps } from '../assets/interfaces'; +import { loadSettingRegistry, useWebSocket } from '../assets/hooks'; +import { IChartProps, IGpuResourceProps } from '../assets/interfaces'; import { ISettingRegistry } from '@jupyterlab/settingregistry'; -interface IDataProps { - time: number; - gpu_utilization_total: number; - gpu_memory_total: number; - rx_total: number; - tx_total: number; - gpu_utilization_individual: number[]; - gpu_memory_individual: number[]; -} - +/** + * Component to display GPU resource charts in the Nvdashboard. + */ const GpuResourceChart: React.FC = ({ settingRegistry }) => { - const [gpuData, setGpuData] = useState([]); - const [tempData, setTempData] = useState([]); + const [gpuData, setGpuData] = useState([]); + const [tempData, setTempData] = useState([]); const [isPaused, setIsPaused] = useState(false); const ngpus = gpuData[0]?.gpu_utilization_individual.length || 0; const [updateFrequency, setUpdateFrequency] = useState( DEFAULT_UPDATE_FREQUENCY ); + const [maxRecords, setMaxRecords] = useState( + DEFAULT_MAX_RECORDS_TIMESERIES + ); + const [isSettingsLoaded, setIsSettingsLoaded] = useState(false); - loadSettingRegistry(settingRegistry, setUpdateFrequency); + // Load settings from the JupyterLab setting registry. + loadSettingRegistry( + settingRegistry, + setUpdateFrequency, + setIsSettingsLoaded, + setMaxRecords + ); - useEffect(() => { - async function fetchGpuUsage() { - const response = await requestAPI('gpu_resource'); - if (!isPaused) { - setGpuData(prevData => { - if (tempData.length > 1) { - prevData = [...prevData, ...tempData]; - } - const newData = [...prevData, response]; - return newData; - }); - setTempData([]); - } else { - setTempData([...tempData, response]); - } + // Process incoming data from the WebSocket and manage the GPU data state. + const processData = (response: IGpuResourceProps, isPaused: boolean) => { + if (!isPaused) { + setGpuData(prevData => { + let newData = + tempData.length > 1 + ? [...prevData, ...tempData, response] + : [...prevData, response]; + // Truncate data if it exceeds the maximum records limit. + if (newData.length > maxRecords) { + newData = newData.slice(-maxRecords); + } + return newData; + }); + setTempData([]); + } else { + setTempData(prevTempData => [...prevTempData, response]); } + }; - const interval = setInterval(fetchGpuUsage, updateFrequency); - - return () => clearInterval(interval); - }, [isPaused, tempData]); + // Establish a WebSocket connection and listen for data updates. + useWebSocket( + 'gpu_resource', + isPaused, + updateFrequency, + processData, + isSettingsLoaded + ); + // Handle click events for the pause/play button. const handlePauseClick = () => { setIsPaused(!isPaused); }; + // Define a color scale for the chart lines. const colorScale = scaleLinear() .domain([0, ngpus]) .range(GPU_COLOR_CATEGORICAL_RANGE); @@ -183,6 +195,7 @@ const GpuResourceChart: React.FC = ({ settingRegistry }) => { isAnimationActive={false} /> + {/* Render the control panel with the pause/play button and the time brush */}
= ({ settingRegistry }) => { ); }; +/** + * A widget for rendering the GPU resource chart in JupyterLab. + */ export class GpuResourceChartWidget extends ReactWidget { constructor(private settingRegistry: ISettingRegistry) { super(); diff --git a/src/charts/GpuUtilizationChart.tsx b/src/charts/GpuUtilizationChart.tsx index cb859e6..a73bf75 100644 --- a/src/charts/GpuUtilizationChart.tsx +++ b/src/charts/GpuUtilizationChart.tsx @@ -1,5 +1,4 @@ -import React, { useEffect, useState } from 'react'; -import { requestAPI } from '../handler'; +import React, { useState } from 'react'; import { ReactWidget } from '@jupyterlab/ui-components'; import { BarChart, @@ -18,45 +17,37 @@ import { } from '../assets/constants'; import AutoSizer from 'react-virtualized-auto-sizer'; import { ISettingRegistry } from '@jupyterlab/settingregistry'; -import { IChartProps } from '../assets/interfaces'; -import loadSettingRegistry from '../assets/hooks'; +import { IChartProps, IGpuUtilizationProps } from '../assets/interfaces'; +import { loadSettingRegistry, useWebSocket } from '../assets/hooks'; +// GpuUtilizationChart component displays a bar chart representing GPU Utilization usage. const GpuUtilizationChart: React.FC = ({ settingRegistry }): JSX.Element => { - const [gpuUtilization, setGpuUtilization] = useState([]); + const [gpuUtilization, setGpuUtilization] = useState(); const [updateFrequency, setUpdateFrequency] = useState( DEFAULT_UPDATE_FREQUENCY ); - loadSettingRegistry(settingRegistry, setUpdateFrequency); + const [isSettingsLoaded, setIsSettingsLoaded] = useState(false); - useEffect(() => { - async function fetchGPUUtilization() { - const response = await requestAPI('gpu_utilization'); - setGpuUtilization(response.gpu_utilization); - } - - fetchGPUUtilization(); - }, []); - - useEffect(() => { - async function fetchGPUUtilization() { - const response = await requestAPI('gpu_utilization'); - setGpuUtilization(response.gpu_utilization); - } - const intervalId = setInterval(() => { - fetchGPUUtilization(); - }, updateFrequency); - - return () => clearInterval(intervalId); - }, []); + // Load settings and initialize WebSocket connection + loadSettingRegistry(settingRegistry, setUpdateFrequency, setIsSettingsLoaded); + useWebSocket( + 'gpu_utilization', + false, + updateFrequency, + setGpuUtilization, + isSettingsLoaded + ); - const data = gpuUtilization.map((utilization, index) => ({ + // Prepare data for rendering + const data = gpuUtilization?.gpu_utilization.map((utilization, index) => ({ name: `GPU ${index}`, utilization: utilization })); + // Create a color scale for the bars const colorScale = scaleLinear() .domain([0, 100]) .range(BAR_COLOR_LINEAR_RANGE); @@ -97,10 +88,10 @@ const GpuUtilizationChart: React.FC = ({ /> - {data.map((entry, index) => ( + {data?.map((entry, index) => ( ))} @@ -111,6 +102,7 @@ const GpuUtilizationChart: React.FC = ({ ); }; +// GpuUtilizationChartWidget is a ReactWidget that renders the GpuUtilizationChart component. export class GpuUtilizationChartWidget extends ReactWidget { constructor(private settingRegistry: ISettingRegistry) { super(); diff --git a/src/charts/MachineResourceChart.tsx b/src/charts/MachineResourceChart.tsx index 710e7cc..4d9a1fb 100644 --- a/src/charts/MachineResourceChart.tsx +++ b/src/charts/MachineResourceChart.tsx @@ -1,84 +1,93 @@ -import React, { useState, useEffect } from 'react'; +import React, { useState } from 'react'; import { ReactWidget, Button } from '@jupyterlab/ui-components'; import { Line, XAxis, YAxis, Brush, LineChart } from 'recharts'; import AutoSizer from 'react-virtualized-auto-sizer'; -import { requestAPI } from '../handler'; import { CustomLineChart } from '../components/customLineChart'; import { formatDate, formatBytes } from '../components/formatUtils'; import { scaleLinear } from 'd3-scale'; import { + DEFAULT_MAX_RECORDS_TIMESERIES, DEFAULT_UPDATE_FREQUENCY, GPU_COLOR_CATEGORICAL_RANGE } from '../assets/constants'; import { pauseIcon, playIcon } from '../assets/icons'; import { ISettingRegistry } from '@jupyterlab/settingregistry'; -import { IChartProps } from '../assets/interfaces'; -import loadSettingRegistry from '../assets/hooks'; - -interface IDataProps { - time: number; - cpu_utilization: number; - memory_usage: number; - disk_read: number; - disk_write: number; - network_read: number; - network_write: number; - disk_read_current: number; - disk_write_current: number; - network_read_current: number; - network_write_current: number; -} +import { ICPUResourceProps, IChartProps } from '../assets/interfaces'; +import { loadSettingRegistry, useWebSocket } from '../assets/hooks'; +/** + * Component to display CPU resource charts in the Nvdashboard. + */ const MachineResourceChart: React.FC = ({ settingRegistry }) => { - const [cpuData, setCpuData] = useState([]); - const [tempData, setTempData] = useState([]); + const [cpuData, setCpuData] = useState([]); + const [tempData, setTempData] = useState([]); const [isPaused, setIsPaused] = useState(false); const [updateFrequency, setUpdateFrequency] = useState( DEFAULT_UPDATE_FREQUENCY ); + const [maxRecords, setMaxRecords] = useState( + DEFAULT_MAX_RECORDS_TIMESERIES + ); + const [isSettingsLoaded, setIsSettingsLoaded] = useState(false); - loadSettingRegistry(settingRegistry, setUpdateFrequency); - - useEffect(() => { - async function fetchCpuUsage() { - let response = await requestAPI('cpu_resource'); + // Load settings from the JupyterLab setting registry. + loadSettingRegistry( + settingRegistry, + setUpdateFrequency, + setIsSettingsLoaded, + setMaxRecords + ); - if (cpuData.length > 0) { - response = { - ...response, - disk_read_current: - response.disk_read - cpuData[cpuData.length - 1].disk_read, - disk_write_current: - response.disk_write - cpuData[cpuData.length - 1].disk_write, - network_read_current: - response.network_read - cpuData[cpuData.length - 1].network_read, - network_write_current: - response.network_write - cpuData[cpuData.length - 1].network_write - }; - } - if (!isPaused) { - setCpuData(prevData => { - if (tempData.length > 1) { - prevData = [...prevData, ...tempData]; - } - const newData = [...prevData, response]; - return newData; - }); - setTempData([]); - } else { - setTempData([...tempData, response]); - } + // Process incoming data from the WebSocket and manage the CPU data state. + const processData = (response: ICPUResourceProps, isPaused: any) => { + if (cpuData.length > 0) { + response = { + ...response, + disk_read_current: + response.disk_read - cpuData[cpuData.length - 1].disk_read, + disk_write_current: + response.disk_write - cpuData[cpuData.length - 1].disk_write, + network_read_current: + response.network_read - cpuData[cpuData.length - 1].network_read, + network_write_current: + response.network_write - cpuData[cpuData.length - 1].network_write + }; } + if (!isPaused) { + setCpuData(prevData => { + let newData; + if (tempData.length > 1) { + newData = [...prevData, ...tempData, response]; + } else { + newData = [...prevData, response]; + } + // Truncate data if it exceeds the maximum records limit. + if (newData.length > maxRecords) { + newData = newData.slice(-1 * maxRecords); + } + return newData; + }); + setTempData([]); + } else { + setTempData([...tempData, response]); + } + }; - const interval = setInterval(fetchCpuUsage, updateFrequency); - - return () => clearInterval(interval); - }, [isPaused, tempData]); + // Establish a WebSocket connection and listen for data updates. + useWebSocket( + 'cpu_resource', + isPaused, + updateFrequency, + processData, + isSettingsLoaded + ); + // Handle click events for the pause/play button. const handlePauseClick = () => { setIsPaused(!isPaused); }; + // Define a color scale for the chart lines. const colorScale = scaleLinear().range(GPU_COLOR_CATEGORICAL_RANGE); return ( @@ -169,6 +178,7 @@ const MachineResourceChart: React.FC = ({ settingRegistry }) => { isAnimationActive={false} /> + {/* Render the control panel with the pause/play button and the time brush */}
= ({ settingRegistry }) => { ); }; +/** + * A widget for rendering the Machine resource chart in JupyterLab. + */ export class MachineResourceChartWidget extends ReactWidget { constructor(private settingRegistry: ISettingRegistry) { super(); diff --git a/src/charts/NvLinkThroughputChart.tsx b/src/charts/NvLinkThroughputChart.tsx index 16e19c2..ca9ba2a 100644 --- a/src/charts/NvLinkThroughputChart.tsx +++ b/src/charts/NvLinkThroughputChart.tsx @@ -1,5 +1,4 @@ -import React, { useEffect, useState } from 'react'; -import { requestAPI } from '../handler'; +import React, { useState } from 'react'; import { ReactWidget } from '@jupyterlab/ui-components'; import { BarChart, Bar, Cell, YAxis, XAxis, Tooltip } from 'recharts'; import { scaleLinear } from 'd3-scale'; @@ -10,46 +9,29 @@ import { DEFAULT_UPDATE_FREQUENCY } from '../assets/constants'; import AutoSizer from 'react-virtualized-auto-sizer'; -import { IChartProps } from '../assets/interfaces'; -import loadSettingRegistry from '../assets/hooks'; +import { IChartProps, INVLinkThroughputProps } from '../assets/interfaces'; +import { loadSettingRegistry, useWebSocket } from '../assets/hooks'; import { ISettingRegistry } from '@jupyterlab/settingregistry'; -interface IDataProps { - nvlink_tx: number[]; - nvlink_rx: number[]; - max_rxtx_bw: number; -} - +// NvLinkThroughputChart component displays a bar chart representing nvlink throughput data. const NvLinkThroughputChart: React.FC = ({ settingRegistry }) => { - const [nvlinkStats, setNvLinkStats] = useState(); + const [nvlinkStats, setNvLinkStats] = useState(); const [updateFrequency, setUpdateFrequency] = useState( DEFAULT_UPDATE_FREQUENCY ); + const [isSettingsLoaded, setIsSettingsLoaded] = useState(false); - loadSettingRegistry(settingRegistry, setUpdateFrequency); - - useEffect(() => { - async function fetchGPUMemory() { - const response = await requestAPI('nvlink_throughput'); - console.log(response); - setNvLinkStats(response); - } - - fetchGPUMemory(); - }, []); - - useEffect(() => { - async function fetchGPUMemory() { - const response = await requestAPI('nvlink_throughput'); - setNvLinkStats(response); - } - const intervalId = setInterval(() => { - fetchGPUMemory(); - }, updateFrequency); - - return () => clearInterval(intervalId); - }, []); + // Load settings and initialize WebSocket connection + loadSettingRegistry(settingRegistry, setUpdateFrequency, setIsSettingsLoaded); + useWebSocket( + 'nvlink_throughput', + false, + updateFrequency, + setNvLinkStats, + isSettingsLoaded + ); + // Prepare data for rendering const gpuCount = nvlinkStats?.nvlink_rx.length; const data = Array.from(Array(gpuCount).keys()).map(index => ({ name: `GPU ${index}`, @@ -58,10 +40,12 @@ const NvLinkThroughputChart: React.FC = ({ settingRegistry }) => { maxTP: nvlinkStats?.max_rxtx_bw || 0 })); + // Create a color scale for the bars const colorScale = scaleLinear() .domain([0, 1]) .range(BAR_COLOR_LINEAR_RANGE); + // Formatter for displaying bytes const formatBytes = (bytes: number): string => { return `${format('.2s')(bytes)}B`; }; diff --git a/src/charts/NvLinkTimelineChart.tsx b/src/charts/NvLinkTimelineChart.tsx index 0550941..26c62d5 100644 --- a/src/charts/NvLinkTimelineChart.tsx +++ b/src/charts/NvLinkTimelineChart.tsx @@ -1,5 +1,4 @@ -import React, { useEffect, useState } from 'react'; -import { requestAPI } from '../handler'; +import React, { useState } from 'react'; import { ReactWidget, Button } from '@jupyterlab/ui-components'; import AutoSizer from 'react-virtualized-auto-sizer'; import { CustomLineChart } from '../components/customLineChart'; @@ -8,58 +7,76 @@ import { formatDate, formatBytes } from '../components/formatUtils'; import { pauseIcon, playIcon } from '../assets/icons'; import { scaleLinear } from 'd3-scale'; import { + DEFAULT_MAX_RECORDS_TIMESERIES, DEFAULT_UPDATE_FREQUENCY, GPU_COLOR_CATEGORICAL_RANGE } from '../assets/constants'; import { ISettingRegistry } from '@jupyterlab/settingregistry'; -import { IChartProps } from '../assets/interfaces'; -import loadSettingRegistry from '../assets/hooks'; - -interface IDataProps { - time: number; - nvlink_tx: number[]; - nvlink_rx: number[]; - max_rxtx_bw: number; -} +import { IChartProps, INVLinkTimeLineProps } from '../assets/interfaces'; +import { loadSettingRegistry, useWebSocket } from '../assets/hooks'; +/** + * Component to display Nvlink stats in a timeseries format in the Nvdashboard. + */ const NvLinkTimelineChart: React.FC = ({ settingRegistry }) => { - const [nvlinkStats, setNvLinkStats] = useState([]); - const [tempData, setTempData] = useState([]); + const [nvlinkStats, setNvLinkStats] = useState([]); + const [tempData, setTempData] = useState([]); const [isPaused, setIsPaused] = useState(false); const ngpus = nvlinkStats[0]?.nvlink_tx.length || 0; const [updateFrequency, setUpdateFrequency] = useState( DEFAULT_UPDATE_FREQUENCY ); + const [maxRecords, setMaxRecords] = useState( + DEFAULT_MAX_RECORDS_TIMESERIES + ); + const [isSettingsLoaded, setIsSettingsLoaded] = useState(false); - loadSettingRegistry(settingRegistry, setUpdateFrequency); + // Load settings from the JupyterLab setting registry. + loadSettingRegistry( + settingRegistry, + setUpdateFrequency, + setIsSettingsLoaded, + setMaxRecords + ); - useEffect(() => { - async function fetchNvLinkStats() { - const response = await requestAPI('nvlink_throughput'); - response.time = Date.now(); - if (!isPaused) { - setNvLinkStats(prevData => { - if (tempData.length > 1) { - prevData = [...prevData, ...tempData]; - } - const newData = [...prevData, response]; - return newData; - }); - setTempData([]); - } else { - setTempData([...tempData, response]); - } + // Process incoming data from the WebSocket and manage the nvlink stats. + const processData = (response: INVLinkTimeLineProps, isPaused: any) => { + response.time = Date.now(); + if (!isPaused) { + setNvLinkStats(prevData => { + let newData; + if (tempData.length > 1) { + newData = [...prevData, ...tempData, response]; + } else { + newData = [...prevData, response]; + } + // Truncate data if it exceeds 1000 records + if (newData.length > maxRecords) { + newData = newData.slice(-1 * maxRecords); + } + return newData; + }); + setTempData([]); + } else { + setTempData([...tempData, response]); } + }; - const interval = setInterval(fetchNvLinkStats, updateFrequency); - - return () => clearInterval(interval); - }, [isPaused, tempData]); + // Establish a WebSocket connection and listen for data updates. + useWebSocket( + 'nvlink_throughput', + isPaused, + updateFrequency, + processData, + isSettingsLoaded + ); + // Handle click events for the pause/play button. const handlePauseClick = () => { setIsPaused(!isPaused); }; + // Define a color scale for the chart lines. const colorScale = scaleLinear() .domain([0, ngpus]) .range(GPU_COLOR_CATEGORICAL_RANGE); @@ -119,6 +136,7 @@ const NvLinkTimelineChart: React.FC = ({ settingRegistry }) => { ) )} + {/* Render the control panel with the pause/play button and the time brush */}
= ({ settingRegistry }) => { - const [pciStats, setPciStats] = useState(); + const [pciStats, setPciStats] = useState(); const [updateFrequency, setUpdateFrequency] = useState( DEFAULT_UPDATE_FREQUENCY ); + const [isSettingsLoaded, setIsSettingsLoaded] = useState(false); - loadSettingRegistry(settingRegistry, setUpdateFrequency); - - useEffect(() => { - async function fetchGPUMemory() { - const response = await requestAPI('pci_stats'); - console.log(response); - setPciStats(response); - } - - fetchGPUMemory(); - }, []); - - useEffect(() => { - async function fetchGPUMemory() { - const response = await requestAPI('pci_stats'); - setPciStats(response); - } - const intervalId = setInterval(() => { - fetchGPUMemory(); - }, updateFrequency); - - return () => clearInterval(intervalId); - }, []); + // Load settings and initialize WebSocket connection + loadSettingRegistry(settingRegistry, setUpdateFrequency, setIsSettingsLoaded); + useWebSocket( + 'nvlink_throughput', + false, + updateFrequency, + setPciStats, + isSettingsLoaded + ); + // Prepare data for rendering const gpuCount = pciStats?.pci_tx.length; const data = Array.from(Array(gpuCount).keys()).map(index => ({ name: `GPU ${index}`, @@ -57,6 +40,7 @@ const PciThroughputChart: React.FC = ({ settingRegistry }) => { maxTP: pciStats?.max_rxtx_tp || 0 })); + // Create a color scale for the bars const colorScale = scaleLinear() .domain([0, 1]) .range(BAR_COLOR_LINEAR_RANGE); diff --git a/src/handler.ts b/src/handler.ts index 47c0300..9c177c3 100644 --- a/src/handler.ts +++ b/src/handler.ts @@ -1,46 +1,32 @@ -import { URLExt } from '@jupyterlab/coreutils'; - -import { ServerConnection } from '@jupyterlab/services'; - /** - * Call the API extension + * Connect to a WebSocket API endpoint * - * @param endPoint API REST end point for the extension - * @param init Initial values for the request - * @returns The response body interpreted as JSON + * @param endPoint WebSocket endpoint for the extension + * @returns A WebSocket object connected to the endpoint */ -export async function requestAPI( - endPoint = '', - init: RequestInit = {} -): Promise { - // Make request to Jupyter API - const settings = ServerConnection.makeSettings(); - const requestUrl = URLExt.join( - settings.baseUrl, - 'nvdashboard', // API Namespace - endPoint - ); +export function connectToWebSocket(endPoint = '') { + const baseUrl = document.location.origin.replace(/^http/, 'ws'); + const wsUrl = new URL(`nvdashboard/${endPoint}`, baseUrl); + + const ws = new WebSocket(wsUrl.href); - let response: Response; - try { - response = await ServerConnection.makeRequest(requestUrl, init, settings); - } catch (error) { - throw new ServerConnection.NetworkError(error as any); - } + ws.onopen = () => { + console.log(`WebSocket connected to ${endPoint}`); + ws.send(JSON.stringify({ message: 'Client connected' })); + }; - let data: any = await response.text(); + ws.onmessage = event => { + const data = JSON.parse(event.data); + console.log(`Data received from ${endPoint}:`, data); + }; - if (data.length > 0) { - try { - data = JSON.parse(data); - } catch (error) { - console.log('Not a JSON response body.', response); - } - } + ws.onerror = error => { + console.error(`WebSocket error on ${endPoint}:`, error); + }; - if (!response.ok) { - throw new ServerConnection.ResponseError(response, data.message || data); - } + ws.onclose = () => { + console.log(`WebSocket disconnected from ${endPoint}`); + }; - return data; + return ws; } diff --git a/src/launchWidget.tsx b/src/launchWidget.tsx index 9342f3b..cec7ab0 100644 --- a/src/launchWidget.tsx +++ b/src/launchWidget.tsx @@ -24,7 +24,7 @@ import { DEFAULT_UPDATE_FREQUENCY, PLUGIN_ID_OPEN_SETTINGS } from './assets/constants'; -import loadSettingRegistry from './assets/hooks'; +import { loadSettingRegistry } from './assets/hooks'; // Control component for the GPU Dashboard, which contains buttons to open the GPU widgets const Control: React.FC = ({ @@ -39,7 +39,9 @@ const Control: React.FC = ({ DEFAULT_UPDATE_FREQUENCY ); - loadSettingRegistry(settingRegistry, setUpdateFrequency); + const [isSettingsLoaded, setIsSettingsLoaded] = useState(false); + + loadSettingRegistry(settingRegistry, setUpdateFrequency, setIsSettingsLoaded); if (!app.commands.hasCommand(COMMAND_OPEN_WIDGET)) { // Add command to open GPU Dashboard Widget @@ -138,6 +140,7 @@ const Control: React.FC = ({ className="header-button" onClick={() => app.commands.execute(PLUGIN_ID_OPEN_SETTINGS)} > +
settings
@@ -200,7 +203,7 @@ const Control: React.FC = ({

- Updated every {updateFrequency}ms + {isSettingsLoaded && `Updated every ${updateFrequency}ms`}
diff --git a/style/base.css b/style/base.css index 364ef30..7479c00 100644 --- a/style/base.css +++ b/style/base.css @@ -107,6 +107,22 @@ box-shadow: 0 0 5px 2px #ff7900; /* Reduced blur for the glowing effect on hover */ } +.nv-header-icon-text { + font-family: 'Courier New', Courier, monospace; + word-spacing: 5px; + font-variant: all-small-caps; + color: #ff7900; + margin-right: 3px; + font-size: 15px; +} + +.nv-header-icon-text:hover { + text-shadow: + -0.25px 0 #ff7900, + 0 0.25px #ff7900, + 0.25px 0 #ff7900, + 0 -0.25px #ff7900; +} .chart-title { padding-left: 62px; font-size: 22px; @@ -153,6 +169,7 @@ .nv-header-icon svg path { fill: #ff7900 !important; + font-size: 5px !important; } .nv-icon-custom {