From 1af8c4507e53e1526539a47d4e48425ae436bc65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Walski?= Date: Wed, 20 Mar 2024 13:54:13 +0100 Subject: [PATCH 1/7] gpu-info subproject --- .vscode/launch.json | 68 +++++++++++++++++++++++++ Cargo.lock | 113 +++++++++++++++++++++++++++++++++++++++--- Cargo.toml | 5 +- gpu-info/Cargo.toml | 12 +++++ gpu-info/README.md | 5 ++ gpu-info/src/lib.rs | 81 ++++++++++++++++++++++++++++++ gpu-info/src/model.rs | 43 ++++++++++++++++ src/offer_template.rs | 1 + 8 files changed, 320 insertions(+), 8 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 gpu-info/Cargo.toml create mode 100644 gpu-info/README.md create mode 100644 gpu-info/src/lib.rs create mode 100644 gpu-info/src/model.rs diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..6ff8aac --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,68 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "offer-template", + "cargo": { + "args": [ + "build", + "--bin=ya-runtime-ai", + "--package=ya-runtime-ai" + ], + "filter": { + "name": "ya-runtime-ai", + "kind": "bin" + } + }, + "args": [ + "--runtime", + "dummy", + "offer-template" + ], + "cwd": "${workspaceFolder}" + }, + { + "type": "lldb", + "request": "launch", + "name": "Debug unit tests in executable 'ya-runtime-ai'", + "cargo": { + "args": [ + "test", + "--no-run", + "--bin=ya-runtime-ai", + "--package=ya-runtime-ai" + ], + "filter": { + "name": "ya-runtime-ai", + "kind": "bin" + } + }, + "args": [], + "cwd": "${workspaceFolder}" + }, + { + "type": "lldb", + "request": "launch", + "name": "Debug integration test 'runtime_config'", + "cargo": { + "args": [ + "test", + "--no-run", + "--test=runtime_config", + "--package=ya-runtime-ai" + ], + "filter": { + "name": "runtime_config", + "kind": "test" + } + }, + "args": [], + "cwd": "${workspaceFolder}" + } + ] +} diff --git a/Cargo.lock b/Cargo.lock index c302ee1..8b780e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -368,9 +368,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.79" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" +checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" [[package]] name = "assert-json-diff" @@ -869,6 +869,41 @@ dependencies = [ "typenum", ] +[[package]] +name = "darling" +version = "0.20.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54e36fcd13ed84ffdfda6f5be89b31287cbb80c439841fe69e04841435464391" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c2cf1c23a687a1feeb728783b993c4e1ad83d99f351801977dd809b48d0a70f" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim 0.10.0", + "syn 2.0.48", +] + +[[package]] +name = "darling_macro" +version = "0.20.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.48", +] + [[package]] name = "deranged" version = "0.3.11" @@ -1314,6 +1349,15 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "gpu-info" +version = "0.1.0" +dependencies = [ + "anyhow", + "nvml-wrapper", + "serde", +] + [[package]] name = "h2" version = "0.3.23" @@ -1511,6 +1555,12 @@ dependencies = [ "cc", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "0.5.0" @@ -1633,6 +1683,16 @@ version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" +[[package]] +name = "libloading" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +dependencies = [ + "cfg-if 1.0.0", + "windows-targets 0.52.0", +] + [[package]] name = "libredox" version = "0.0.1" @@ -1876,6 +1936,29 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "nvml-wrapper" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c9bff0aa1d48904a1385ea2a8b97576fbdcbc9a3cfccd0d31fe978e1c4038c5" +dependencies = [ + "bitflags 2.4.1", + "libloading", + "nvml-wrapper-sys", + "static_assertions", + "thiserror", + "wrapcenum-derive", +] + +[[package]] +name = "nvml-wrapper-sys" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "698d45156f28781a4e79652b6ebe2eaa0589057d588d3aec1333f6466f13fcb5" +dependencies = [ + "libloading", +] + [[package]] name = "object" version = "0.32.2" @@ -2582,9 +2665,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.195" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] @@ -2600,9 +2683,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.195" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", @@ -2725,6 +2808,12 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.8.0" @@ -3556,6 +3645,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "wrapcenum-derive" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a76ff259533532054cfbaefb115c613203c73707017459206380f03b3b3f266e" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "xattr" version = "1.3.1" diff --git a/Cargo.toml b/Cargo.toml index fa91f3d..70293cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,8 +6,9 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [workspace] -members = [ - "runtimes/dummy" +members = [ + "gpu-info", + "runtimes/dummy", ] [dependencies] diff --git a/gpu-info/Cargo.toml b/gpu-info/Cargo.toml new file mode 100644 index 0000000..fd11441 --- /dev/null +++ b/gpu-info/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "gpu-info" +version = "0.1.0" +edition = "2021" +readme = "README.md" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1.0.81" +nvml-wrapper = "0.10.0" +serde = "1.0.197" diff --git a/gpu-info/README.md b/gpu-info/README.md new file mode 100644 index 0000000..59b59e5 --- /dev/null +++ b/gpu-info/README.md @@ -0,0 +1,5 @@ +# gpu-info + +Library provides GPU info listed in [GAP-35](https://github.com/golemfactory/golem-architecture/blob/master/gaps/gap-35_gpu_pci_capability/gap-35_gpu_pci_capability.md). + +It supports Nvidia GPUs only. Implementation uses [nvml-wrapper](https://crates.io/crates/nvml-wrapper) to access [NVML](https://developer.nvidia.com/nvidia-management-library-nvml). diff --git a/gpu-info/src/lib.rs b/gpu-info/src/lib.rs new file mode 100644 index 0000000..850eaf3 --- /dev/null +++ b/gpu-info/src/lib.rs @@ -0,0 +1,81 @@ +use model::{Cuda, Gpu}; +use nvml_wrapper::{bitmasks::InitFlags, Device, Nvml}; +use anyhow::{bail, Context, Result}; + +pub mod model; + + +/* testing +let nvml = Nvml::init()?; +// Get the first `Device` (GPU) in the system +log::info!("Cuda version: {}", nvml.sys_cuda_driver_version().expect("Can get CUDA version")); +match nvml.device_count() { + Ok(count) => { + for index in 0..count { + match nvml.device_by_index(index) { + Ok(dev) => { + log::info!("Device index: {index}"); + log::info!("Device name: {}", dev.name().expect("Can get device name")); + log::info!("Device cores: {}", dev.num_cores().expect("Can get device cores")); + log::info!("Device mem info: {:?}", dev.memory_info().unwrap()); + log::info!("Device cuda compute capability: {:?}", dev.cuda_compute_capability().unwrap()); + log::info!("Device GPU clock info: {:?}", dev.clock_info(nvml_wrapper::enum_wrappers::device::Clock::Graphics).unwrap()); + log::info!("Device Memory clock info: {:?}", dev.clock_info(nvml_wrapper::enum_wrappers::device::Clock::Memory).unwrap()); + log::info!("Device SM clock info: {:?}", dev.clock_info(nvml_wrapper::enum_wrappers::device::Clock::SM).unwrap()); + log::info!("Device Video clock info: {:?}", dev.clock_info(nvml_wrapper::enum_wrappers::device::Clock::Video).unwrap()); + log::info!("Device memory bus width: {:?}", dev.memory_bus_width().unwrap()); + log::info!("Device memory clocks: {:?}", dev.supported_memory_clocks().unwrap()); + // max supported_memory_clocks *2 == memTransferRatemax + + }, + Err(err) => log::error!("Failed to get GPU {index} info. Err: {err}"), + } + } + }, + Err(err) => log::error!("Failed to get GPU info. Err: {err}"), +} +*/ + +pub struct GpuInfo { + nvml: Nvml, +} + +impl GpuInfo { + pub fn init() -> anyhow::Result { + let nvml = Nvml::builder().flags(InitFlags::NO_ATTACH).init()?; + return Ok(Self { nvml }); + } + + /// `uuid` of GPU device. If not provided first available GPU device will be used. + pub fn info>(&self, uuid: Option<&str>) -> Result { + if let Some(uuid) = uuid { + let dev = self.nvml.device_by_uuid(uuid).with_context(|| format!("Failed to get GPU device with UUID: {uuid}."))?; + return self.device_info(dev); + }; + + let gpu_count = self.nvml.device_count().with_context(|| "Unable to get count of CUDA devices.")?; + if gpu_count == 0 { + bail!("No supported GPU device available.") + } + + let dev = self.nvml.device_by_index(0).with_context(|| "Failed to get GPU device.")?; + self.device_info(dev) + } + + fn device_info(&self, dev: Device) -> Result { + let compute_capability = dev.cuda_compute_capability()?; + todo!() + } + + fn cuda(&self) -> Result { + todo!() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works() {} +} diff --git a/gpu-info/src/model.rs b/gpu-info/src/model.rs new file mode 100644 index 0000000..9f2dff5 --- /dev/null +++ b/gpu-info/src/model.rs @@ -0,0 +1,43 @@ +use serde::Serialize; + +#[derive(Clone, Debug, Serialize)] +pub struct Gpu { + model: String, + cuda: Cuda, + clocks: Clocks, + memory: Memory, +} + +#[derive(Clone, Debug, Serialize)] +pub struct Cuda { + enabled: bool, + cores: u32, + version: String, + capability: Capability, +} + +#[derive(Clone, Debug, Serialize)] +pub struct Capability { + major: u32, + minor: u32, +} + +#[derive(Clone, Debug, Serialize)] +pub struct Clocks { + #[serde(rename(deserialize = "graphics.mhz"))] + graphics_mhz: u32, + #[serde(rename(deserialize = "memory.mhz"))] + memory_mhz: u32, + #[serde(rename(deserialize = "sm.mhz"))] + sm_mhz: u32, + #[serde(rename(deserialize = "video.mhz"))] + video_mhz: u32, +} + +#[derive(Clone, Debug, Serialize)] +pub struct Memory { + #[serde(rename(deserialize = "bandwidth.gib"))] + bandwidth_gib: u32, + #[serde(rename(deserialize = "tatal.gib"))] + tatal_gib: u32 +} diff --git a/src/offer_template.rs b/src/offer_template.rs index 839cebb..72bb180 100644 --- a/src/offer_template.rs +++ b/src/offer_template.rs @@ -30,6 +30,7 @@ fn extract_device_info(device_info: BTreeMap) -> Opti } pub fn parse_devices_info() -> anyhow::Result> { + //TODO remove it if let Ok(exe) = process::find_file("device_detection.exe") { let output = Command::new(exe) .arg("ocl") From 9c95d75c36bc5fc424c2b07610159b91838861b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Walski?= Date: Thu, 21 Mar 2024 18:02:26 +0100 Subject: [PATCH 2/7] gpu-info memory.bandwidth_gib --- gpu-info/src/lib.rs | 144 +++++++++++++++++++++++++++++------------- gpu-info/src/model.rs | 47 ++++++-------- 2 files changed, 121 insertions(+), 70 deletions(-) diff --git a/gpu-info/src/lib.rs b/gpu-info/src/lib.rs index 850eaf3..31f7578 100644 --- a/gpu-info/src/lib.rs +++ b/gpu-info/src/lib.rs @@ -1,41 +1,12 @@ -use model::{Cuda, Gpu}; -use nvml_wrapper::{bitmasks::InitFlags, Device, Nvml}; -use anyhow::{bail, Context, Result}; +use anyhow::{bail, Context}; +use model::{Clocks, Cuda, Gpu, Memory}; +use nvml_wrapper::{ + bitmasks::InitFlags, enum_wrappers::device::Clock, structs::device::CudaComputeCapability, + Device, Nvml, +}; pub mod model; - -/* testing -let nvml = Nvml::init()?; -// Get the first `Device` (GPU) in the system -log::info!("Cuda version: {}", nvml.sys_cuda_driver_version().expect("Can get CUDA version")); -match nvml.device_count() { - Ok(count) => { - for index in 0..count { - match nvml.device_by_index(index) { - Ok(dev) => { - log::info!("Device index: {index}"); - log::info!("Device name: {}", dev.name().expect("Can get device name")); - log::info!("Device cores: {}", dev.num_cores().expect("Can get device cores")); - log::info!("Device mem info: {:?}", dev.memory_info().unwrap()); - log::info!("Device cuda compute capability: {:?}", dev.cuda_compute_capability().unwrap()); - log::info!("Device GPU clock info: {:?}", dev.clock_info(nvml_wrapper::enum_wrappers::device::Clock::Graphics).unwrap()); - log::info!("Device Memory clock info: {:?}", dev.clock_info(nvml_wrapper::enum_wrappers::device::Clock::Memory).unwrap()); - log::info!("Device SM clock info: {:?}", dev.clock_info(nvml_wrapper::enum_wrappers::device::Clock::SM).unwrap()); - log::info!("Device Video clock info: {:?}", dev.clock_info(nvml_wrapper::enum_wrappers::device::Clock::Video).unwrap()); - log::info!("Device memory bus width: {:?}", dev.memory_bus_width().unwrap()); - log::info!("Device memory clocks: {:?}", dev.supported_memory_clocks().unwrap()); - // max supported_memory_clocks *2 == memTransferRatemax - - }, - Err(err) => log::error!("Failed to get GPU {index} info. Err: {err}"), - } - } - }, - Err(err) => log::error!("Failed to get GPU info. Err: {err}"), -} -*/ - pub struct GpuInfo { nvml: Nvml, } @@ -47,31 +18,116 @@ impl GpuInfo { } /// `uuid` of GPU device. If not provided first available GPU device will be used. - pub fn info>(&self, uuid: Option<&str>) -> Result { + pub fn info>(&self, uuid: Option<&str>) -> anyhow::Result { if let Some(uuid) = uuid { - let dev = self.nvml.device_by_uuid(uuid).with_context(|| format!("Failed to get GPU device with UUID: {uuid}."))?; + let dev = self + .nvml + .device_by_uuid(uuid) + .with_context(|| format!("Failed to get GPU device with UUID: {uuid}."))?; return self.device_info(dev); }; - let gpu_count = self.nvml.device_count().with_context(|| "Unable to get count of CUDA devices.")?; + let gpu_count = self + .nvml + .device_count() + .context("Unable to get count of CUDA devices.")?; + if gpu_count == 0 { bail!("No supported GPU device available.") } - let dev = self.nvml.device_by_index(0).with_context(|| "Failed to get GPU device.")?; + let dev = self + .nvml + .device_by_index(0) + .context("Failed to get GPU device.")?; + self.device_info(dev) } - fn device_info(&self, dev: Device) -> Result { - let compute_capability = dev.cuda_compute_capability()?; - todo!() + fn device_info(&self, dev: Device) -> anyhow::Result { + let model = dev.name()?; + let version = self.cuda_version()?; + let cuda = cuda(&dev, version)?; + let clocks = clocks(&dev)?; + let memory = memory(&dev)?; + Ok(Gpu { + model, + cuda, + clocks, + memory, + }) } - fn cuda(&self) -> Result { - todo!() + fn cuda_version(&self) -> anyhow::Result { + let version = self + .nvml + .sys_cuda_driver_version() + .context("Unable to get driver version")?; + let version_major = nvml_wrapper::cuda_driver_version_major(version); + let version_minor = nvml_wrapper::cuda_driver_version_minor(version); + Ok(format!("{}.{}", version_major, version_minor)) } } +fn cuda(dev: &Device, version: String) -> anyhow::Result { + let enabled = true; + let cores = dev.num_cores()?; + let compute_capability = compute_capability(dev)?; + Ok(Cuda { + enabled, + cores, + version, + compute_capability, + }) +} + +fn compute_capability(dev: &Device) -> anyhow::Result { + let capability = dev.cuda_compute_capability()?; + Ok(format!("{}.{}", capability.major, capability.minor)) +} + +fn clocks(dev: &Device) -> anyhow::Result { + let graphics_mhz = hz_to_mhz(dev.clock_info(Clock::Graphics)?); + let memory_mhz = hz_to_mhz(dev.clock_info(Clock::Memory)?); + let sm_mhz = hz_to_mhz(dev.clock_info(Clock::SM)?); + let video_mhz = hz_to_mhz(dev.clock_info(Clock::Video)?); + Ok(Clocks { + graphics_mhz, + memory_mhz, + sm_mhz, + video_mhz, + }) +} + +fn memory(dev: &Device) -> anyhow::Result { + let total_bytes = dev.memory_info()?.total; + let total_gib = bytes_to_gib(total_bytes); + let bandwidth_gib = bandwidth_gib(dev)?; + Ok(Memory { + bandwidth_gib, + total_gib, + }) +} + +fn bandwidth_gib(dev: &Device) -> anyhow::Result { + let memory_bus_width = dev.memory_bus_width()?; + let supported_memory_clocks = dev.supported_memory_clocks()?; + let max_memory_clock = supported_memory_clocks.iter().cloned().fold(0, u32::max); + // `nvml` does not provide `memTransferRatemax` like `nvidia-settings` tool does. + // Transfer rate is a result of memory clock, bus width, and memory specific multiplier (for DDR it is 2) + let data_rate = 2; // value for DDR + let bandwidth_gib = max_memory_clock * memory_bus_width * data_rate / (1000 * 8); + Ok(bandwidth_gib) +} + +fn bytes_to_gib(memory: u64) -> f32 { + (memory as f64 / 1024.0 / 1024.0 / 1024.0) as f32 +} + +fn hz_to_mhz(hz: u32) -> u32 { + hz / 1000_000 +} + #[cfg(test)] mod tests { use super::*; diff --git a/gpu-info/src/model.rs b/gpu-info/src/model.rs index 9f2dff5..fa85380 100644 --- a/gpu-info/src/model.rs +++ b/gpu-info/src/model.rs @@ -1,43 +1,38 @@ use serde::Serialize; +#[serde(rename_all = "kebab-case")] #[derive(Clone, Debug, Serialize)] pub struct Gpu { - model: String, - cuda: Cuda, - clocks: Clocks, - memory: Memory, + pub model: String, + pub cuda: Cuda, + pub clocks: Clocks, + pub memory: Memory, } #[derive(Clone, Debug, Serialize)] pub struct Cuda { - enabled: bool, - cores: u32, - version: String, - capability: Capability, -} - -#[derive(Clone, Debug, Serialize)] -pub struct Capability { - major: u32, - minor: u32, + pub enabled: bool, + pub cores: u32, + pub version: String, + pub compute_capability: String, } #[derive(Clone, Debug, Serialize)] pub struct Clocks { - #[serde(rename(deserialize = "graphics.mhz"))] - graphics_mhz: u32, - #[serde(rename(deserialize = "memory.mhz"))] - memory_mhz: u32, - #[serde(rename(deserialize = "sm.mhz"))] - sm_mhz: u32, - #[serde(rename(deserialize = "video.mhz"))] - video_mhz: u32, + #[serde(rename(serialize = "graphics.mhz"))] + pub graphics_mhz: u32, + #[serde(rename(serialize = "memory.mhz"))] + pub memory_mhz: u32, + #[serde(rename(serialize = "sm.mhz"))] + pub sm_mhz: u32, + #[serde(rename(serialize = "video.mhz"))] + pub video_mhz: u32, } #[derive(Clone, Debug, Serialize)] pub struct Memory { - #[serde(rename(deserialize = "bandwidth.gib"))] - bandwidth_gib: u32, - #[serde(rename(deserialize = "tatal.gib"))] - tatal_gib: u32 + #[serde(rename(serialize = "bandwidth.gib"))] + pub bandwidth_gib: u32, + #[serde(rename(serialize = "tatal.gib"))] + pub total_gib: f32 } From eaf2d1dd34cf13bfdff7e8cd75da3aef5671a664 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Walski?= Date: Fri, 22 Mar 2024 13:06:58 +0100 Subject: [PATCH 3/7] gpu info in offer-template (for GPU requiring runtimes only) --- Cargo.lock | 1 + Cargo.toml | 1 + gpu-info/src/lib.rs | 38 +++++----------- gpu-info/src/model.rs | 9 ++-- src/main.rs | 6 ++- src/offer_template.rs | 81 ++++++--------------------------- src/process.rs | 9 +++- src/process/automatic.rs | 4 ++ src/process/automatic/config.rs | 15 ++++++ src/process/dummy.rs | 16 ++++++- 10 files changed, 78 insertions(+), 102 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8b780e4..6a409c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3807,6 +3807,7 @@ dependencies = [ "env_logger", "flexi_logger", "futures 0.3.30", + "gpu-info", "http 0.2.11", "humantime", "humantime-serde", diff --git a/Cargo.toml b/Cargo.toml index 70293cb..fe56fbe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ ya-service-bus = "0.7.1" ya-client-model = "0.6.0" ya-agreement-utils = "0.5" ya-transfer = { git = "https://github.com/golemfactory/yagna.git", rev = "fb9a89b11c815b5b6b664ca6c8b1151524420aa3" } +gpu-info = { path = "gpu-info" } actix = "0.13" actix-rt = "2" diff --git a/gpu-info/src/lib.rs b/gpu-info/src/lib.rs index 31f7578..75c9e7b 100644 --- a/gpu-info/src/lib.rs +++ b/gpu-info/src/lib.rs @@ -1,9 +1,6 @@ use anyhow::{bail, Context}; use model::{Clocks, Cuda, Gpu, Memory}; -use nvml_wrapper::{ - bitmasks::InitFlags, enum_wrappers::device::Clock, structs::device::CudaComputeCapability, - Device, Nvml, -}; +use nvml_wrapper::{enum_wrappers::device::Clock, Device, Nvml}; pub mod model; @@ -13,17 +10,16 @@ pub struct GpuInfo { impl GpuInfo { pub fn init() -> anyhow::Result { - let nvml = Nvml::builder().flags(InitFlags::NO_ATTACH).init()?; - return Ok(Self { nvml }); + let nvml = Nvml::init()?; + Ok(Self { nvml }) } /// `uuid` of GPU device. If not provided first available GPU device will be used. - pub fn info>(&self, uuid: Option<&str>) -> anyhow::Result { + pub fn info>(&self, uuid: Option) -> anyhow::Result { if let Some(uuid) = uuid { - let dev = self - .nvml - .device_by_uuid(uuid) - .with_context(|| format!("Failed to get GPU device with UUID: {uuid}."))?; + let dev = self.nvml.device_by_uuid(uuid.as_ref()).with_context(|| { + format!("Failed to get GPU device with UUID: {}.", uuid.as_ref()) + })?; return self.device_info(dev); }; @@ -87,10 +83,10 @@ fn compute_capability(dev: &Device) -> anyhow::Result { } fn clocks(dev: &Device) -> anyhow::Result { - let graphics_mhz = hz_to_mhz(dev.clock_info(Clock::Graphics)?); - let memory_mhz = hz_to_mhz(dev.clock_info(Clock::Memory)?); - let sm_mhz = hz_to_mhz(dev.clock_info(Clock::SM)?); - let video_mhz = hz_to_mhz(dev.clock_info(Clock::Video)?); + let graphics_mhz = dev.max_clock_info(Clock::Graphics)?; + let memory_mhz = dev.max_clock_info(Clock::Memory)?; + let sm_mhz = dev.max_clock_info(Clock::SM)?; + let video_mhz = dev.max_clock_info(Clock::Video)?; Ok(Clocks { graphics_mhz, memory_mhz, @@ -123,15 +119,3 @@ fn bandwidth_gib(dev: &Device) -> anyhow::Result { fn bytes_to_gib(memory: u64) -> f32 { (memory as f64 / 1024.0 / 1024.0 / 1024.0) as f32 } - -fn hz_to_mhz(hz: u32) -> u32 { - hz / 1000_000 -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() {} -} diff --git a/gpu-info/src/model.rs b/gpu-info/src/model.rs index fa85380..6713e76 100644 --- a/gpu-info/src/model.rs +++ b/gpu-info/src/model.rs @@ -1,7 +1,7 @@ use serde::Serialize; -#[serde(rename_all = "kebab-case")] #[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "kebab-case")] pub struct Gpu { pub model: String, pub cuda: Cuda, @@ -10,6 +10,7 @@ pub struct Gpu { } #[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "kebab-case")] pub struct Cuda { pub enabled: bool, pub cores: u32, @@ -18,6 +19,7 @@ pub struct Cuda { } #[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "kebab-case")] pub struct Clocks { #[serde(rename(serialize = "graphics.mhz"))] pub graphics_mhz: u32, @@ -30,9 +32,10 @@ pub struct Clocks { } #[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "kebab-case")] pub struct Memory { #[serde(rename(serialize = "bandwidth.gib"))] pub bandwidth_gib: u32, - #[serde(rename(serialize = "tatal.gib"))] - pub total_gib: f32 + #[serde(rename(serialize = "total.gib"))] + pub total_gib: f32, } diff --git a/src/main.rs b/src/main.rs index c6e467f..ecd655a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -178,7 +178,7 @@ async fn run( ) -> anyhow::Result<()> { dotenv::dotenv().ok(); - let runtime_config = Box::pin(RUNTIME::parse_config(&cli.runtime_config)?); + let runtime_config = RUNTIME::parse_config(&cli.runtime_config)?; log::info!("Runtime config: {runtime_config:?}"); let (exe_unit_url, report_url, activity_id, args) = match &cli.command { @@ -194,7 +194,7 @@ async fn run( args, ), Command::OfferTemplate => { - let template = offer_template::template()?; + let template = offer_template::template(&runtime_config)?; io::stdout().write_all(template.as_ref())?; return Ok(()); } @@ -204,6 +204,8 @@ async fn run( } }; + let runtime_config = Box::pin(runtime_config); + let agreement_path = args.agreement.clone(); let agreement = AgreementDesc::load(agreement_path)?; diff --git a/src/offer_template.rs b/src/offer_template.rs index 72bb180..30ff4de 100644 --- a/src/offer_template.rs +++ b/src/offer_template.rs @@ -1,57 +1,7 @@ -use crate::process; +use crate::process::RuntimeConfig; use serde::{Deserialize, Serialize}; use std::borrow::Cow; use std::collections::BTreeMap; -use std::process::{Command, Stdio}; - -#[derive(Deserialize)] -#[serde(rename_all = "PascalCase")] -struct OclResponse { - error_string: String, - platforms: Vec, -} - -#[derive(Deserialize)] -#[serde(rename_all = "PascalCase")] -struct OclPlatform { - devices: Vec>, -} - -fn extract_device_info(device_info: BTreeMap) -> Option<(String, u64)> { - match ( - device_info.get("_CL_DEVICE_NAME"), - device_info.get("_CL_DEVICE_GLOBAL_MEM_SIZE"), - ) { - (Some(serde_json::Value::String(name)), Some(serde_json::Value::Number(mem))) => { - Some((name.clone(), mem.as_u64().unwrap_or_default())) - } - _ => None, - } -} - -pub fn parse_devices_info() -> anyhow::Result> { - //TODO remove it - if let Ok(exe) = process::find_file("device_detection.exe") { - let output = Command::new(exe) - .arg("ocl") - .stdin(Stdio::null()) - .stderr(Stdio::inherit()) - .stdout(Stdio::piped()) - .output()?; - let response: OclResponse = serde_json::from_slice(output.stdout.as_ref())?; - if !response.error_string.is_empty() { - eprintln!("detection error: {}", response.error_string); - } - Ok(response - .platforms - .into_iter() - .flat_map(|d| d.devices.into_iter().filter_map(extract_device_info)) - .collect()) - } else { - eprintln!("not found device detection"); - Ok(Vec::new()) - } -} #[derive(Deserialize, Serialize)] struct OfferTemplate { @@ -59,25 +9,20 @@ struct OfferTemplate { constraints: String, } -pub fn template() -> anyhow::Result> { +pub fn template( + config: &CONFIG, +) -> anyhow::Result> { let offer_template = include_bytes!("offer-template.json"); let mut template: OfferTemplate = serde_json::from_slice(offer_template.as_ref())?; - let devices = parse_devices_info()?; - if devices.is_empty() { - return Ok(Cow::Owned(serde_json::to_vec_pretty(&template)?)); + + if CONFIG::uses_gpu() { + let gpu_info = gpu_info::GpuInfo::init()?; + let gpu = gpu_info.info(config.gpu_uuid())?; + let gpu = serde_json::value::to_value(gpu)?; + template + .properties + .insert("golem.!exp.gap-35.v1.inf.gpu".into(), gpu); } - template.properties.insert( - "golem.inf.gpu.card".to_string(), - serde_json::Value::Array( - devices - .iter() - .map(|(name, _)| serde_json::Value::from(name.as_str())) - .collect(), - ), - ); - template.properties.insert( - "golem.inf.gpu.mem".to_string(), - serde_json::Value::Array(devices.iter().map(|&(_, mem)| mem.into()).collect()), - ); + Ok(Cow::Owned(serde_json::to_vec_pretty(&template)?)) } diff --git a/src/process.rs b/src/process.rs index 814ace6..4bbfa4c 100644 --- a/src/process.rs +++ b/src/process.rs @@ -26,7 +26,7 @@ pub struct Usage { #[async_trait] pub(crate) trait Runtime: Sized { - type CONFIG: DeserializeOwned + Default + Debug + Clone; + type CONFIG: RuntimeConfig; fn parse_config(config: &Option) -> anyhow::Result { match config { @@ -40,6 +40,13 @@ pub(crate) trait Runtime: Sized { async fn stop(&mut self) -> anyhow::Result<()>; async fn wait(&mut self) -> std::io::Result; + + fn requires_gpu() -> bool; +} + +pub(crate) trait RuntimeConfig: DeserializeOwned + Default + Debug + Clone { + fn gpu_uuid(&self) -> Option; + fn uses_gpu() -> bool; } #[derive(Clone)] diff --git a/src/process/automatic.rs b/src/process/automatic.rs index b94bdb3..5723cc8 100644 --- a/src/process/automatic.rs +++ b/src/process/automatic.rs @@ -83,6 +83,10 @@ impl Runtime for Automatic { log::debug!("Automatic process has stopped"); res } + + fn requires_gpu() -> bool { + true + } } fn build_cmd(model: Option, config: &Config) -> anyhow::Result { diff --git a/src/process/automatic/config.rs b/src/process/automatic/config.rs index 1da028d..dabd017 100644 --- a/src/process/automatic/config.rs +++ b/src/process/automatic/config.rs @@ -2,6 +2,8 @@ use std::time::Duration; use serde::Deserialize; +use crate::process::RuntimeConfig; + #[derive(Deserialize, Clone, Debug)] #[serde(default)] pub(crate) struct Config { @@ -29,6 +31,18 @@ pub(crate) struct Config { pub monitored_model_failure_msg: String, pub monitored_msgs_w_trace_lvl: Vec, + + pub gpu_uuid: Option, +} + +impl RuntimeConfig for Config { + fn gpu_uuid(&self) -> Option { + self.gpu_uuid.clone() + } + + fn uses_gpu() -> bool { + true + } } impl Default for Config { @@ -52,6 +66,7 @@ impl Default for Config { // log generated by API ping task "\"GET / HTTP/1.1\" 404 Not Found".into(), ], + gpu_uuid: None, } } } diff --git a/src/process/dummy.rs b/src/process/dummy.rs index ea2788c..62495d2 100644 --- a/src/process/dummy.rs +++ b/src/process/dummy.rs @@ -8,7 +8,7 @@ use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::{Child, Command}; use tokio::sync::Mutex; -use super::Runtime; +use super::{Runtime, RuntimeConfig}; #[derive(Clone)] pub struct Dummy { @@ -25,6 +25,16 @@ pub(crate) struct Config { pub dummy_arg: Option, } +impl RuntimeConfig for Config { + fn gpu_uuid(&self) -> Option { + None + } + + fn uses_gpu() -> bool { + false + } +} + #[async_trait] impl Runtime for Dummy { type CONFIG = Config; @@ -75,4 +85,8 @@ impl Runtime for Dummy { let mut child = self.child.lock().await; child.wait().await } + + fn requires_gpu() -> bool { + false + } } From cdf3ad5e0f30d32c065e79aa265cf8b8eb289291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Walski?= Date: Fri, 22 Mar 2024 16:28:20 +0100 Subject: [PATCH 4/7] Skipped optional gpu-info memory.bandwidth_gib property. --- .vscode/launch.json | 27 +++++++++++++++++++++++---- gpu-info/src/lib.rs | 5 +++-- gpu-info/src/model.rs | 3 ++- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 6ff8aac..4024cb3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,13 +1,10 @@ { - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ { "type": "lldb", "request": "launch", - "name": "offer-template", + "name": "offer-template dummy", "cargo": { "args": [ "build", @@ -26,6 +23,28 @@ ], "cwd": "${workspaceFolder}" }, + { + "type": "lldb", + "request": "launch", + "name": "offer-template automatic", + "cargo": { + "args": [ + "build", + "--bin=ya-runtime-ai", + "--package=ya-runtime-ai" + ], + "filter": { + "name": "ya-runtime-ai", + "kind": "bin" + } + }, + "args": [ + "--runtime", + "automatic", + "offer-template" + ], + "cwd": "${workspaceFolder}" + }, { "type": "lldb", "request": "launch", diff --git a/gpu-info/src/lib.rs b/gpu-info/src/lib.rs index 75c9e7b..da67a6a 100644 --- a/gpu-info/src/lib.rs +++ b/gpu-info/src/lib.rs @@ -98,13 +98,14 @@ fn clocks(dev: &Device) -> anyhow::Result { fn memory(dev: &Device) -> anyhow::Result { let total_bytes = dev.memory_info()?.total; let total_gib = bytes_to_gib(total_bytes); - let bandwidth_gib = bandwidth_gib(dev)?; Ok(Memory { - bandwidth_gib, + bandwidth_gib: None, total_gib, }) } +/// Unused because of lack of `memTransferRatemax` property. +#[allow(dead_code)] fn bandwidth_gib(dev: &Device) -> anyhow::Result { let memory_bus_width = dev.memory_bus_width()?; let supported_memory_clocks = dev.supported_memory_clocks()?; diff --git a/gpu-info/src/model.rs b/gpu-info/src/model.rs index 6713e76..0456291 100644 --- a/gpu-info/src/model.rs +++ b/gpu-info/src/model.rs @@ -34,8 +34,9 @@ pub struct Clocks { #[derive(Clone, Debug, Serialize)] #[serde(rename_all = "kebab-case")] pub struct Memory { + #[serde(skip_serializing_if = "Option::is_none")] #[serde(rename(serialize = "bandwidth.gib"))] - pub bandwidth_gib: u32, + pub bandwidth_gib: Option, #[serde(rename(serialize = "total.gib"))] pub total_gib: f32, } From e101d454b102796e9f1f51cb827be7fe78732dbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Walski?= Date: Fri, 22 Mar 2024 17:03:52 +0100 Subject: [PATCH 5/7] Test cmd performing GPU detection on non dummy runtime --- .vscode/launch.json | 44 ++++++++++++++++++++++++ Cargo.lock | 4 +-- Cargo.toml | 4 +-- {gpu-info => gpu-detection}/Cargo.toml | 4 +-- {gpu-info => gpu-detection}/README.md | 4 +-- {gpu-info => gpu-detection}/src/lib.rs | 6 ++-- {gpu-info => gpu-detection}/src/model.rs | 0 src/main.rs | 2 +- src/offer_template.rs | 17 ++++++--- 9 files changed, 68 insertions(+), 17 deletions(-) rename {gpu-info => gpu-detection}/Cargo.toml (55%) rename {gpu-info => gpu-detection}/README.md (50%) rename {gpu-info => gpu-detection}/src/lib.rs (96%) rename {gpu-info => gpu-detection}/src/model.rs (100%) diff --git a/.vscode/launch.json b/.vscode/launch.json index 4024cb3..ad454f0 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -45,6 +45,50 @@ ], "cwd": "${workspaceFolder}" }, + { + "type": "lldb", + "request": "launch", + "name": "test dummy", + "cargo": { + "args": [ + "build", + "--bin=ya-runtime-ai", + "--package=ya-runtime-ai" + ], + "filter": { + "name": "ya-runtime-ai", + "kind": "bin" + } + }, + "args": [ + "--runtime", + "dummy", + "test" + ], + "cwd": "${workspaceFolder}" + }, + { + "type": "lldb", + "request": "launch", + "name": "test automatic", + "cargo": { + "args": [ + "build", + "--bin=ya-runtime-ai", + "--package=ya-runtime-ai" + ], + "filter": { + "name": "ya-runtime-ai", + "kind": "bin" + } + }, + "args": [ + "--runtime", + "automatic", + "test" + ], + "cwd": "${workspaceFolder}" + }, { "type": "lldb", "request": "launch", diff --git a/Cargo.lock b/Cargo.lock index 6a409c0..e7985ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1350,7 +1350,7 @@ dependencies = [ ] [[package]] -name = "gpu-info" +name = "gpu-detection" version = "0.1.0" dependencies = [ "anyhow", @@ -3807,7 +3807,7 @@ dependencies = [ "env_logger", "flexi_logger", "futures 0.3.30", - "gpu-info", + "gpu-detection", "http 0.2.11", "humantime", "humantime-serde", diff --git a/Cargo.toml b/Cargo.toml index fe56fbe..97bf194 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" [workspace] members = [ - "gpu-info", + "gpu-detection", "runtimes/dummy", ] @@ -18,7 +18,7 @@ ya-service-bus = "0.7.1" ya-client-model = "0.6.0" ya-agreement-utils = "0.5" ya-transfer = { git = "https://github.com/golemfactory/yagna.git", rev = "fb9a89b11c815b5b6b664ca6c8b1151524420aa3" } -gpu-info = { path = "gpu-info" } +gpu-detection = { path = "gpu-detection" } actix = "0.13" actix-rt = "2" diff --git a/gpu-info/Cargo.toml b/gpu-detection/Cargo.toml similarity index 55% rename from gpu-info/Cargo.toml rename to gpu-detection/Cargo.toml index fd11441..032a229 100644 --- a/gpu-info/Cargo.toml +++ b/gpu-detection/Cargo.toml @@ -1,11 +1,9 @@ [package] -name = "gpu-info" +name = "gpu-detection" version = "0.1.0" edition = "2021" readme = "README.md" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] anyhow = "1.0.81" nvml-wrapper = "0.10.0" diff --git a/gpu-info/README.md b/gpu-detection/README.md similarity index 50% rename from gpu-info/README.md rename to gpu-detection/README.md index 59b59e5..a72e6de 100644 --- a/gpu-info/README.md +++ b/gpu-detection/README.md @@ -1,5 +1,5 @@ -# gpu-info +# gpu-detection -Library provides GPU info listed in [GAP-35](https://github.com/golemfactory/golem-architecture/blob/master/gaps/gap-35_gpu_pci_capability/gap-35_gpu_pci_capability.md). +Library detects GPU info listed in [GAP-35](https://github.com/golemfactory/golem-architecture/blob/master/gaps/gap-35_gpu_pci_capability/gap-35_gpu_pci_capability.md). It supports Nvidia GPUs only. Implementation uses [nvml-wrapper](https://crates.io/crates/nvml-wrapper) to access [NVML](https://developer.nvidia.com/nvidia-management-library-nvml). diff --git a/gpu-info/src/lib.rs b/gpu-detection/src/lib.rs similarity index 96% rename from gpu-info/src/lib.rs rename to gpu-detection/src/lib.rs index da67a6a..795a9e1 100644 --- a/gpu-info/src/lib.rs +++ b/gpu-detection/src/lib.rs @@ -4,18 +4,18 @@ use nvml_wrapper::{enum_wrappers::device::Clock, Device, Nvml}; pub mod model; -pub struct GpuInfo { +pub struct GpuDetection { nvml: Nvml, } -impl GpuInfo { +impl GpuDetection { pub fn init() -> anyhow::Result { let nvml = Nvml::init()?; Ok(Self { nvml }) } /// `uuid` of GPU device. If not provided first available GPU device will be used. - pub fn info>(&self, uuid: Option) -> anyhow::Result { + pub fn detect>(&self, uuid: Option) -> anyhow::Result { if let Some(uuid) = uuid { let dev = self.nvml.device_by_uuid(uuid.as_ref()).with_context(|| { format!("Failed to get GPU device with UUID: {}.", uuid.as_ref()) diff --git a/gpu-info/src/model.rs b/gpu-detection/src/model.rs similarity index 100% rename from gpu-info/src/model.rs rename to gpu-detection/src/model.rs diff --git a/src/main.rs b/src/main.rs index ecd655a..58c9807 100644 --- a/src/main.rs +++ b/src/main.rs @@ -199,7 +199,7 @@ async fn run( return Ok(()); } Command::Test => { - // Test + offer_template::gpu_detection(&runtime_config)?; return Ok(()); } }; diff --git a/src/offer_template.rs b/src/offer_template.rs index 30ff4de..40dc472 100644 --- a/src/offer_template.rs +++ b/src/offer_template.rs @@ -1,4 +1,6 @@ use crate::process::RuntimeConfig; +use gpu_detection::model::Gpu; + use serde::{Deserialize, Serialize}; use std::borrow::Cow; use std::collections::BTreeMap; @@ -9,15 +11,22 @@ struct OfferTemplate { constraints: String, } -pub fn template( +pub(crate) fn gpu_detection(config: &CONFIG) -> anyhow::Result> { + if CONFIG::uses_gpu() { + let gpu_detection = gpu_detection::GpuDetection::init()?; + let gpu = gpu_detection.detect(config.gpu_uuid())?; + return Ok(Some(gpu)); + } + Ok(None) +} + +pub(crate) fn template( config: &CONFIG, ) -> anyhow::Result> { let offer_template = include_bytes!("offer-template.json"); let mut template: OfferTemplate = serde_json::from_slice(offer_template.as_ref())?; - if CONFIG::uses_gpu() { - let gpu_info = gpu_info::GpuInfo::init()?; - let gpu = gpu_info.info(config.gpu_uuid())?; + if let Some(gpu) = gpu_detection(config)? { let gpu = serde_json::value::to_value(gpu)?; template .properties From 6030f8030e19484584510f0c42324a377180cffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Walski?= Date: Fri, 22 Mar 2024 22:49:07 +0100 Subject: [PATCH 6/7] Runtime uses_gpu property (fot tests). Dependencies incremental version removal. --- Cargo.lock | 8 ++++---- Cargo.toml | 10 +++++----- gpu-detection/Cargo.toml | 6 +++--- src/offer_template.rs | 3 +-- src/process.rs | 4 +--- src/process/automatic.rs | 4 ---- src/process/automatic/config.rs | 8 ++++++-- src/process/dummy.rs | 6 +----- tests/resources/runtime_config.json | 3 ++- tests/runtime_config.rs | 31 +++++++++++++++-------------- 10 files changed, 39 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e7985ed..8d85fe2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1447,9 +1447,9 @@ dependencies = [ [[package]] name = "http" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b32afd38673a8016f7c9ae69e5af41a58f81b1d31689040f2f1959594ce194ea" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" dependencies = [ "bytes 1.5.0", "fnv", @@ -3767,7 +3767,7 @@ dependencies = [ "chrono", "futures 0.3.30", "futures-core", - "http 1.0.0", + "http 1.1.0", "log", "mockito", "reqwest", @@ -3808,7 +3808,7 @@ dependencies = [ "flexi_logger", "futures 0.3.30", "gpu-detection", - "http 0.2.11", + "http 1.1.0", "humantime", "humantime-serde", "log", diff --git a/Cargo.toml b/Cargo.toml index 97bf194..487a5b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ gpu-detection = { path = "gpu-detection" } actix = "0.13" actix-rt = "2" -async-trait = "0.1.77" +async-trait = "0.1" log = "0.4" serde = { version = "^1.0", features = ["derive"] } serde_json = "1.0" @@ -33,15 +33,15 @@ env_logger = "0.10" yansi = "0.5" chrono = "0.4" tokio = { version = "1.32", features = ["macros", "signal"] } -tokio-stream = { version = "0.1.14", features = ["io-util"] } +tokio-stream = { version = "0.1", features = ["io-util"] } futures = "0.3" flexi_logger = { version = "0.27", features = ["colors"] } regex = "1" reqwest = { version = "0.11", features = ["blocking", "json"] } -async-stream = "0.3.5" +async-stream = "0.3" ya-gsb-http-proxy = { git = "https://github.com/golemfactory/yagna.git", rev = "fb9a89b11c815b5b6b664ca6c8b1151524420aa3" } -http = "0.2.11" -bytes = "1.5.0" +http = "1.1" +bytes = "1.5" humantime = "2.1" humantime-serde = "1.1" diff --git a/gpu-detection/Cargo.toml b/gpu-detection/Cargo.toml index 032a229..bb5504a 100644 --- a/gpu-detection/Cargo.toml +++ b/gpu-detection/Cargo.toml @@ -5,6 +5,6 @@ edition = "2021" readme = "README.md" [dependencies] -anyhow = "1.0.81" -nvml-wrapper = "0.10.0" -serde = "1.0.197" +anyhow = "1.0" +nvml-wrapper = "0.10" +serde = "1.0" diff --git a/src/offer_template.rs b/src/offer_template.rs index 40dc472..dd3dbc8 100644 --- a/src/offer_template.rs +++ b/src/offer_template.rs @@ -10,9 +10,8 @@ struct OfferTemplate { properties: BTreeMap, constraints: String, } - pub(crate) fn gpu_detection(config: &CONFIG) -> anyhow::Result> { - if CONFIG::uses_gpu() { + if config.uses_gpu() { let gpu_detection = gpu_detection::GpuDetection::init()?; let gpu = gpu_detection.detect(config.gpu_uuid())?; return Ok(Some(gpu)); diff --git a/src/process.rs b/src/process.rs index 4bbfa4c..450d29e 100644 --- a/src/process.rs +++ b/src/process.rs @@ -40,13 +40,11 @@ pub(crate) trait Runtime: Sized { async fn stop(&mut self) -> anyhow::Result<()>; async fn wait(&mut self) -> std::io::Result; - - fn requires_gpu() -> bool; } pub(crate) trait RuntimeConfig: DeserializeOwned + Default + Debug + Clone { fn gpu_uuid(&self) -> Option; - fn uses_gpu() -> bool; + fn uses_gpu(&self) -> bool; } #[derive(Clone)] diff --git a/src/process/automatic.rs b/src/process/automatic.rs index 5723cc8..b94bdb3 100644 --- a/src/process/automatic.rs +++ b/src/process/automatic.rs @@ -83,10 +83,6 @@ impl Runtime for Automatic { log::debug!("Automatic process has stopped"); res } - - fn requires_gpu() -> bool { - true - } } fn build_cmd(model: Option, config: &Config) -> anyhow::Result { diff --git a/src/process/automatic/config.rs b/src/process/automatic/config.rs index dabd017..585877e 100644 --- a/src/process/automatic/config.rs +++ b/src/process/automatic/config.rs @@ -33,6 +33,9 @@ pub(crate) struct Config { pub monitored_msgs_w_trace_lvl: Vec, pub gpu_uuid: Option, + + // Property for testing purposes + pub uses_gpu: bool, } impl RuntimeConfig for Config { @@ -40,8 +43,8 @@ impl RuntimeConfig for Config { self.gpu_uuid.clone() } - fn uses_gpu() -> bool { - true + fn uses_gpu(&self) -> bool { + self.uses_gpu } } @@ -67,6 +70,7 @@ impl Default for Config { "\"GET / HTTP/1.1\" 404 Not Found".into(), ], gpu_uuid: None, + uses_gpu: true, } } } diff --git a/src/process/dummy.rs b/src/process/dummy.rs index 62495d2..1c2816c 100644 --- a/src/process/dummy.rs +++ b/src/process/dummy.rs @@ -30,7 +30,7 @@ impl RuntimeConfig for Config { None } - fn uses_gpu() -> bool { + fn uses_gpu(&self) -> bool { false } } @@ -85,8 +85,4 @@ impl Runtime for Dummy { let mut child = self.child.lock().await; child.wait().await } - - fn requires_gpu() -> bool { - false - } } diff --git a/tests/resources/runtime_config.json b/tests/resources/runtime_config.json index a79b023..ebc8b7b 100644 --- a/tests/resources/runtime_config.json +++ b/tests/resources/runtime_config.json @@ -15,5 +15,6 @@ "monitored_msgs_w_trace_lvl": [ "Unimportant", "Boring log" - ] + ], + "uses_gpu": false } diff --git a/tests/runtime_config.rs b/tests/runtime_config.rs index d27592a..f86403b 100644 --- a/tests/runtime_config.rs +++ b/tests/runtime_config.rs @@ -8,19 +8,20 @@ fn runtime_config_as_text_ok() { .arg("automatic") .arg("--runtime-config") .arg( - "{ \ - \"startup_script\": \"path/run.bat\", \ - \"api_port\": 80, \ - \"api_host\": \"domain.com\", \ - \"api_shutdown_path\": \"/kill/me\", \ - \"model_arg\": \"\", \ - \"additional_args\": [\"--arg-one\", \"--arg-two\"], \ - \"startup_timeout\": \"1s\", \ - \"api_ping_delay\": \"100ms\", \ - \"monitored_startup_msg\": \"Started\", \ - \"monitored_model_failure_msg\": \"Failed\", \ - \"monitored_msgs_w_trace_lvl\": [\"Unimportant\", \"Boring log\"] \ - }", + r##"{ + "startup_script": "path/run.bat", + "api_port": 80, + "api_host": "domain.com", + "api_shutdown_path": "/kill/me", + "model_arg": "", + "additional_args": ["--arg-one", "--arg-two"], + "startup_timeout": "1s", + "api_ping_delay": "100ms", + "monitored_startup_msg": "Started", + "monitored_model_failure_msg": "Failed", + "monitored_msgs_w_trace_lvl": ["Unimportant", "Boring log"], + "uses_gpu": false + }"##, ) .arg("test") .assert() @@ -33,7 +34,7 @@ fn config_parse_succ_single_field() { cmd.arg("--runtime") .arg("automatic") .arg("--runtime-config") - .arg("{ \"startup_script\": \"path/bin.exe\" }") + .arg(r##"{ "startup_script": "path/bin.exe", "uses_gpu": false }"##) .arg("test") .assert() .success(); @@ -45,7 +46,7 @@ fn config_parse_fail_field_bat_type() { cmd.arg("--runtime") .arg("automatic") .arg("--runtime-config") - .arg("{ \"startup_script\": 13 }") + .arg(r##"{ "startup_script": 13, "uses_gpu": false }"##) .arg("test") .assert() .failure(); From 85be431728c0a12135f3710984cc5a3efebba56a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Walski?= Date: Tue, 26 Mar 2024 17:23:07 +0100 Subject: [PATCH 7/7] No uses_gpu config property. Automatic config parse test. --- src/offer_template.rs | 9 ++-- src/process.rs | 1 - src/process/automatic/config.rs | 23 ++++++---- src/process/dummy.rs | 4 -- tests/runtime_config.rs | 77 --------------------------------- 5 files changed, 18 insertions(+), 96 deletions(-) delete mode 100644 tests/runtime_config.rs diff --git a/src/offer_template.rs b/src/offer_template.rs index dd3dbc8..c875f44 100644 --- a/src/offer_template.rs +++ b/src/offer_template.rs @@ -11,12 +11,9 @@ struct OfferTemplate { constraints: String, } pub(crate) fn gpu_detection(config: &CONFIG) -> anyhow::Result> { - if config.uses_gpu() { - let gpu_detection = gpu_detection::GpuDetection::init()?; - let gpu = gpu_detection.detect(config.gpu_uuid())?; - return Ok(Some(gpu)); - } - Ok(None) + let gpu_detection = gpu_detection::GpuDetection::init()?; + let gpu = gpu_detection.detect(config.gpu_uuid())?; + Ok(Some(gpu)) } pub(crate) fn template( diff --git a/src/process.rs b/src/process.rs index 450d29e..7bd6e92 100644 --- a/src/process.rs +++ b/src/process.rs @@ -44,7 +44,6 @@ pub(crate) trait Runtime: Sized { pub(crate) trait RuntimeConfig: DeserializeOwned + Default + Debug + Clone { fn gpu_uuid(&self) -> Option; - fn uses_gpu(&self) -> bool; } #[derive(Clone)] diff --git a/src/process/automatic/config.rs b/src/process/automatic/config.rs index 585877e..154f161 100644 --- a/src/process/automatic/config.rs +++ b/src/process/automatic/config.rs @@ -33,19 +33,12 @@ pub(crate) struct Config { pub monitored_msgs_w_trace_lvl: Vec, pub gpu_uuid: Option, - - // Property for testing purposes - pub uses_gpu: bool, } impl RuntimeConfig for Config { fn gpu_uuid(&self) -> Option { self.gpu_uuid.clone() } - - fn uses_gpu(&self) -> bool { - self.uses_gpu - } } impl Default for Config { @@ -70,7 +63,21 @@ impl Default for Config { "\"GET / HTTP/1.1\" 404 Not Found".into(), ], gpu_uuid: None, - uses_gpu: true, } } } + +#[cfg(test)] +mod config_tests { + use std::{fs, path::PathBuf}; + + use super::Config; + + #[test] + fn config_test() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("tests/resources/runtime_config.json"); + let config = fs::read_to_string(path).unwrap(); + serde_json::from_str::(&config).expect("Can parse config"); + } +} diff --git a/src/process/dummy.rs b/src/process/dummy.rs index 1c2816c..45787d4 100644 --- a/src/process/dummy.rs +++ b/src/process/dummy.rs @@ -29,10 +29,6 @@ impl RuntimeConfig for Config { fn gpu_uuid(&self) -> Option { None } - - fn uses_gpu(&self) -> bool { - false - } } #[async_trait] diff --git a/tests/runtime_config.rs b/tests/runtime_config.rs deleted file mode 100644 index f86403b..0000000 --- a/tests/runtime_config.rs +++ /dev/null @@ -1,77 +0,0 @@ -use assert_cmd::prelude::*; // Add methods on commands -use std::{path::PathBuf, process::Command}; // Run programs - -#[test] -fn runtime_config_as_text_ok() { - let mut cmd = Command::cargo_bin("ya-runtime-ai").unwrap(); - cmd.arg("--runtime") - .arg("automatic") - .arg("--runtime-config") - .arg( - r##"{ - "startup_script": "path/run.bat", - "api_port": 80, - "api_host": "domain.com", - "api_shutdown_path": "/kill/me", - "model_arg": "", - "additional_args": ["--arg-one", "--arg-two"], - "startup_timeout": "1s", - "api_ping_delay": "100ms", - "monitored_startup_msg": "Started", - "monitored_model_failure_msg": "Failed", - "monitored_msgs_w_trace_lvl": ["Unimportant", "Boring log"], - "uses_gpu": false - }"##, - ) - .arg("test") - .assert() - .success(); -} - -#[test] -fn config_parse_succ_single_field() { - let mut cmd = Command::cargo_bin("ya-runtime-ai").unwrap(); - cmd.arg("--runtime") - .arg("automatic") - .arg("--runtime-config") - .arg(r##"{ "startup_script": "path/bin.exe", "uses_gpu": false }"##) - .arg("test") - .assert() - .success(); -} - -#[test] -fn config_parse_fail_field_bat_type() { - let mut cmd = Command::cargo_bin("ya-runtime-ai").unwrap(); - cmd.arg("--runtime") - .arg("automatic") - .arg("--runtime-config") - .arg(r##"{ "startup_script": 13, "uses_gpu": false }"##) - .arg("test") - .assert() - .failure(); -} - -#[test] -fn succ_without_runtime_config_arg() { - let mut cmd = Command::cargo_bin("ya-runtime-ai").unwrap(); - cmd.arg("--runtime") - .arg("dummy") - .arg("test") - .assert() - .success(); -} - -#[test] -fn config_parse_file_succ() { - let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - path.push("tests/resources/runtime_config.json"); - let mut cmd = Command::cargo_bin("ya-runtime-ai").unwrap(); - cmd.arg("--runtime") - .arg("automatic") - .arg("--runtime-config") - .arg(path.to_str().unwrap()) - .arg("test") - .assert() - .success(); -}