From f30cc74fb41916d11a27e6b29ebbe73298534573 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Thu, 8 Oct 2020 14:34:11 -0700 Subject: [PATCH 1/4] Avoid SeqCst or static mut in mach_timebase_info cache --- library/std/src/sys/unix/time.rs | 58 ++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/library/std/src/sys/unix/time.rs b/library/std/src/sys/unix/time.rs index f2a9cb5a0e879..5dc1ade9c1fa1 100644 --- a/library/std/src/sys/unix/time.rs +++ b/library/std/src/sys/unix/time.rs @@ -117,8 +117,7 @@ impl Hash for Timespec { #[cfg(any(target_os = "macos", target_os = "ios"))] mod inner { use crate::fmt; - use crate::mem; - use crate::sync::atomic::{AtomicUsize, Ordering::SeqCst}; + use crate::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use crate::sys::cvt; use crate::sys_common::mul_div_u64; use crate::time::Duration; @@ -233,31 +232,46 @@ mod inner { } fn info() -> mach_timebase_info { - static mut INFO: mach_timebase_info = mach_timebase_info { numer: 0, denom: 0 }; - static STATE: AtomicUsize = AtomicUsize::new(0); + static INITIALIZED: AtomicBool = AtomicBool::new(false); + static INFO_BITS: AtomicU64 = AtomicU64::new(0); + + // If a previous thread has filled in this global INITIALIZED, use that. + if INITIALIZED.load(Ordering::Acquire) { + // The Acquire/Release pair used for INITIALIZED ensures that this + // load can see the corresponding `INFO_BITS` store, despite them + // both being Relaxed. + return info_from_bits(INFO_BITS.load(Ordering::Relaxed)); + } + + // ... otherwise learn for ourselves ... + extern "C" { + fn mach_timebase_info(info: mach_timebase_info_t) -> kern_return_t; + } + let mut info = info_from_bits(0); unsafe { - // If a previous thread has filled in this global state, use that. - if STATE.load(SeqCst) == 2 { - return INFO; - } + mach_timebase_info(&mut info); + } - // ... otherwise learn for ourselves ... - let mut info = mem::zeroed(); - extern "C" { - fn mach_timebase_info(info: mach_timebase_info_t) -> kern_return_t; - } + // Note: This is racy, but the race is against other threads trying to + // write the same value. + INFO_BITS.store(info_to_bits(info), Ordering::Relaxed); - mach_timebase_info(&mut info); + // The `Release` here "publishes" the store of `INFO_BITS` to other + // threads (which do a `INITIALIZED.load(Acquire)`) despite it being + // read/written w/ `Relaxed`. + INITIALIZED.store(true, Ordering::Release); + info + } - // ... and attempt to be the one thread that stores it globally for - // all other threads - if STATE.compare_exchange(0, 1, SeqCst, SeqCst).is_ok() { - INFO = info; - STATE.store(2, SeqCst); - } - return info; - } + #[inline] + fn info_to_bits(info: mach_timebase_info) -> u64 { + ((info.denom as u64) << 32) | (info.numer as u64) + } + + #[inline] + fn info_from_bits(bits: u64) -> mach_timebase_info { + mach_timebase_info { numer: bits as u32, denom: (bits >> 32) as u32 } } } From e4cf24bd451363cfb675af70b40dbb96a1ec2716 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Thu, 8 Oct 2020 15:17:35 -0700 Subject: [PATCH 2/4] Fiddle with the comments --- library/std/src/sys/unix/time.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/library/std/src/sys/unix/time.rs b/library/std/src/sys/unix/time.rs index 5dc1ade9c1fa1..d6a5cc26df239 100644 --- a/library/std/src/sys/unix/time.rs +++ b/library/std/src/sys/unix/time.rs @@ -235,11 +235,11 @@ mod inner { static INITIALIZED: AtomicBool = AtomicBool::new(false); static INFO_BITS: AtomicU64 = AtomicU64::new(0); - // If a previous thread has filled in this global INITIALIZED, use that. + // If a previous thread has initialized `INFO_BITS`, use that. if INITIALIZED.load(Ordering::Acquire) { - // The Acquire/Release pair used for INITIALIZED ensures that this - // load can see the corresponding `INFO_BITS` store, despite them - // both being Relaxed. + // Note: `Relaxed` is correct here and below -- the `Acquire` / + // `Release` pair used for `INITIALIZED` ensures this load can see + // the corresponding store below. return info_from_bits(INFO_BITS.load(Ordering::Relaxed)); } @@ -253,7 +253,7 @@ mod inner { mach_timebase_info(&mut info); } - // Note: This is racy, but the race is against other threads trying to + // This is racy, but the race should be against other threads trying to // write the same value. INFO_BITS.store(info_to_bits(info), Ordering::Relaxed); From 59c06e9e40c39aeec955fb2359f810285c262c34 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Thu, 8 Oct 2020 17:03:16 -0700 Subject: [PATCH 3/4] Switch to using a single atomic and treating 0 as 'uninitialized' --- library/std/src/sys/unix/time.rs | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/library/std/src/sys/unix/time.rs b/library/std/src/sys/unix/time.rs index d6a5cc26df239..fac4b05ad0b5f 100644 --- a/library/std/src/sys/unix/time.rs +++ b/library/std/src/sys/unix/time.rs @@ -117,7 +117,7 @@ impl Hash for Timespec { #[cfg(any(target_os = "macos", target_os = "ios"))] mod inner { use crate::fmt; - use crate::sync::atomic::{AtomicBool, AtomicU64, Ordering}; + use crate::sync::atomic::{AtomicU64, Ordering}; use crate::sys::cvt; use crate::sys_common::mul_div_u64; use crate::time::Duration; @@ -232,15 +232,19 @@ mod inner { } fn info() -> mach_timebase_info { - static INITIALIZED: AtomicBool = AtomicBool::new(false); + // INFO_BITS conceptually is an `Option`. We can do + // this in 64 bits because we know 0 is never a valid value for the + // `denom` field. + // + // Encoding this as a single `AtomicU64` allows us to use `Relaxed` + // operations, as we are only interested in in the effects on a single + // memory location. static INFO_BITS: AtomicU64 = AtomicU64::new(0); - // If a previous thread has initialized `INFO_BITS`, use that. - if INITIALIZED.load(Ordering::Acquire) { - // Note: `Relaxed` is correct here and below -- the `Acquire` / - // `Release` pair used for `INITIALIZED` ensures this load can see - // the corresponding store below. - return info_from_bits(INFO_BITS.load(Ordering::Relaxed)); + // If a previous thread has initialized `INFO_BITS`, use it. + let info_bits = INFO_BITS.load(Ordering::Relaxed); + if info_bits != 0 { + return info_from_bits(info_bits); } // ... otherwise learn for ourselves ... @@ -252,15 +256,7 @@ mod inner { unsafe { mach_timebase_info(&mut info); } - - // This is racy, but the race should be against other threads trying to - // write the same value. INFO_BITS.store(info_to_bits(info), Ordering::Relaxed); - - // The `Release` here "publishes" the store of `INFO_BITS` to other - // threads (which do a `INITIALIZED.load(Acquire)`) despite it being - // read/written w/ `Relaxed`. - INITIALIZED.store(true, Ordering::Release); info } From 4f3722051092bfe33ec87836e8dd4d6d155c1fc1 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Thu, 8 Oct 2020 17:04:32 -0700 Subject: [PATCH 4/4] Implement the same optimization in windows/time --- library/std/src/sys/windows/time.rs | 36 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/library/std/src/sys/windows/time.rs b/library/std/src/sys/windows/time.rs index 900260169c767..91e4f7654840d 100644 --- a/library/std/src/sys/windows/time.rs +++ b/library/std/src/sys/windows/time.rs @@ -165,7 +165,7 @@ fn intervals2dur(intervals: u64) -> Duration { mod perf_counter { use super::NANOS_PER_SEC; - use crate::sync::atomic::{AtomicUsize, Ordering::SeqCst}; + use crate::sync::atomic::{AtomicU64, Ordering}; use crate::sys::c; use crate::sys::cvt; use crate::sys_common::mul_div_u64; @@ -197,27 +197,25 @@ mod perf_counter { } fn frequency() -> c::LARGE_INTEGER { - static mut FREQUENCY: c::LARGE_INTEGER = 0; - static STATE: AtomicUsize = AtomicUsize::new(0); - + // Either the cached result of `QueryPerformanceFrequency` or `0` for + // uninitialized. Storing this as a single `AtomicU64` allows us to use + // `Relaxed` operations, as we are only interested in the effects on a + // single memory location. + static FREQUENCY: AtomicU64 = AtomicU64::new(0); + + let cached = FREQUENCY.load(Ordering::Relaxed); + // If a previous thread has filled in this global state, use that. + if cached != 0 { + return cached as c::LARGE_INTEGER; + } + // ... otherwise learn for ourselves ... + let mut frequency = 0; unsafe { - // If a previous thread has filled in this global state, use that. - if STATE.load(SeqCst) == 2 { - return FREQUENCY; - } - - // ... otherwise learn for ourselves ... - let mut frequency = 0; cvt(c::QueryPerformanceFrequency(&mut frequency)).unwrap(); - - // ... and attempt to be the one thread that stores it globally for - // all other threads - if STATE.compare_exchange(0, 1, SeqCst, SeqCst).is_ok() { - FREQUENCY = frequency; - STATE.store(2, SeqCst); - } - frequency } + + FREQUENCY.store(frequency as u64, Ordering::Relaxed); + frequency } fn query() -> c::LARGE_INTEGER {