From d12dadbc06efb02111deb82664287429ac23ed11 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Thu, 12 Oct 2023 17:47:50 +0200 Subject: [PATCH] chore: update rustc and fix future (#11696) --- README.md | 2 +- .../src/array/dictionary/value_map.rs | 4 +--- .../polars-core/src/hashing/vector_hasher.rs | 22 +++++-------------- crates/polars-io/src/cloud/options.rs | 12 ++++++---- crates/polars-ops/src/frame/hashing.rs | 8 ++----- .../join/hash_join/single_keys_dispatch.rs | 4 +--- crates/polars-ops/src/frame/join/mod.rs | 2 +- .../series/ops/approx_algo/hyperloglogplus.rs | 8 +++---- .../src/logical_plan/optimizer/cse.rs | 5 +---- crates/polars-utils/src/functions.rs | 6 ++--- py-polars/tests/unit/dataframe/test_df.py | 2 +- rust-toolchain.toml | 2 +- 12 files changed, 28 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index feb2aa57e96e..b381350fce96 100644 --- a/README.md +++ b/README.md @@ -222,7 +222,7 @@ point to the `main` branch of this repo. polars = { git = "https://github.com/pola-rs/polars", rev = "" } ``` -Required Rust version `>=1.65`. +Required Rust version `>=1.71`. ## Contributing diff --git a/crates/nano-arrow/src/array/dictionary/value_map.rs b/crates/nano-arrow/src/array/dictionary/value_map.rs index 5a12534766bd..f9d22edfffe5 100644 --- a/crates/nano-arrow/src/array/dictionary/value_map.rs +++ b/crates/nano-arrow/src/array/dictionary/value_map.rs @@ -43,9 +43,7 @@ pub struct Hashed { #[inline] fn ahash_hash(value: &T) -> u64 { - let mut hasher = BuildHasherDefault::::default().build_hasher(); - value.hash(&mut hasher); - hasher.finish() + BuildHasherDefault::::default().hash_one(value) } impl Hash for Hashed { diff --git a/crates/polars-core/src/hashing/vector_hasher.rs b/crates/polars-core/src/hashing/vector_hasher.rs index 5a481e24a5c1..0d5c5b64ec9a 100644 --- a/crates/polars-core/src/hashing/vector_hasher.rs +++ b/crates/polars-core/src/hashing/vector_hasher.rs @@ -42,12 +42,8 @@ pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 { pub(crate) fn get_null_hash_value(random_state: RandomState) -> u64 { // we just start with a large prime number and hash that twice // to get a constant hash value for null/None - let mut hasher = random_state.build_hasher(); - 3188347919usize.hash(&mut hasher); - let first = hasher.finish(); - let mut hasher = random_state.build_hasher(); - first.hash(&mut hasher); - hasher.finish() + let first = random_state.hash_one(3188347919usize); + random_state.hash_one(first) } fn insert_null_hash(chunks: &[ArrayRef], random_state: RandomState, buf: &mut Vec) { @@ -392,13 +388,8 @@ where buf.clear(); buf.reserve(self.len()); - self.downcast_iter().for_each(|arr| { - buf.extend(arr.into_iter().map(|opt_v| { - let mut hasher = random_state.build_hasher(); - opt_v.hash(&mut hasher); - hasher.finish() - })) - }); + self.downcast_iter() + .for_each(|arr| buf.extend(arr.into_iter().map(|opt_v| random_state.hash_one(opt_v)))); Ok(()) } @@ -406,9 +397,8 @@ where fn vec_hash_combine(&self, random_state: RandomState, hashes: &mut [u64]) -> PolarsResult<()> { self.apply_to_slice( |opt_v, h| { - let mut hasher = random_state.build_hasher(); - opt_v.hash(&mut hasher); - _boost_hash_combine(hasher.finish(), *h) + let hashed = random_state.hash_one(opt_v); + _boost_hash_combine(hashed, *h) }, hashes, ); diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index 8866b0f5c8d8..78aedaf8e90f 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -30,8 +30,8 @@ use smartstring::alias::String as SmartString; use url::Url; #[cfg(feature = "aws")] -static BUCKET_REGION: Lazy>> = - Lazy::new(|| tokio::sync::Mutex::new(FastFixedCache::default())); +static BUCKET_REGION: Lazy>> = + Lazy::new(|| std::sync::Mutex::new(FastFixedCache::new(32))); /// The type of the config keys must satisfy the following requirements: /// 1. must be easily collected into a HashMap, the type required by the object_crate API. @@ -146,10 +146,13 @@ impl CloudOptions { .get_config_value(&AmazonS3ConfigKey::Region) .is_none() { - let mut bucket_region = BUCKET_REGION.lock().await; let bucket = crate::cloud::CloudLocation::new(url)?.bucket; + let region = { + let bucket_region = BUCKET_REGION.lock().unwrap(); + bucket_region.get(bucket.as_str()).cloned() + }; - match bucket_region.get(bucket.as_str()) { + match region { Some(region) => { builder = builder.with_config(AmazonS3ConfigKey::Region, region.as_str()) }, @@ -165,6 +168,7 @@ impl CloudOptions { if let Some(region) = result.headers().get("x-amz-bucket-region") { let region = std::str::from_utf8(region.as_bytes()).map_err(to_compute_err)?; + let mut bucket_region = BUCKET_REGION.lock().unwrap(); bucket_region.insert(bucket.into(), region.into()); builder = builder.with_config(AmazonS3ConfigKey::Region, region) } diff --git a/crates/polars-ops/src/frame/hashing.rs b/crates/polars-ops/src/frame/hashing.rs index e4bfb2f9c11d..245f125edfb9 100644 --- a/crates/polars-ops/src/frame/hashing.rs +++ b/crates/polars-ops/src/frame/hashing.rs @@ -1,4 +1,4 @@ -use std::hash::{BuildHasher, Hash, Hasher}; +use std::hash::Hash; use ahash::RandomState; use hashbrown::hash_map::RawEntryMut; @@ -88,11 +88,7 @@ where .map(|iter| { // create hashes and keys iter.into_iter() - .map(|val| { - let mut hasher = build_hasher.build_hasher(); - val.hash(&mut hasher); - (hasher.finish(), val) - }) + .map(|val| (build_hasher.hash_one(&val), val)) .collect_trusted::>() }) .collect() diff --git a/crates/polars-ops/src/frame/join/hash_join/single_keys_dispatch.rs b/crates/polars-ops/src/frame/join/hash_join/single_keys_dispatch.rs index 5b8428738e4f..fefb207af236 100644 --- a/crates/polars-ops/src/frame/join/hash_join/single_keys_dispatch.rs +++ b/crates/polars-ops/src/frame/join/hash_join/single_keys_dispatch.rs @@ -366,9 +366,7 @@ pub fn prepare_bytes<'a>( .map(|ca| { ca.into_iter() .map(|opt_b| { - let mut state = hb.build_hasher(); - opt_b.hash(&mut state); - let hash = state.finish(); + let hash = hb.hash_one(opt_b); BytesHash::new(opt_b, hash) }) .collect::>() diff --git a/crates/polars-ops/src/frame/join/mod.rs b/crates/polars-ops/src/frame/join/mod.rs index 3a2e4fef7abd..4c7fff623464 100644 --- a/crates/polars-ops/src/frame/join/mod.rs +++ b/crates/polars-ops/src/frame/join/mod.rs @@ -12,7 +12,7 @@ mod merge_sorted; #[cfg(feature = "chunked_ids")] use std::borrow::Cow; use std::fmt::{Debug, Display, Formatter}; -use std::hash::{BuildHasher, Hash, Hasher}; +use std::hash::Hash; use ahash::RandomState; pub use args::*; diff --git a/crates/polars-ops/src/series/ops/approx_algo/hyperloglogplus.rs b/crates/polars-ops/src/series/ops/approx_algo/hyperloglogplus.rs index 133e4e3f8298..7df61317d9bc 100644 --- a/crates/polars-ops/src/series/ops/approx_algo/hyperloglogplus.rs +++ b/crates/polars-ops/src/series/ops/approx_algo/hyperloglogplus.rs @@ -16,10 +16,10 @@ //! assert_eq!(hllp.count(), 2); //! ``` -use std::hash::{BuildHasher, Hash, Hasher}; +use std::hash::Hash; use std::marker::PhantomData; -use polars_core::export::ahash::{AHasher, RandomState}; +use polars_core::export::ahash::RandomState; /// The greater is P, the smaller the error. const HLL_P: usize = 14_usize; @@ -85,9 +85,7 @@ where /// reasonable performance. #[inline] fn hash_value(&self, obj: &T) -> u64 { - let mut hasher: AHasher = SEED.build_hasher(); - obj.hash(&mut hasher); - hasher.finish() + SEED.hash_one(obj) } /// Adds an element to the HyperLogLog. diff --git a/crates/polars-plan/src/logical_plan/optimizer/cse.rs b/crates/polars-plan/src/logical_plan/optimizer/cse.rs index 6ad2d63ef1e4..11e4c45ca925 100644 --- a/crates/polars-plan/src/logical_plan/optimizer/cse.rs +++ b/crates/polars-plan/src/logical_plan/optimizer/cse.rs @@ -1,7 +1,6 @@ //! Common Subplan Elimination use std::collections::{BTreeMap, BTreeSet}; -use std::hash::{BuildHasher, Hash, Hasher}; use polars_core::prelude::*; @@ -310,9 +309,7 @@ pub(crate) fn elim_cmn_subplans( (Some(h), _) => *h, (_, Some(h)) => *h, _ => { - let mut h = hb.build_hasher(); - node1.hash(&mut h); - let hash = h.finish(); + let hash = hb.hash_one(node1); let mut cache_id = lp_cache.wrapping_add(hash as usize); // this ensures we can still add branch ids without overflowing // during the dot representation diff --git a/crates/polars-utils/src/functions.rs b/crates/polars-utils/src/functions.rs index 88cc128e6a6a..47bece31d73d 100644 --- a/crates/polars-utils/src/functions.rs +++ b/crates/polars-utils/src/functions.rs @@ -1,4 +1,4 @@ -use std::hash::{BuildHasher, Hash, Hasher}; +use std::hash::{BuildHasher, Hash}; // Faster than collecting from a flattened iterator. pub fn flatten>(bufs: &[R], len: Option) -> Vec { @@ -20,7 +20,5 @@ pub fn hash_to_partition(h: u64, n_partitions: usize) -> usize { #[inline] pub fn get_hash(value: T, hb: &B) -> u64 { - let mut hasher = hb.build_hasher(); - value.hash(&mut hasher); - hasher.finish() + hb.hash_one(value) } diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index b5c38891f41f..ed181ea6ca51 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -1601,7 +1601,7 @@ def test_reproducible_hash_with_seeds() -> None: if platform.mac_ver()[-1] != "arm64": expected = pl.Series( "s", - [13477868900383131459, 988796329533502010, 16840582678788620208], + [13477868900383131459, 6344663067812082469, 16840582678788620208], dtype=pl.UInt64, ) result = df.hash_rows(*seeds) diff --git a/rust-toolchain.toml b/rust-toolchain.toml index a28c6ce5f0fe..4a5741c539f4 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,2 +1,2 @@ [toolchain] -channel = "nightly-2023-10-02" +channel = "nightly-2023-10-12"