From 61c6c802dde3707f28740243e2b9265ced4aebc9 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 26 Jun 2023 10:07:20 +0200 Subject: [PATCH 01/15] Update ordered-float to version 4 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b5e028b..dd4bb55 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ fnv = "1" itertools = "0.10" murmur3 = "0.5" ndarray = "0.15" -ordered-float = "2" +ordered-float = "4" rand = "0.8" rand_chacha = "0.3" reductive = "0.9" From 06916c78f0142dabcdece0ed44882e910ea926d4 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 26 Jun 2023 10:07:40 +0200 Subject: [PATCH 02/15] Update itertools to 0.11 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index dd4bb55..0cbd7a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ exclude = [ [dependencies] byteorder = "1" fnv = "1" -itertools = "0.10" +itertools = "0.11" murmur3 = "0.5" ndarray = "0.15" ordered-float = "4" From 1a743f900fc7f8a26ca72f20b9c8a8693981a063 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 26 Jun 2023 10:16:56 +0200 Subject: [PATCH 03/15] Update memmap2 to 0.9 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0cbd7a2..fc34665 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ thiserror = "1" toml = "0.5" [dependencies.memmap2] -version = "0.5" +version = "0.9" optional = true [features] From 335f2d7e7867234868bd5cb6cf23f8fd060b502e Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 26 Jun 2023 10:35:28 +0200 Subject: [PATCH 04/15] Update approx to 0.5 --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fc34665..b385a62 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ byteorder = "1" fnv = "1" itertools = "0.11" murmur3 = "0.5" -ndarray = "0.15" +ndarray = { version = "0.15", features = ["approx-0_5"] } ordered-float = "4" rand = "0.8" rand_chacha = "0.3" @@ -39,7 +39,7 @@ default = ["memmap"] memmap = ["memmap2"] [dev-dependencies] -approx = "0.4" +approx = "0.5" criterion = "0.3" lazy_static = "1" maplit = "1" From 98d452f7bef21d37435e4bc964fdacf416c7ca89 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 26 Jun 2023 10:42:23 +0200 Subject: [PATCH 05/15] Upgrade criterion to 0.5 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b385a62..2de5230 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,7 @@ memmap = ["memmap2"] [dev-dependencies] approx = "0.5" -criterion = "0.3" +criterion = "0.5" lazy_static = "1" maplit = "1" tempfile = "3" From dd193b2805400878fcfc20bf931ca21029eb2882 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 9 Oct 2023 09:59:14 +0200 Subject: [PATCH 06/15] Upgrade to toml 0.8 --- Cargo.toml | 2 +- src/chunks/metadata.rs | 18 +++++++++--------- src/compat/fasttext/io.rs | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2de5230..0d56008 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,7 @@ reductive = "0.9" serde = { version = "1", features = ["derive"] } smallvec = "1.7" thiserror = "1" -toml = "0.5" +toml = "0.8" [dependencies.memmap2] version = "0.9" diff --git a/src/chunks/metadata.rs b/src/chunks/metadata.rs index 0b0fffa..eef1048 100644 --- a/src/chunks/metadata.rs +++ b/src/chunks/metadata.rs @@ -5,7 +5,7 @@ use std::mem; use std::ops::{Deref, DerefMut}; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; -use toml::Value; +use toml::Table; use crate::chunks::io::{ChunkIdentifier, Header, ReadChunk, WriteChunk}; use crate::error::{Error, Result}; @@ -16,18 +16,18 @@ use crate::io::ReadMetadata; /// finalfusion metadata in TOML format. #[derive(Clone, Debug, PartialEq)] pub struct Metadata { - inner: Value, + inner: Table, } impl Metadata { /// Construct new `Metadata`. - pub fn new(data: Value) -> Self { - Metadata { inner: data } + pub fn new(inner: Table) -> Self { + Metadata { inner } } } impl Deref for Metadata { - type Target = Value; + type Target = Table; fn deref(&self) -> &Self::Target { &self.inner @@ -40,9 +40,9 @@ impl DerefMut for Metadata { } } -impl From for Metadata { - fn from(value: Value) -> Self { - Metadata { inner: value } +impl From for Metadata { + fn from(inner: Table) -> Self { + Metadata { inner } } } @@ -69,7 +69,7 @@ impl ReadChunk for Metadata { Ok(Metadata::new( buf_str - .parse::() + .parse::
() .map_err(|e| Error::Format(format!("Cannot deserialize TOML metadata: {}", e))) .map_err(Error::from)?, )) diff --git a/src/compat/fasttext/io.rs b/src/compat/fasttext/io.rs index 560c92a..5d62031 100644 --- a/src/compat/fasttext/io.rs +++ b/src/compat/fasttext/io.rs @@ -5,7 +5,7 @@ use std::ops::Mul; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use ndarray::{s, Array2, ErrorKind as ShapeErrorKind, ShapeError}; use serde::Serialize; -use toml::Value; +use toml::Table; use crate::chunks::metadata::Metadata; use crate::chunks::norms::NdNorms; @@ -107,7 +107,7 @@ impl ReadFastTextPrivate for Embeddings { ))); } - let metadata = Value::try_from(config).map_err(|e| { + let metadata = Table::try_from(config).map_err(|e| { Error::Format(format!("Cannot serialize model metadata to TOML: {}", e)) })?; From e3a68d6d475e669315e164bfb3841b7ebb5eb700 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 9 Oct 2023 10:27:14 +0200 Subject: [PATCH 07/15] Bump MSRV to 1.66 --- .github/workflows/rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index ac6cfb3..b3ac973 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -90,7 +90,7 @@ jobs: - uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: 1.54.0 + toolchain: 1.66.0 override: true - run: rustup component add clippy - uses: actions-rs/cargo@v1 From 98f8617e055095b29f9a61760bd4354e03222df2 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 9 Oct 2023 10:29:29 +0200 Subject: [PATCH 08/15] Add rust-version metadata --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index 0d56008..4916161 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ name = "finalfusion" version = "0.17.1" edition = "2018" +rust-version = "1.66" authors = ["Daniël de Kok ", "Sebastian Pütz "] description = "Reader and writer for common word embedding formats" documentation = "https://docs.rs/finalfusion/" From 569c789a83e0682d4f47a7ae6af7b37737a5dc6e Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 9 Oct 2023 10:32:54 +0200 Subject: [PATCH 09/15] Apply clippy suggestions --- benches/array.rs | 12 ++++++------ benches/quantized.rs | 12 ++++++------ benches/subword.rs | 1 - src/chunks/norms.rs | 6 +++--- src/chunks/storage/array.rs | 12 ++++++------ src/chunks/storage/quantized.rs | 10 +++++----- src/chunks/storage/wrappers.rs | 8 ++++---- src/chunks/vocab/simple.rs | 4 ++-- src/chunks/vocab/subword.rs | 2 +- src/chunks/vocab/wrappers.rs | 2 +- src/similarity.rs | 10 +++++----- src/subword.rs | 11 +++-------- src/util.rs | 4 ++-- 13 files changed, 44 insertions(+), 50 deletions(-) diff --git a/benches/array.rs b/benches/array.rs index 32c79ea..b849ae7 100644 --- a/benches/array.rs +++ b/benches/array.rs @@ -25,18 +25,18 @@ fn allround_iter() -> impl Iterator + Clone { corpus.into_iter() } -fn known_iter<'a>( - embeds: &'a Embeddings, -) -> impl 'a + Iterator + Clone { +fn known_iter( + embeds: &Embeddings, +) -> impl '_ + Iterator + Clone { allround_iter().filter_map(move |w| match embeds.vocab().idx(&w) { Some(WordIndex::Word(_)) => Some(w), _ => None, }) } -fn unknown_iter<'a>( - embeds: &'a Embeddings, -) -> impl 'a + Iterator + Clone { +fn unknown_iter( + embeds: &Embeddings, +) -> impl '_ + Iterator + Clone { allround_iter().filter_map(move |w| match embeds.vocab().idx(&w) { Some(WordIndex::Subword(_)) => Some(w), _ => None, diff --git a/benches/quantized.rs b/benches/quantized.rs index 655725b..5029818 100644 --- a/benches/quantized.rs +++ b/benches/quantized.rs @@ -25,18 +25,18 @@ fn allround_iter() -> impl Iterator + Clone { corpus.into_iter() } -fn known_iter<'a>( - embeds: &'a Embeddings, -) -> impl 'a + Iterator + Clone { +fn known_iter( + embeds: &Embeddings, +) -> impl '_ + Iterator + Clone { allround_iter().filter_map(move |w| match embeds.vocab().idx(&w) { Some(WordIndex::Word(_)) => Some(w), _ => None, }) } -fn unknown_iter<'a>( - embeds: &'a Embeddings, -) -> impl 'a + Iterator + Clone { +fn unknown_iter( + embeds: &Embeddings, +) -> impl '_ + Iterator + Clone { allround_iter().filter_map(move |w| match embeds.vocab().idx(&w) { Some(WordIndex::Subword(_)) => Some(w), _ => None, diff --git a/benches/subword.rs b/benches/subword.rs index 5b47c8b..eafb0b0 100644 --- a/benches/subword.rs +++ b/benches/subword.rs @@ -13,7 +13,6 @@ fn subwords(string: &str, min_n: usize, max_n: usize, indexer: &impl Indexer) -> // evaluates them. string .subword_indices(min_n, max_n, indexer) - .into_iter() .fold(0, |sum, v| sum.wrapping_add(v)) } diff --git a/src/chunks/norms.rs b/src/chunks/norms.rs index 45d962c..9997e7c 100644 --- a/src/chunks/norms.rs +++ b/src/chunks/norms.rs @@ -71,7 +71,7 @@ impl ReadChunk for NdNorms { f32::ensure_data_type(read)?; let n_padding = - padding::(read.seek(SeekFrom::Current(0)).map_err(|e| { + padding::(read.stream_position().map_err(|e| { Error::read_error("Cannot get file position for computing padding", e) })?); read.seek(SeekFrom::Current(n_padding as i64)) @@ -109,12 +109,12 @@ impl WriteChunk for NdNorms { write .write_u32::(ChunkIdentifier::NdNorms as u32) .map_err(|e| Error::write_error("Cannot write norms chunk identifier", e))?; - let n_padding = padding::(write.seek(SeekFrom::Current(0)).map_err(|e| { + let n_padding = padding::(write.stream_position().map_err(|e| { Error::write_error("Cannot get file position for computing padding", e) })?); let remaining_chunk_len = - self.chunk_len(write.seek(SeekFrom::Current(0)).map_err(|e| { + self.chunk_len(write.stream_position().map_err(|e| { Error::read_error("Cannot get file position for computing padding", e) })?) - (size_of::() + size_of::()) as u64; diff --git a/src/chunks/storage/array.rs b/src/chunks/storage/array.rs index 85ce794..f16b071 100644 --- a/src/chunks/storage/array.rs +++ b/src/chunks/storage/array.rs @@ -134,7 +134,7 @@ mod mmap { // The components of the embedding matrix should be of type f32. f32::ensure_data_type(read)?; - let n_padding = padding::(read.seek(SeekFrom::Current(0)).map_err(|e| { + let n_padding = padding::(read.stream_position().map_err(|e| { Error::read_error("Cannot get file position for computing padding", e) })?); read.seek(SeekFrom::Current(n_padding as i64)) @@ -142,7 +142,7 @@ mod mmap { // Set up memory mapping. let matrix_len = shape.size() * size_of::(); - let offset = read.seek(SeekFrom::Current(0)).map_err(|e| { + let offset = read.stream_position().map_err(|e| { Error::read_error( "Cannot get file position for memory mapping embedding matrix", e, @@ -153,7 +153,7 @@ mod mmap { mmap_opts .offset(offset) .len(matrix_len) - .map(&*read.get_ref()) + .map(read.get_ref()) .map_err(|e| Error::read_error("Cannot memory map embedding matrix", e))? }; @@ -218,13 +218,13 @@ impl NdArray { write .write_u32::(ChunkIdentifier::NdArray as u32) .map_err(|e| Error::write_error("Cannot write embedding matrix chunk identifier", e))?; - let n_padding = padding::(write.seek(SeekFrom::Current(0)).map_err(|e| { + let n_padding = padding::(write.stream_position().map_err(|e| { Error::write_error("Cannot get file position for computing padding", e) })?); let remaining_chunk_len = Self::chunk_len( data.view(), - write.seek(SeekFrom::Current(0)).map_err(|e| { + write.stream_position().map_err(|e| { Error::read_error("Cannot get file position for computing padding", e) })?, ) - (size_of::() + size_of::()) as u64; @@ -346,7 +346,7 @@ impl ReadChunk for NdArray { f32::ensure_data_type(read)?; let n_padding = - padding::(read.seek(SeekFrom::Current(0)).map_err(|e| { + padding::(read.stream_position().map_err(|e| { Error::read_error("Cannot get file position for computing padding", e) })?); read.seek(SeekFrom::Current(n_padding as i64)) diff --git a/src/chunks/storage/quantized.rs b/src/chunks/storage/quantized.rs index 50db780..5038f10 100644 --- a/src/chunks/storage/quantized.rs +++ b/src/chunks/storage/quantized.rs @@ -117,7 +117,7 @@ impl QuantizedArray { f32::ensure_data_type(read)?; let n_padding = - padding::(read.seek(SeekFrom::Current(0)).map_err(|e| { + padding::(read.stream_position().map_err(|e| { Error::read_error("Cannot get file position for computing padding", e) })?); read.seek(SeekFrom::Current(n_padding as i64)) @@ -171,12 +171,12 @@ impl QuantizedArray { quantizer, quantized.view(), norms, - write.seek(SeekFrom::Current(0)).map_err(|e| { + write.stream_position().map_err(|e| { Error::read_error("Cannot get file position for computing padding", e) })?, ) - (size_of::() + size_of::()) as u64; - let n_padding = padding::(write.seek(SeekFrom::Current(0)).map_err(|e| { + let n_padding = padding::(write.stream_position().map_err(|e| { Error::write_error("Cannot get file position for computing padding", e) })?); @@ -562,7 +562,7 @@ mod mmap { n_embeddings: usize, quantized_len: usize, ) -> Result { - let offset = read.seek(SeekFrom::Current(0)).map_err(|e| { + let offset = read.stream_position().map_err(|e| { Error::read_error( "Cannot get file position for memory mapping embedding matrix", e, @@ -574,7 +574,7 @@ mod mmap { mmap_opts .offset(offset) .len(matrix_len) - .map(&*read.get_ref()) + .map(read.get_ref()) .map_err(|e| { Error::read_error("Cannot memory map quantized embedding matrix", e) })? diff --git a/src/chunks/storage/wrappers.rs b/src/chunks/storage/wrappers.rs index 9f81cc1..4b0b16f 100644 --- a/src/chunks/storage/wrappers.rs +++ b/src/chunks/storage/wrappers.rs @@ -126,7 +126,7 @@ impl ReadChunk for StorageWrap { R: Read + Seek, { let chunk_start_pos = read - .seek(SeekFrom::Current(0)) + .stream_position() .map_err(|e| Error::read_error("Cannot get storage chunk start position", e))?; let chunk_id = read @@ -156,7 +156,7 @@ impl ReadChunk for StorageWrap { impl MmapChunk for StorageWrap { fn mmap_chunk(read: &mut BufReader) -> Result { let chunk_start_pos = read - .seek(SeekFrom::Current(0)) + .stream_position() .map_err(|e| Error::read_error("Cannot get storage chunk start position", e))?; let chunk_id = read @@ -306,7 +306,7 @@ impl ReadChunk for StorageViewWrap { R: Read + Seek, { let chunk_start_pos = read - .seek(SeekFrom::Current(0)) + .stream_position() .map_err(|e| Error::read_error("Cannot get storage chunk start position", e))?; let chunk_id = read @@ -361,7 +361,7 @@ impl WriteChunk for StorageViewWrap { impl MmapChunk for StorageViewWrap { fn mmap_chunk(read: &mut BufReader) -> Result { let chunk_start_pos = read - .seek(SeekFrom::Current(0)) + .stream_position() .map_err(|e| Error::read_error("Cannot get storage chunk start position", e))?; let chunk_id = read diff --git a/src/chunks/vocab/simple.rs b/src/chunks/vocab/simple.rs index 2915bb8..170d2ad 100644 --- a/src/chunks/vocab/simple.rs +++ b/src/chunks/vocab/simple.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; use std::convert::TryInto; -use std::io::{Read, Seek, SeekFrom, Write}; +use std::io::{Read, Seek, Write}; use std::mem::size_of; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; @@ -103,7 +103,7 @@ impl WriteChunk for SimpleVocab { .map_err(|e| Error::write_error("Cannot write vocabulary chunk identifier", e))?; let remaining_chunk_len = - self.chunk_len(write.seek(SeekFrom::Current(0)).map_err(|e| { + self.chunk_len(write.stream_position().map_err(|e| { Error::read_error("Cannot get file position for computing padding", e) })?) - (size_of::() + size_of::()) as u64; diff --git a/src/chunks/vocab/subword.rs b/src/chunks/vocab/subword.rs index 325422d..a4784ea 100644 --- a/src/chunks/vocab/subword.rs +++ b/src/chunks/vocab/subword.rs @@ -400,7 +400,7 @@ where .read_u32::() .map_err(|e| Error::read_error("Cannot read number of buckets", e))?; - let words = read_vocab_items(read, vocab_len as usize)?; + let words = read_vocab_items(read, vocab_len)?; Ok(SubwordVocab::new( words, diff --git a/src/chunks/vocab/wrappers.rs b/src/chunks/vocab/wrappers.rs index e9563b6..47e5571 100644 --- a/src/chunks/vocab/wrappers.rs +++ b/src/chunks/vocab/wrappers.rs @@ -110,7 +110,7 @@ impl ReadChunk for VocabWrap { R: Read + Seek, { let chunk_start_pos = read - .seek(SeekFrom::Current(0)) + .stream_position() .map_err(|e| Error::read_error("Cannot get vocabulary chunk start position", e))?; let chunk_id = read .read_u32::() diff --git a/src/similarity.rs b/src/similarity.rs index e594850..67f246f 100644 --- a/src/similarity.rs +++ b/src/similarity.rs @@ -362,7 +362,7 @@ where #[cfg(test)] mod tests { - + use std::f32; use std::fs::File; use std::io::BufReader; @@ -482,7 +482,7 @@ mod tests { .abs_diff_eq(&1f32, 1e-5)); assert!((WordSimilarityResult { word: "test", - similarity: NotNan::new(0.70710678).unwrap() + similarity: NotNan::new(f32::consts::FRAC_1_SQRT_2).unwrap() }) .angular_similarity() .abs_diff_eq(&0.75, 1e-5)); @@ -510,7 +510,7 @@ mod tests { .abs_diff_eq(&0f32, 1e-5)); assert!((WordSimilarityResult { word: "test", - similarity: NotNan::new(0.70710678).unwrap() + similarity: NotNan::new(f32::consts::FRAC_1_SQRT_2).unwrap() }) .euclidean_distance() .abs_diff_eq(&0.76537, 1e-5)); @@ -538,7 +538,7 @@ mod tests { .abs_diff_eq(&1f32, 1e-5)); assert!((WordSimilarityResult { word: "test", - similarity: NotNan::new(0.70710678).unwrap() + similarity: NotNan::new(f32::consts::FRAC_1_SQRT_2).unwrap() }) .euclidean_similarity() .abs_diff_eq(&0.61732, 1e-5)); @@ -602,7 +602,7 @@ mod tests { assert_eq!(10, result.len()); assert_eq!(result.next().unwrap().word, "Berlin"); - for (idx, word_similarity) in result.into_iter().enumerate() { + for (idx, word_similarity) in result.enumerate() { assert_eq!(SIMILARITY_ORDER[idx], word_similarity.word) } } diff --git a/src/subword.rs b/src/subword.rs index 6d1c326..4a91502 100644 --- a/src/subword.rs +++ b/src/subword.rs @@ -86,17 +86,13 @@ where } fn buckets(&self) -> usize { - self.buckets_exp as usize + self.buckets_exp } } impl Clone for HashIndexer { fn clone(&self) -> Self { - HashIndexer { - buckets_exp: self.buckets_exp, - mask: self.mask, - _phantom: PhantomData, - } + *self } } @@ -699,8 +695,7 @@ mod tests { .subword_indices_with_ngrams(3, 6, &indexer) .collect::>(); ngrams_indices_test.sort_by_key(|ngrams_indices_pairs| ngrams_indices_pairs.1.clone()); - for (iter_check, iter_test) in ngrams_indices_check.into_iter().zip(ngrams_indices_test) - { + for (iter_check, iter_test) in ngrams_indices_check.iter().zip(ngrams_indices_test) { assert_eq!(iter_check.0, iter_test.0); } } diff --git a/src/util.rs b/src/util.rs index 63f7f20..30e8f7f 100644 --- a/src/util.rs +++ b/src/util.rs @@ -20,7 +20,7 @@ impl FromIteratorWithCapacity for Vec { I: IntoIterator, { let mut v = Vec::with_capacity(capacity); - v.extend(iter.into_iter()); + v.extend(iter); v } } @@ -31,7 +31,7 @@ impl FromIteratorWithCapacity for VecDeque { I: IntoIterator, { let mut v = VecDeque::with_capacity(capacity); - v.extend(iter.into_iter()); + v.extend(iter); v } } From 47baada892b51af5b615177cd55944b00a49c339 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 9 Oct 2023 10:34:06 +0200 Subject: [PATCH 10/15] Use powerpc-unknown-linux-gnu to test big-endian The mips64 target we used before has been demoted to tier 3: https://github.com/rust-lang/rust/issues/115218 --- .github/workflows/rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index b3ac973..76e5fdd 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -36,7 +36,7 @@ jobs: target: - aarch64-unknown-linux-gnu - i686-unknown-linux-gnu - - mips64-unknown-linux-gnuabi64 + - powerpc-unknown-linux-gnu steps: - uses: actions/checkout@v1 - uses: actions-rs/toolchain@v1 From 578c5bdeb39afe77a900f47c2b54f7a75162f380 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 9 Oct 2023 10:43:46 +0200 Subject: [PATCH 11/15] Explicitly ignore Cargo.lock --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c452573..1f076f7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .* *.bk target +/Cargo.lock From d1c0ea19f4e3d9439df7d3036effe253d9dd8cea Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 9 Oct 2023 10:45:35 +0200 Subject: [PATCH 12/15] Remove non-existent file from package.exclude --- Cargo.toml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4916161..9082cd5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,10 +11,7 @@ homepage = "https://github.com/finalfusion/finalfusion-rust" repository = "https://github.com/finalfusion/finalfusion-rust" license = "MIT OR Apache-2.0" readme = "README.md" -exclude = [ - ".gitignore", - ".travis.yml" -] +exclude = [".gitignore"] [dependencies] byteorder = "1" From fab151d637b2fb26874b3ec7b7446dd1426eb419 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 9 Oct 2023 10:46:00 +0200 Subject: [PATCH 13/15] Remove authors from Cargo metadata (per RFC 3052) --- Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9082cd5..e8ff511 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,6 @@ name = "finalfusion" version = "0.17.1" edition = "2018" rust-version = "1.66" -authors = ["Daniël de Kok ", "Sebastian Pütz "] description = "Reader and writer for common word embedding formats" documentation = "https://docs.rs/finalfusion/" keywords = ["embeddings", "word2vec", "glove", "finalfusion", "fasttext"] From 545757b37a939e794528930c2ef3a402c9b2f379 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 9 Oct 2023 10:48:38 +0200 Subject: [PATCH 14/15] Upgrade to 2021 edition --- Cargo.toml | 2 +- src/chunks/io.rs | 1 - src/chunks/norms.rs | 1 - src/chunks/storage/array.rs | 2 -- src/chunks/storage/quantized.rs | 1 - src/chunks/storage/wrappers.rs | 1 - src/chunks/vocab/simple.rs | 1 - src/chunks/vocab/subword.rs | 1 - src/chunks/vocab/wrappers.rs | 1 - src/compat/fasttext/io.rs | 1 - 10 files changed, 1 insertion(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e8ff511..8175461 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "finalfusion" version = "0.17.1" -edition = "2018" +edition = "2021" rust-version = "1.66" description = "Reader and writer for common word embedding formats" documentation = "https://docs.rs/finalfusion/" diff --git a/src/chunks/io.rs b/src/chunks/io.rs index d59de49..c25a25f 100644 --- a/src/chunks/io.rs +++ b/src/chunks/io.rs @@ -1,4 +1,3 @@ -use std::convert::TryFrom; use std::fmt::{self, Display}; use std::fs::File; use std::io::{BufReader, Read, Seek, Write}; diff --git a/src/chunks/norms.rs b/src/chunks/norms.rs index 9997e7c..05cd924 100644 --- a/src/chunks/norms.rs +++ b/src/chunks/norms.rs @@ -1,6 +1,5 @@ //! Norms chunk -use std::convert::TryInto; use std::io::{Read, Seek, SeekFrom, Write}; use std::mem; use std::mem::size_of; diff --git a/src/chunks/storage/array.rs b/src/chunks/storage/array.rs index f16b071..7d0c632 100644 --- a/src/chunks/storage/array.rs +++ b/src/chunks/storage/array.rs @@ -1,4 +1,3 @@ -use std::convert::TryInto; use std::io::{Read, Seek, SeekFrom, Write}; use std::mem; use std::mem::size_of; @@ -13,7 +12,6 @@ use crate::util::padding; #[cfg(feature = "memmap")] mod mmap { - use std::convert::TryInto; use std::fs::File; #[cfg(target_endian = "little")] use std::io::Write; diff --git a/src/chunks/storage/quantized.rs b/src/chunks/storage/quantized.rs index 5038f10..3d26c4a 100644 --- a/src/chunks/storage/quantized.rs +++ b/src/chunks/storage/quantized.rs @@ -1,4 +1,3 @@ -use std::convert::TryInto; use std::io::{Read, Seek, SeekFrom, Write}; use std::mem; use std::mem::size_of; diff --git a/src/chunks/storage/wrappers.rs b/src/chunks/storage/wrappers.rs index 4b0b16f..f08472b 100644 --- a/src/chunks/storage/wrappers.rs +++ b/src/chunks/storage/wrappers.rs @@ -1,4 +1,3 @@ -use std::convert::TryFrom; #[cfg(feature = "memmap")] use std::fs::File; #[cfg(feature = "memmap")] diff --git a/src/chunks/vocab/simple.rs b/src/chunks/vocab/simple.rs index 170d2ad..d9922dd 100644 --- a/src/chunks/vocab/simple.rs +++ b/src/chunks/vocab/simple.rs @@ -1,5 +1,4 @@ use std::collections::HashMap; -use std::convert::TryInto; use std::io::{Read, Seek, Write}; use std::mem::size_of; diff --git a/src/chunks/vocab/subword.rs b/src/chunks/vocab/subword.rs index a4784ea..253da20 100644 --- a/src/chunks/vocab/subword.rs +++ b/src/chunks/vocab/subword.rs @@ -1,5 +1,4 @@ use std::collections::HashMap; -use std::convert::TryFrom; use std::io; use std::io::{ErrorKind, Read, Seek, Write}; use std::mem::size_of; diff --git a/src/chunks/vocab/wrappers.rs b/src/chunks/vocab/wrappers.rs index 47e5571..4df6527 100644 --- a/src/chunks/vocab/wrappers.rs +++ b/src/chunks/vocab/wrappers.rs @@ -1,4 +1,3 @@ -use std::convert::TryFrom; use std::io::{Read, Seek, SeekFrom, Write}; use byteorder::{LittleEndian, ReadBytesExt}; diff --git a/src/compat/fasttext/io.rs b/src/compat/fasttext/io.rs index 5d62031..6706df5 100644 --- a/src/compat/fasttext/io.rs +++ b/src/compat/fasttext/io.rs @@ -1,4 +1,3 @@ -use std::convert::TryInto; use std::io::{BufRead, Write}; use std::ops::Mul; From 043b7598d26e248eed4da70fc94ddb3e58bc86b4 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Mon, 9 Oct 2023 11:36:44 +0200 Subject: [PATCH 15/15] Bump version to 0.18.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8175461..38dea09 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "finalfusion" -version = "0.17.1" +version = "0.18.0" edition = "2021" rust-version = "1.66" description = "Reader and writer for common word embedding formats"