Merge #62

62: Update alphabet API r=Nemo157 a=Nemo157 Co-authored-by: Wim Looman <[email protected]>
Nullus157 · Nov 6, 2020 · e92d51d · e92d51d
2 parents c219aa3 + 10f22a0
commit e92d51d
Show file tree

Hide file tree

Showing 8 changed files with 232 additions and 156 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "bs58"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Wim Looman <[email protected]>"]
 description = "Another Base58 codec implementation."
 repository = "https://github.com/mycorrhiza/bs58-rs"

diff --git a/cli/Cargo.toml b/cli/Cargo.toml
@@ -15,7 +15,7 @@ name = "bs58"
 path = "src/main.rs"
 
 [dependencies]
-bs58 = { version = "0.3.0", path = ".." }
+bs58 = { version = "0.4.0", path = ".." }
 paw = { version = "1.0.0", default-features = false }
 structopt = { version = "0.3.0", default-features = false, features = ["paw", "color"] }
 anyhow = "1.0.26"
diff --git a/cli/src/main.rs b/cli/src/main.rs
@@ -1,25 +1,26 @@
-use anyhow::anyhow;
+use anyhow::{anyhow, Context};
 use std::{
-    fmt,
+    convert::TryInto,
     io::{self, Read, Write},
     str::FromStr,
 };
 
+#[derive(Debug)]
 enum Alphabet {
     Bitcoin,
     Monero,
     Ripple,
     Flickr,
-    Custom([u8; 58]),
+    Custom(bs58::Alphabet),
 }
 
 impl Alphabet {
-    fn as_bytes(&self) -> &[u8; 58] {
+    fn as_alphabet(&self) -> &bs58::Alphabet {
         match self {
-            Alphabet::Bitcoin => bs58::alphabet::BITCOIN,
-            Alphabet::Monero => bs58::alphabet::MONERO,
-            Alphabet::Ripple => bs58::alphabet::RIPPLE,
-            Alphabet::Flickr => bs58::alphabet::FLICKR,
+            Alphabet::Bitcoin => bs58::Alphabet::BITCOIN,
+            Alphabet::Monero => bs58::Alphabet::MONERO,
+            Alphabet::Ripple => bs58::Alphabet::RIPPLE,
+            Alphabet::Flickr => bs58::Alphabet::FLICKR,
             Alphabet::Custom(custom) => custom,
         }
     }
@@ -36,15 +37,11 @@ impl FromStr for Alphabet {
             "flickr" => Alphabet::Flickr,
             custom if custom.starts_with("custom(") && custom.ends_with(')') => {
                 let alpha = custom.trim_start_matches("custom(").trim_end_matches(')');
-                let bytes = alpha.as_bytes();
-                if bytes.iter().any(|&c| c > 128) {
-                    return Err(anyhow!("custom alphabet must be ASCII characters only"));
-                }
-                if bytes.len() != 58 {
-                    return Err(anyhow!("custom alphabet is not 58 characters long"));
-                }
-                let ptr = bytes.as_ptr() as *const [u8; 58];
-                Alphabet::Custom(unsafe { *ptr })
+                let bytes = alpha
+                    .as_bytes()
+                    .try_into()
+                    .context("custom alphabet is not 58 characters long")?;
+                Alphabet::Custom(bs58::Alphabet::new(bytes)?)
             }
             other => {
                 return Err(anyhow!("'{}' is not a known alphabet", other));
@@ -53,18 +50,6 @@ impl FromStr for Alphabet {
     }
 }
 
-impl fmt::Debug for Alphabet {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            Alphabet::Bitcoin => f.debug_tuple("Bitcoin").finish(),
-            Alphabet::Monero => f.debug_tuple("Bitcoin").finish(),
-            Alphabet::Ripple => f.debug_tuple("Bitcoin").finish(),
-            Alphabet::Flickr => f.debug_tuple("Bitcoin").finish(),
-            Alphabet::Custom(custom) => f.debug_tuple("Custom").field(&&custom[..]).finish(),
-        }
-    }
-}
-
 #[derive(Debug, structopt::StructOpt)]
 #[structopt(name = "bs58", setting = structopt::clap::AppSettings::ColoredHelp)]
 /// A utility for encoding/decoding base58 encoded data.
@@ -87,14 +72,14 @@ fn main(args: Args) -> anyhow::Result<()> {
         io::stdin().read_to_string(&mut input)?;
         let trimmed = input.trim_end();
         let output = bs58::decode(trimmed)
-            .with_alphabet(args.alphabet.as_bytes())
+            .with_alphabet(args.alphabet.as_alphabet())
             .into_vec()?;
         io::stdout().write_all(&output)?;
     } else {
         let mut input = Vec::with_capacity(INITIAL_INPUT_CAPACITY);
         io::stdin().read_to_end(&mut input)?;
         let output = bs58::encode(input)
-            .with_alphabet(args.alphabet.as_bytes())
+            .with_alphabet(args.alphabet.as_alphabet())
             .into_string();
         io::stdout().write_all(output.as_bytes())?;
     }

diff --git a/src/alphabet.rs b/src/alphabet.rs
@@ -1,70 +1,209 @@
-//! Commonly used Base58 alphabets.
-
-/// Bitcoin's alphabet as defined in their Base58Check encoding.
-///
-/// See https://en.bitcoin.it/wiki/Base58Check_encoding#Base58_symbol_chart.
-pub const BITCOIN: &[u8; 58] = b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
-
-/// Monero's alphabet as defined in this forum post.
-///
-/// See https://forum.getmonero.org/4/academic-and-technical/221/creating-a-standard-for-physical-coins
-pub const MONERO: &[u8; 58] = b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
-
-/// Ripple's alphabet as defined in their wiki.
-///
-/// See https://wiki.ripple.com/Encodings
-pub const RIPPLE: &[u8; 58] = b"rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz";
-
-/// Flickr's alphabet for creating short urls from photo ids.
-///
-/// See https://www.flickr.com/groups/api/discuss/72157616713786392/
-pub const FLICKR: &[u8; 58] = b"123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ";
-
-/// The default alphabet used if none is given. Currently is the
-/// [`BITCOIN`](constant.BITCOIN.html) alphabet.
-pub const DEFAULT: &[u8; 58] = BITCOIN;
-
-/// Prepared Alphabet for [`EncodeBuilder`](crate::encode::EncodeBuilder) and
-/// [`DecodeBuilder`](crate::decode::DecodeBuilder).
+//! Support for configurable alphabets
+
+use core::fmt;
+
+/// Prepared Alphabet for
+/// [`EncodeBuilder::with_alphabet`](crate::encode::EncodeBuilder::with_alphabet) and
+/// [`DecodeBuilder::with_alphabet`](crate::decode::DecodeBuilder::with_alphabet).
 #[derive(Clone, Copy)]
-#[allow(missing_debug_implementations)]
 pub struct Alphabet {
     pub(crate) encode: [u8; 58],
     pub(crate) decode: [u8; 128],
 }
 
+/// Errors that could occur when preparing a Base58 alphabet.
+#[non_exhaustive]
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum Error {
+    /// The alphabet contained the same character at two or more indexes.
+    DuplicateCharacter {
+        /// The duplicate character encountered.
+        character: char,
+        /// The first index the character was seen at.
+        first: usize,
+        /// The second index the character was seen at.
+        second: usize,
+    },
+
+    /// The alphabet contained a multi-byte (or non-utf8) character.
+    NonAsciiCharacter {
+        /// The index at which the non-ASCII character was seen.
+        index: usize,
+    },
+}
+
 impl Alphabet {
-    /// Bitcoin's prepared alphabet.
-    pub const BITCOIN: &'static Self = &Self::new(BITCOIN);
-    /// Monero's prepared alphabet.
-    pub const MONERO: &'static Self = &Self::new(MONERO);
-    /// Ripple's prepared alphabet.
-    pub const RIPPLE: &'static Self = &Self::new(RIPPLE);
-    /// Flickr's prepared alphabet.
-    pub const FLICKR: &'static Self = &Self::new(FLICKR);
-    /// The default prepared alphabet used if none is given. Currently is the
-    /// [`Alphabet::Bitcoin`](Alphabet::BITCOIN) alphabet.
+    /// Bitcoin's alphabet as defined in their Base58Check encoding.
+    ///
+    /// See <https://en.bitcoin.it/wiki/Base58Check_encoding#Base58_symbol_chart>
+    pub const BITCOIN: &'static Self =
+        &Self::new_unwrap(b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz");
+
+    /// Monero's alphabet as defined in this forum post.
+    ///
+    /// See <https://forum.getmonero.org/4/academic-and-technical/221/creating-a-standard-for-physical-coins>
+    pub const MONERO: &'static Self =
+        &Self::new_unwrap(b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz");
+
+    /// Ripple's alphabet as defined in their wiki.
+    ///
+    /// See <https://wiki.ripple.com/Encodings>
+    pub const RIPPLE: &'static Self =
+        &Self::new_unwrap(b"rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz");
+
+    /// Flickr's alphabet for creating short urls from photo ids.
+    ///
+    /// See <https://www.flickr.com/groups/api/discuss/72157616713786392/>
+    pub const FLICKR: &'static Self =
+        &Self::new_unwrap(b"123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ");
+
+    /// The default alphabet used if none is given. Currently this is the
+    /// [`BITCOIN`](Self::BITCOIN) alphabet.
     pub const DEFAULT: &'static Self = Self::BITCOIN;
 
-    /// Create prepared alphabet.
-    pub const fn new(base: &[u8; 58]) -> Alphabet {
+    /// Create a prepared alphabet, checking that the alphabet is pure ASCII and that there are
+    /// no duplicate characters, which would result in inconsistent encoding/decoding.
+    ///
+    /// ```rust
+    /// let alpha = bs58::Alphabet::new(
+    ///     b" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXY"
+    /// )?;
+    ///
+    /// let decoded = bs58::decode("he11owor1d")
+    ///     .with_alphabet(bs58::Alphabet::RIPPLE)
+    ///     .into_vec()?;
+    /// let encoded = bs58::encode(decoded)
+    ///     .with_alphabet(&alpha)
+    ///     .into_string();
+    ///
+    /// assert_eq!("#ERRN)N RD", encoded);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    /// ## Errors
+    ///
+    /// ### Duplicate Character
+    ///
+    /// ```rust
+    /// let alpha = b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+    /// assert_eq!(
+    ///     bs58::alphabet::Error::DuplicateCharacter { character: 'a', first: 0, second: 1 },
+    ///     bs58::Alphabet::new(alpha).unwrap_err());
+    /// ```
+    ///
+    /// ### Non-ASCII Character
+    ///
+    /// ```rust
+    /// let mut alpha = *b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+    /// alpha[1] = 255;
+    /// assert_eq!(
+    ///     bs58::alphabet::Error::NonAsciiCharacter { index: 1 },
+    ///     bs58::Alphabet::new(&alpha).unwrap_err());
+    /// ```
+    pub const fn new(base: &[u8; 58]) -> Result<Self, Error> {
         let mut encode = [0x00; 58];
         let mut decode = [0xFF; 128];
 
         let mut i = 0;
         while i < encode.len() {
+            if base[i] >= 128 {
+                return Err(Error::NonAsciiCharacter { index: i });
+            }
+            if decode[base[i] as usize] != 0xFF {
+                return Err(Error::DuplicateCharacter {
+                    character: base[i] as char,
+                    first: decode[base[i] as usize] as usize,
+                    second: i,
+                });
+            }
             encode[i] = base[i];
             decode[base[i] as usize] = i as u8;
             i += 1;
         }
 
-        Alphabet { encode, decode }
+        Ok(Self { encode, decode })
+    }
+
+    /// Same as [`Self::new`], but panics instead of returning an [`Err`] on bad input.
+    ///
+    /// Intended to support usage in `const` contexts until [`Result::unwrap`] can be called there.
+    ///
+    /// ```rust
+    /// const ALPHA: &'static bs58::Alphabet = &bs58::Alphabet::new_unwrap(
+    ///     b" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXY"
+    /// );
+    ///
+    /// let decoded = bs58::decode("he11owor1d")
+    ///     .with_alphabet(bs58::Alphabet::RIPPLE)
+    ///     .into_vec()?;
+    /// let encoded = bs58::encode(decoded)
+    ///     .with_alphabet(ALPHA)
+    ///     .into_string();
+    ///
+    /// assert_eq!("#ERRN)N RD", encoded);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// If your alphabet is inconsistent then this will fail to compile in a `const` context:
+    ///
+    /// ```compile_fail
+    /// const _: &'static bs58::Alphabet = &bs58::Alphabet::new_unwrap(
+    ///     b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+    /// );
+    /// ```
+    pub const fn new_unwrap(base: &[u8; 58]) -> Self {
+        let result = Self::new(base);
+        #[allow(unconditional_panic)] // https://github.com/rust-lang/rust/issues/78803
+        [][match result {
+            Ok(alphabet) => return alphabet,
+            Err(_) => 0,
+        }]
     }
 }
 
-/// `std::borrow::Cow` alternative.
-#[allow(variant_size_differences)]
-pub(crate) enum AlphabetCow<'a> {
-    Borrowed(&'a Alphabet),
-    Owned(Alphabet),
+impl fmt::Debug for Alphabet {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if let Ok(s) = core::str::from_utf8(&self.encode) {
+            f.debug_tuple("Alphabet").field(&s).finish()
+        } else {
+            unreachable!()
+        }
+    }
+}
+
+#[cfg(feature = "std")]
+#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
+impl std::error::Error for Error {}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Error::DuplicateCharacter {
+                character,
+                first,
+                second,
+            } => write!(
+                f,
+                "alphabet contained a duplicate character `{}` at indexes {} and {}",
+                character, first, second,
+            ),
+            Error::NonAsciiCharacter { index } => {
+                write!(f, "alphabet contained a non-ascii character at {}", index)
+            }
+        }
+    }
+}
+
+// Force evaluation of the associated constants to make sure they don't error
+const _: () = {
+    let _ = Alphabet::BITCOIN;
+    let _ = Alphabet::MONERO;
+    let _ = Alphabet::RIPPLE;
+    let _ = Alphabet::FLICKR;
+    let _ = Alphabet::DEFAULT;
+};
+
+#[test]
+#[should_panic]
+fn test_new_unwrap_does_panic() {
+    Alphabet::new_unwrap(b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
 }