From 40e162c843896ee6d29af56efb1a1e574da8109f Mon Sep 17 00:00:00 2001 From: Lyra Naeseth Date: Sun, 3 Dec 2023 15:47:59 -0800 Subject: [PATCH 1/2] Rewrite the crate docstring --- src/alphabet.rs | 12 +-- src/lib.rs | 222 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 165 insertions(+), 69 deletions(-) diff --git a/src/alphabet.rs b/src/alphabet.rs index efdc56e..cfbb402 100644 --- a/src/alphabet.rs +++ b/src/alphabet.rs @@ -160,21 +160,21 @@ impl fmt::Display for ParseAlphabetError { #[cfg(any(feature = "std", test))] impl error::Error for ParseAlphabetError {} -/// The standard alphabet (uses `+` and `/`). +/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][]. /// -/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3). +/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4 pub const STANDARD: Alphabet = Alphabet::from_str_unchecked( "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", ); -/// The URL safe alphabet (uses `-` and `_`). +/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][]. /// -/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4). +/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5 pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked( "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", ); -/// The `crypt(3)` alphabet (uses `.` and `/` as the first two values). +/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters). /// /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses. pub const CRYPT: Alphabet = Alphabet::from_str_unchecked( @@ -186,7 +186,7 @@ pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked( "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", ); -/// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`). +/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`). 
/// /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3) pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked( diff --git a/src/lib.rs b/src/lib.rs index 50f433d..ccbfc8c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,100 +1,196 @@ -//! # Getting started +//! Correct, fast, and configurable [base64][] decoding and encoding. Base64 +//! transports binary data efficiently in contexts where only plain text is +//! allowed. //! -//! 1. Perhaps one of the preconfigured engines in [engine::general_purpose] will suit, e.g. -//! [engine::general_purpose::STANDARD_NO_PAD]. -//! - These are re-exported in [prelude] with a `BASE64_` prefix for those who prefer to -//! `use base64::prelude::*` or equivalent, e.g. [prelude::BASE64_STANDARD_NO_PAD] -//! 1. If not, choose which alphabet you want. Most usage will want [alphabet::STANDARD] or [alphabet::URL_SAFE]. -//! 1. Choose which [Engine] implementation you want. For the moment there is only one: [engine::GeneralPurpose]. -//! 1. Configure the engine appropriately using the engine's `Config` type. -//! - This is where you'll select whether to add padding (when encoding) or expect it (when -//! decoding). If given the choice, prefer no padding. -//! 1. Build the engine using the selected alphabet and config. +//! [base64]: https://developer.mozilla.org/en-US/docs/Glossary/Base64 //! -//! For more detail, see below. +//! # Usage //! -//! ## Alphabets +//! Use an [`Engine`] to decode or encode base64, configured with the base64 +//! alphabet and padding behavior best suited to your application. //! -//! An [alphabet::Alphabet] defines what ASCII symbols are used to encode to or decode from. +//! ## Engine setup //! -//! Constants in [alphabet] like [alphabet::STANDARD] or [alphabet::URL_SAFE] provide commonly used -//! alphabets, but you can also build your own custom [alphabet::Alphabet] if needed. +//! There is more than one way to encode a stream of bytes as “base64”. +//! 
Different applications use different encoding +//! [alphabets][alphabet::Alphabet] and +//! [padding behaviors][engine::general_purpose::GeneralPurposeConfig]. //! -//! ## Engines +//! ### Encoding alphabet //! -//! Once you have an `Alphabet`, you can pick which `Engine` you want. A few parts of the public -//! API provide a default, but otherwise the user must provide an `Engine` to use. +//! Almost all base64 [alphabets][alphabet::Alphabet] use `A-Z`, `a-z`, and +//! `0-9`, which gives nearly 64 characters (26 + 26 + 10 = 62), but they differ +//! in their choice of their final 2. //! -//! See [Engine] for more. +//! Most applications use the [standard][alphabet::STANDARD] alphabet specified +//! in [RFC 4648][rfc-alphabet]. If that’s all you need, you can get started +//! quickly by using the pre-configured +//! [`STANDARD`][engine::general_purpose::STANDARD] engine: //! -//! ## Config +#![cfg_attr(feature = "alloc", doc = "```")] +#![cfg_attr(not(feature = "alloc"), doc = "```ignore")] +//! use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _}; //! -//! In addition to an `Alphabet`, constructing an `Engine` also requires an [engine::Config]. Each -//! `Engine` has a corresponding `Config` implementation since different `Engine`s may offer different -//! levels of configurability. +//! # fn main() -> Result<(), base64::DecodeError> { +//! assert_eq!(BASE64.decode(b"+uwgVQA=")?, b"\xFA\xEC\x20\x55\0"); +//! assert_eq!(BASE64.encode(b"\xFF\xEC\x20\x55\0"), "/+wgVQA="); +//! # Ok(()) +//! # } +//! ``` //! -//! # Encoding +//! [rfc-alphabet]: https://datatracker.ietf.org/doc/html/rfc4648#section-4 //! -//! Several different encoding methods on [Engine] are available to you depending on your desire for -//! convenience vs performance. +//! Other common alphabets are available in the [`alphabet`] module. //! -//! | Method | Output | Allocates | -//! | ------------------------ | ---------------------------- | ------------------------------ | -//! 
| [Engine::encode] | Returns a new `String` | Always | -//! | [Engine::encode_string] | Appends to provided `String` | Only if `String` needs to grow | -//! | [Engine::encode_slice] | Writes to provided `&[u8]` | Never - fastest | +//! #### URL-safe alphabet //! -//! All of the encoding methods will pad as per the engine's config. +//! The standard alphabet uses `+` and `/` as its two non-alphanumeric tokens, +//! which cannot be safely used in URLs without encoding them as `%2B` and +//! `%2F`. //! -//! # Decoding +//! To avoid that, some applications use a [“URL-safe” alphabet][alphabet::URL_SAFE], +//! which uses `-` and `_` instead. To use that alternative alphabet, use the +//! [`URL_SAFE`][engine::general_purpose::URL_SAFE] engine: //! -//! Just as for encoding, there are different decoding methods available. +#![cfg_attr(feature = "alloc", doc = "```")] +#![cfg_attr(not(feature = "alloc"), doc = "```ignore")] +//! use base64::{engine::general_purpose::URL_SAFE as BASE64, Engine as _}; //! -//! | Method | Output | Allocates | -//! | ------------------------ | ----------------------------- | ------------------------------ | -//! | [Engine::decode] | Returns a new `Vec<u8>` | Always | -//! | [Engine::decode_vec] | Appends to provided `Vec<u8>` | Only if `Vec` needs to grow | -//! | [Engine::decode_slice] | Writes to provided `&[u8]` | Never - fastest | +//! # fn main() -> Result<(), base64::DecodeError> { +//! assert_eq!(BASE64.decode(b"-uwgVQA=")?, b"\xFA\xEC\x20\x55\0"); +//! assert_eq!(BASE64.encode(b"\xFF\xEC\x20\x55\0"), "_-wgVQA="); +//! # Ok(()) +//! # } +//! ``` //! -//! Unlike encoding, where all possible input is valid, decoding can fail (see [DecodeError]). +//! ### Padding characters //! -//! Input can be invalid because it has invalid characters or invalid padding. The nature of how -//! padding is checked depends on the engine's config. -//! Whitespace in the input is invalid, just like any other non-base64 byte. +//! 
Each base64 character represents 6 bits (2⁶ = 64) of the original binary +//! data, and every 3 bytes of input binary data will encode to 4 base64 +//! characters (8 bits × 3 = 6 bits × 4 = 24 bits). //! -//! # `Read` and `Write` +//! When the input is not an even multiple of 3 bytes in length, [canonical][] +//! base64 encoders insert padding characters at the end, so that the output +//! length is always a multiple of 4: //! -//! To decode a [std::io::Read] of b64 bytes, wrap a reader (file, network socket, etc) with -//! [read::DecoderReader]. +//! [canonical]: https://datatracker.ietf.org/doc/html/rfc4648#section-3.5 //! -//! To write raw bytes and have them b64 encoded on the fly, wrap a [std::io::Write] with -//! [write::EncoderWriter]. +#![cfg_attr(feature = "alloc", doc = "```")] +#![cfg_attr(not(feature = "alloc"), doc = "```ignore")] +//! use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _}; //! -//! There is some performance overhead (15% or so) because of the necessary buffer shuffling -- -//! still fast enough that almost nobody cares. Also, these implementations do not heap allocate. +//! assert_eq!(BASE64.encode(b""), ""); +//! assert_eq!(BASE64.encode(b"f"), "Zg=="); +//! assert_eq!(BASE64.encode(b"fo"), "Zm8="); +//! assert_eq!(BASE64.encode(b"foo"), "Zm9v"); +//! ``` //! -//! # `Display` +//! Canonical encoding ensures that base64 encodings will be exactly the same, +//! byte-for-byte, regardless of input length. But the `=` padding characters +//! aren’t necessary for decoding, and they may be omitted by using a +//! [`NO_PAD`][engine::general_purpose::NO_PAD] configuration: //! -//! See [display] for how to transparently base64-encode data via a `Display` implementation. +#![cfg_attr(feature = "alloc", doc = "```")] +#![cfg_attr(not(feature = "alloc"), doc = "```ignore")] +//! use base64::{engine::general_purpose::STANDARD_NO_PAD as BASE64, Engine as _}; //! -//! # Examples +//! assert_eq!(BASE64.encode(b""), ""); +//! 
assert_eq!(BASE64.encode(b"f"), "Zg"); +//! assert_eq!(BASE64.encode(b"fo"), "Zm8"); +//! assert_eq!(BASE64.encode(b"foo"), "Zm9v"); +//! ``` //! -//! ## Using predefined engines +//! The pre-configured `NO_PAD` engines will reject inputs containing padding +//! `=` characters. To encode without padding and still accept padding while +//! decoding, create an [engine][engine::general_purpose::GeneralPurpose] with +//! that [padding mode][engine::DecodePaddingMode]. //! #![cfg_attr(feature = "alloc", doc = "```")] #![cfg_attr(not(feature = "alloc"), doc = "```ignore")] -//! use base64::{Engine as _, engine::general_purpose}; +//! # use base64::{engine::general_purpose::STANDARD_NO_PAD, Engine as _}; +//! assert_eq!(STANDARD_NO_PAD.decode(b"Zm8="), Err(base64::DecodeError::InvalidPadding)); +//! ``` +//! +//! ## Memory allocation +//! +//! The [decode][Engine::decode()] and [encode][Engine::encode()] engine methods +//! allocate memory for their results – `decode` returns a `Vec<u8>` and +//! `encode` returns a `String`. To instead decode or encode into a buffer that +//! you allocated, use one of the alternative methods: +//! +//! #### Decoding +//! +//! | Method | Output | Allocates memory | +//! | -------------------------- | ----------------------------- | ----------------------------- | +//! | [`Engine::decode`] | returns a new `Vec<u8>` | always | +//! | [`Engine::decode_vec`] | appends to provided `Vec<u8>` | if `Vec` lacks capacity | +//! | [`Engine::decode_slice`] | writes to provided `&[u8]` | never | //! -//! let orig = b"data"; -//! let encoded: String = general_purpose::STANDARD_NO_PAD.encode(orig); -//! assert_eq!("ZGF0YQ", encoded); -//! assert_eq!(orig.as_slice(), &general_purpose::STANDARD_NO_PAD.decode(encoded).unwrap()); +//! #### Encoding //! -//! // or, URL-safe -//! let encoded_url = general_purpose::URL_SAFE_NO_PAD.encode(orig); +//! | Method | Output | Allocates memory | +//! 
| -------------------------- | ---------------------------- | ------------------------------ | +//! | [`Engine::encode`] | returns a new `String` | always | +//! | [`Engine::encode_string`] | appends to provided `String` | if `String` lacks capacity | +//! | [`Engine::encode_slice`] | writes to provided `&[u8]` | never | +//! +//! ## Input and output +//! +//! The `base64` crate can [decode][Engine::decode()] and +//! [encode][Engine::encode()] values in memory, while +//! [`DecoderReader`][read::DecoderReader] and +//! [`EncoderWriter`][write::EncoderWriter] provide streaming decoding and +//! encoding for any [readable][std::io::Read] or [writable][std::io::Write] +//! byte stream. +//! +//! #### Decoding +//! +#![cfg_attr(feature = "std", doc = "```")] +#![cfg_attr(not(feature = "std"), doc = "```ignore")] +//! # use std::io; +//! use base64::{engine::general_purpose::STANDARD, read::DecoderReader}; +//! +//! # fn main() -> Result<(), Box<dyn std::error::Error>> { +//! let mut input = io::stdin(); +//! let mut decoder = DecoderReader::new(&mut input, &STANDARD); +//! io::copy(&mut decoder, &mut io::stdout())?; +//! # Ok(()) +//! # } +//! ``` +//! +//! #### Encoding +//! +#![cfg_attr(feature = "std", doc = "```")] +#![cfg_attr(not(feature = "std"), doc = "```ignore")] +//! # use std::io; +//! use base64::{engine::general_purpose::STANDARD, write::EncoderWriter}; +//! +//! # fn main() -> Result<(), Box<dyn std::error::Error>> { +//! let mut output = io::stdout(); +//! let mut encoder = EncoderWriter::new(&mut output, &STANDARD); +//! io::copy(&mut io::stdin(), &mut encoder)?; +//! # Ok(()) +//! # } +//! ``` +//! +//! #### Display +//! +//! If you only need a base64 representation for implementing the +//! [`Display`][std::fmt::Display] trait, use +//! [`Base64Display`][display::Base64Display]: +//! +//! ``` +//! use base64::{display::Base64Display, engine::general_purpose::STANDARD}; +//! +//! let value = Base64Display::new(b"\0\x01\x02\x03", &STANDARD); +//! 
assert_eq!("base64: AAECAw==", format!("base64: {}", value)); //! ``` //! -//! ## Custom alphabet, config, and engine +//! # Configuration +//! +//! Decoding and encoding behavior can be customized by creating an +//! [engine][engine::GeneralPurpose] with an [alphabet][alphabet::Alphabet] and +//! [padding configuration][engine::GeneralPurposeConfig]: //! #![cfg_attr(feature = "alloc", doc = "```")] #![cfg_attr(not(feature = "alloc"), doc = "```ignore")] From 72f25a8b033c09f201d00b13f0472ec0772ed85e Mon Sep 17 00:00:00 2001 From: Lyra Naeseth Date: Sun, 3 Dec 2023 16:12:48 -0800 Subject: [PATCH 2/2] Simplify the example program --- Cargo.toml | 1 + examples/base64.rs | 42 +++++++++++++++++------------------------- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 36b0778..1ca4364 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,7 @@ rustdoc-args = ["--generate-link-to-definition"] criterion = "0.4.0" rand = { version = "0.8.5", features = ["small_rng"] } structopt = "0.3.26" +strum = { version = "0.25", features = ["derive"] } # test fixtures for engine tests rstest = "0.13.0" rstest_reuse = "0.6.0" diff --git a/examples/base64.rs b/examples/base64.rs index 0a214d2..5c82061 100644 --- a/examples/base64.rs +++ b/examples/base64.rs @@ -2,46 +2,35 @@ use std::fs::File; use std::io::{self, Read}; use std::path::PathBuf; use std::process; -use std::str::FromStr; use base64::{alphabet, engine, read, write}; use structopt::StructOpt; -#[derive(Debug, StructOpt)] +#[derive(StructOpt, strum::EnumString, Default, Debug)] +#[strum(serialize_all = "kebab-case")] enum Alphabet { + #[default] Standard, UrlSafe, } -impl Default for Alphabet { - fn default() -> Self { - Self::Standard - } -} - -impl FromStr for Alphabet { - type Err = String; - fn from_str(s: &str) -> Result { - match s { - "standard" => Ok(Self::Standard), - "urlsafe" => Ok(Self::UrlSafe), - _ => Err(format!("alphabet '{}' unrecognized", s)), - } - } -} - /// 
Base64 encode or decode FILE (or standard input), to standard output. #[derive(Debug, StructOpt)] struct Opt { - /// decode data + /// Decode the base64-encoded input (default: encode the input as base64). #[structopt(short = "d", long = "decode")] decode: bool, - /// The alphabet to choose. Defaults to the standard base64 alphabet. - /// Supported alphabets include "standard" and "urlsafe". + + /// The encoding alphabet: "standard" (default) or "url-safe". #[structopt(long = "alphabet")] alphabet: Option<Alphabet>, - /// The file to encode/decode. - #[structopt(parse(from_os_str))] + + /// Omit padding characters while encoding, and reject them while decoding. + #[structopt(short = "p")] + no_padding: bool, + + /// The file to encode or decode. + #[structopt(name = "FILE", parse(from_os_str))] file: Option<PathBuf>, } @@ -66,7 +55,10 @@ fn main() { Alphabet::Standard => alphabet::STANDARD, Alphabet::UrlSafe => alphabet::URL_SAFE, }, - engine::general_purpose::PAD, + match opt.no_padding { + true => engine::general_purpose::NO_PAD, + false => engine::general_purpose::PAD, + }, ); let stdout = io::stdout();