docs: update README and example tests for configurability

noir-lang · Oct 29, 2024 · dd5e458 · dd5e458
1 parent 9a26593
commit dd5e458
Show file tree

Hide file tree

Showing 2 changed files with 67 additions and 17 deletions.
diff --git a/README.md b/README.md
@@ -2,38 +2,58 @@
 
 A Base64 encoding/decoding library written in Noir which can encode arbitrary byte arrays into Base64 and decode Base64-encoded byte arrays (e.g. `"SGVsbG8gV29ybGQ=".as_bytes()`).
 
-# Usage
+## Usage
+### Configuration
+Start by selecting the encoder or decoder for your configuration. These are defined separately so that only one lookup table will be instantiated at a time, since many cases will require either an encoder or a decoder but not both.
 
-### `fn base64_encode`
-Takes an arbitrary byte array as input, unpacks it into Base64 values, then encodes each Base64 value into an ASCII character according to the [standard Base64 alphabet](https://datatracker.ietf.org/doc/html/rfc4648#section-4), to return a byte array representing the Base64 encoding. The encoded result is *not padded*, so padding must be handled separately.
+RFC 4648 specifies multiple alphabets, including the [standard Base 64 Alphabet](https://datatracker.ietf.org/doc/html/rfc4648#section-4) known as `base64` and the ["URL and Filename Safe Alphabet"](https://datatracker.ietf.org/doc/html/rfc4648#section-5) known as `base64url`. It also specifies that [padding](https://datatracker.ietf.org/doc/html/rfc4648#section-3.2) should be required in the general case but can be explicitly omitted as an option.
 
-### `fn base64_decode`
-Takes an ASCII byte array that encodes a Base64 string and decodes it into bytes. Input data is expected to be unpadded, so padding characters will cause decoding to fail.
+Available encoder configurations:
+- `BASE64_ENCODER_STANDARD`: uses the standard alphabet (base64) and adds padding.
+- `BASE64_ENCODER_STANDARD_NO_PAD`: uses the standard alphabet (base64), but omits padding.
+- `BASE64_ENCODER_URL_SAFE`: uses the "URL and Filename Safe Alphabet" (base64url) and adds padding.
+- `BASE64_ENCODER_URL_SAFE_NO_PAD`: uses the "URL and Filename Safe Alphabet" (base64url), but omits padding.
 
-### `fn base64_encode_elements`
-Takes an input byte array of ASCII characters and produces an output byte array of base64-encoded characters. Data is not packed i.e. each output array element maps to a 6-bit base64 character.
+Available decoder configurations:
+- `BASE64_DECODER_STANDARD`: uses the standard alphabet (base64) and expects correct padding.
+- `BASE64_DECODER_STANDARD_NO_PAD`: uses the standard alphabet (base64), but expects all padding characters to have been stripped. A padding character encountered during decoding will trigger an error.
+- `BASE64_DECODER_URL_SAFE`: uses the "URL and Filename Safe Alphabet" (base64url) and expects correct padding.
+- `BASE64_DECODER_URL_SAFE_NO_PAD`: uses the "URL and Filename Safe Alphabet" (base64url), but expects all padding characters to have been stripped. A padding character encountered during decoding will trigger an error.
 
-### `fn base64_decode_elements`
-Takes an input byte array of base64 characters and produces an output byte array of ASCII characters. Input data is not packed i.e. each input element maps to a 6-bit base64 character. Input data is expected not to contain padding characters. Padding characters will cause decoding to fail.
+### `fn encode`
+Takes an arbitrary byte array as input, encodes it in Base64 according to the alphabet and padding rules specified by the configuration, then encodes each Base64 character into UTF-8 to return a byte array representing the Base64 encoding.
 
-### Example usage
+```
+// bytes: [u8; N]
+let base64 = BASE64_ENCODER_STANDARD.encode(bytes);
+```
+
+### `fn decode`
+Takes a UTF-8 byte array that encodes a Base64 string and attempts to decode it into bytes according to the provided configuration specifying the alphabet and padding rules.
+
+```
+// base64: [u8; N]
+let bytes = BASE64_DECODER_STANDARD.decode(base64);
+```
+
+## Example usage
 (see tests in `lib.nr` for more examples)
 
 ```
-use dep::noir_base64;
 fn encode_and_decode() {
     let input: str<88> = "The quick brown fox jumps over the lazy dog, while 42 ravens perch atop a rusty mailbox.";
-    let base64_encoded: str<118> = "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZywgd2hpbGUgNDIgcmF2ZW5zIHBlcmNoIGF0b3AgYSBydXN0eSBtYWlsYm94Lg";
+    let base64_encoded = "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZywgd2hpbGUgNDIgcmF2ZW5zIHBlcmNoIGF0b3AgYSBydXN0eSBtYWlsYm94Lg==";
 
-    let encoded:[u8; 118] = noir_base64::base64_encode(input.as_bytes());
+    let encoded:[u8; 120] = BASE64_ENCODER_STANDARD.encode(input.as_bytes());
     assert(encoded == base64_encoded.as_bytes());
 
-    let decoded: [u8; 88] = noir_base64::base64_decode(encoded);
+    let decoded: [u8; 88] = BASE64_DECODER_STANDARD.decode(encoded);
     assert(decoded == input.as_bytes());
 }
 ```
 
-# Costs
 
-- `base64_encode` will encode an array of 88 bytes in ~1182 gates, plus a ~64 gate cost to initialize the encoding lookup table (the initialization cost is incurred once regardless of the number of encodings).
-- `base64_decode` will decode an array of 118 bytes in ~2150 gates, plus a ~256 gate cost to initialize the decoding lookup table (the initialization cost is incurred once regardless of the number of decodings).
+## Costs
+
+- `encode` will encode an array of 88 bytes in ~1182 gates, plus a ~64 gate cost to initialize the encoding lookup table (the initialization cost is incurred once regardless of the number of encodings).
+- `decode` will decode an array of 118 bytes in ~2150 gates, plus a ~256 gate cost to initialize the decoding lookup table (the initialization cost is incurred once regardless of the number of decodings).
diff --git a/src/lib.nr b/src/lib.nr
@@ -1,3 +1,20 @@
+// Encodings use the alphabets and padding rules specified in RFC 4648
+// (https://datatracker.ietf.org/doc/html/rfc4648):
+//
+//    A 65-character subset of US-ASCII is used, enabling 6 bits to be
+//    represented per printable character.  (The extra 65th character, "=",
+//    is used to signify a special processing function.)
+//
+//    The encoding process represents 24-bit groups of input bits as output
+//    strings of 4 encoded characters.  Proceeding from left to right, a
+//    24-bit input group is formed by concatenating 3 8-bit input groups.
+//    These 24 bits are then treated as 4 concatenated 6-bit groups, each
+//    of which is translated into a single character in the base 64
+//    alphabet.
+//
+//    Each 6-bit group is used as an index into an array of 64 printable
+//    characters.  The character referenced by the index is placed in the
+//    output string.
 mod encoder;
 pub use encoder::{
     STANDARD as BASE64_ENCODER_STANDARD, STANDARD_NO_PAD as BASE64_ENCODER_STANDARD_NO_PAD,
@@ -14,6 +31,19 @@ pub(crate) mod defaults {
     pub(crate) global BASE64_PADDING_CHAR: u8 = 61;
 }
 
+#[test]
+fn encode_and_decode() { // Round-trips an 88-byte string through the padded standard encoder and decoder.
+    let input: str<88> =
+        "The quick brown fox jumps over the lazy dog, while 42 ravens perch atop a rusty mailbox.";
+    let base64_encoded = "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZywgd2hpbGUgNDIgcmF2ZW5zIHBlcmNoIGF0b3AgYSBydXN0eSBtYWlsYm94Lg==";
+    // The 88 input bytes encode to 120 characters, including the trailing "==" padding.
+    let encoded: [u8; 120] = BASE64_ENCODER_STANDARD.encode(input.as_bytes());
+    assert(encoded == base64_encoded.as_bytes());
+    // Decoding the padded output must reproduce the original 88 input bytes.
+    let decoded: [u8; 88] = BASE64_DECODER_STANDARD.decode(encoded);
+    assert(decoded == input.as_bytes());
+}
+
 #[test]
 fn encode_and_decode_no_pad() {
     let input: str<88> =