Extend Base64 module

Extended base64 module to support encoding and decoding: - using URL-safe characters - with and without padding '=' at the end
iotivity · Sep 29, 2023 · f08116e · f08116e
1 parent e5899d1
commit f08116e
Show file tree

Hide file tree

Showing 3 changed files with 676 additions and 161 deletions.
diff --git a/api/oc_base64.c b/api/oc_base64.c
@@ -18,50 +18,69 @@
 
 #include "oc_base64.h"
 
+#include <assert.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+
 /* This module implements routines for Base64 encoding/decoding
- * based on their definitions in RFC 4648.
- */
+ * based on their definitions in RFC 4648. */
+
+size_t
+oc_base64_encoded_output_size(size_t size, bool padding)
+{
+  if (padding) {
+    size_t output_size = (size / 3) * 4;
+    if (size % 3 != 0) {
+      output_size += 4;
+    }
+    return output_size;
+  }
+  return (size * 4 + 2) / 3;
+}
 
 int
-oc_base64_encode(const uint8_t *input, size_t input_len, uint8_t *output_buffer,
-                 size_t output_buffer_len)
+oc_base64_encode_v1(oc_base64_encoding_t encoding, bool padding,
+                    const uint8_t *input, size_t input_size,
+                    uint8_t *output_buffer, size_t output_buffer_size)
 {
+  /* handle the case that an empty input is provided */
+  if (input_size == 0) {
+    return 0;
+  }
+
   /* The Base64 alphabet. This table provides a mapping from 6-bit binary
-   * values to Base64 characters.
-   */
-  const uint8_t alphabet[65] = {
+   * values to Base64 characters. */
+  uint8_t alphabet[64] = {
     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
     'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
     'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
     'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
-    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '=',
+    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',
   };
+  /** For URLs the '+' and '/' are replaced */
+  if (encoding == OC_BASE64_ENCODING_URL) {
+    alphabet[62] = '-';
+    alphabet[63] = '_';
+  }
 
   /* Calculate the length of the Base64 encoded output.
    * Every sequence of 3 bytes (with padding, if necessary)
    * is represented as 4 bytes (characters) in Base64.
    */
-  size_t output_len = (input_len / 3) * 4;
-  if (input_len % 3 != 0) {
-    output_len += 4;
-  }
-
+  size_t output_len = oc_base64_encoded_output_size(input_size, padding);
   /* If the output buffer provided was not large enough, return an error. */
-  if (output_buffer_len < output_len) {
+  if (output_buffer_size < output_len) {
     return -1;
   }
 
-  /* handle the case that an empty input is provided */
-  if (input_len == 0) {
-    output_buffer[0] = '\0';
-  }
   size_t i;
   int j = 0;
   uint8_t val = 0;
   /* Process every byte of input by keeping state across 3 byte blocks
    * to capture 4 6-bit binary blocks that each map to a Base64 character.
    */
-  for (i = 0; i < input_len; i++) {
+  for (i = 0; i < input_size; i++) {
     /* This is the first byte of a 3 byte block of input. Its first
      * 6 bits would be encoded into a character from the Base64 alphabet.
      * Its last 2 bits would be the first 2 bits of the following 6-bit binary
@@ -102,72 +121,139 @@ oc_base64_encode(const uint8_t *input, size_t input_len, uint8_t *output_buffer,
     }
   }
 
-  /* If the input size wasn't a multiple of 3, we would
-   * have leftover bits to encode into the next Base64 character.
-   */
+  /* If the input size wasn't a multiple of 3, we would have leftover bits to
+   * encode into the next Base64 character. */
   if (i % 3 != 0) {
     output_buffer[j++] = alphabet[val];
   }
 
-  /* Any leftover space in the encoded string is padded with the
-   * = character.
-   */
+  if (!padding) {
+    assert(j == (int)output_len);
+    return j;
+  }
+
+  /* Any leftover space in the encoded string is padded with the '='
+   * character.*/
   while (j < (int)output_len) {
     output_buffer[j++] = '=';
   }
-
   return j;
 }
 
 int
-oc_base64_decode(uint8_t *str, size_t len)
+oc_base64_encode(const uint8_t *input, size_t input_size,
+                 uint8_t *output_buffer, size_t output_buffer_size)
+{
+  return oc_base64_encode_v1(OC_BASE64_ENCODING_STD, true, input, input_size,
+                             output_buffer, output_buffer_size);
+}
+
+static int
+base64_decode_char(oc_base64_encoding_t encoding, unsigned char c)
 {
-  /* All valid Base64 encoded strings will be multiples of 4 */
-  if (0 != (len % 4)) {
+  if (c >= 'A' && c <= 'Z') {
+    return c - 65;
+  }
+  if (c >= 'a' && c <= 'z') {
+    return c - 71;
+  }
+  if (c >= '0' && c <= '9') {
+    return c + 4;
+  }
+  if (encoding == OC_BASE64_ENCODING_STD) {
+    if (c == '+') {
+      return 62;
+    }
+    if (c == '/') {
+      return 63;
+    }
+  } else {
+    if (c == '-') {
+      return 62;
+    }
+    if (c == '_') {
+      return 63;
+    }
+  }
+  return -1;
+}
+
+int
+oc_base64_decoded_output_size(const uint8_t *input, size_t input_size,
+                              bool padding)
+{
+  if (input_size == 0) {
+    return 0;
+  }
+
+  size_t padding_count = 0;
+  if (padding) {
+    /* All valid padded Base64 encoded strings will be multiples of 4 */
+    if (input_size % 4 != 0) {
+      return -1;
+    }
+    for (size_t i = input_size - 2; i < input_size; ++i) {
+      if (input[i] == '=') {
+        ++padding_count;
+      }
+    }
+  } else {
+    /* All valid non-padded Base64 encoded strings will be multiples of 4, or
+     * would be multiples of 4 with 1 or 2 padding characters, missing 3 padding
+     * characters is invalid */
+    if (input_size % 4 == 1) {
+      return -1;
+    }
+    // simulate valid padding for the calculation
+    padding_count = (input_size % 4) != 0 ? 4 - (input_size % 4) : 0;
+    input_size += padding_count;
+  }
+
+  size_t output_size = (input_size * 3) / 4 - padding_count;
+  assert(output_size <= INT_MAX);
+  return (int)output_size;
+}
+
+int
+oc_base64_decode_v1(oc_base64_encoding_t encoding, bool padding,
+                    const uint8_t *input, size_t input_size,
+                    uint8_t *output_buffer, size_t output_buffer_size)
+{
+  /* Check if the output buffer is large enough */
+  int size = oc_base64_decoded_output_size(input, input_size, padding);
+  if (size < 0 || output_buffer_size < (size_t)size) {
     return -1;
   }
+
   /* The Base64 input string is decoded in-place. */
-  size_t i = 0;
   int j = 0;
   unsigned char val_c = 0;
   unsigned char val_s = 0;
 
   /* Process every input character */
-  for (i = 0; i < len; i++) {
-    val_s = str[i];
+  for (size_t i = 0; i < input_size; i++) {
+    val_s = input[i];
 
-    /* Perform a reverse-mapping from Base64 character to
-     * a 6-bit binary sequence.
-     */
-    if (val_s >= 'A' && val_s <= 'Z')
-      val_s -= 65;
-    else if (val_s >= 'a' && val_s <= 'z')
-      val_s -= 71;
-    else if (val_s >= '0' && val_s <= '9')
-      val_s += 4;
-    else if (val_s == '+')
-      val_s = 62;
-    else if (val_s == '/')
-      val_s = 63;
     /* Break if we encounter the padding character.
      * The input buffer str now contains the fully decoded string.
      */
-    else if (val_s == '=') {
+    if (padding && val_s == '=') {
       /* Padding character "=" can only show up as last 2 characters */
-      if (i < len - 2) {
+      if (i < input_size - 2) {
         return -1;
       }
-      if (i == len - 2 && '=' != str[i + 1]) {
+      if (i == input_size - 2 && '=' != input[i + 1]) {
         return -1;
       }
       break;
     }
-    /* Return an error if we encounter a character that is outside
-     * of the Base64 alphabet.
-     */
-    else {
+
+    /* Convert the Base64 character to its 6-bit binary value */
+    int val = base64_decode_char(encoding, val_s);
+    if (val < 0) {
       return -1;
     }
+    val_s = (unsigned char)val;
 
     /* Decode all 4 byte blocks to 3 bytes of binary output by
      * laying out their 6-bit blocks into a sequence of 3 bytes.
@@ -179,26 +265,37 @@ oc_base64_decode(uint8_t *str, size_t len)
     } else if (i % 4 == 1) {
       /* Last 2 bits of output byte 1 */
       val_c |= (val_s >> 4);
-      str[j++] = val_c;
+      output_buffer[j++] = val_c;
       /* 1st 4 bits of output byte 2 */
       val_c = (uint8_t)(val_s << 4);
       val_c &= 0xF0;
     } else if (i % 4 == 2) {
       /* Last 4 bits of output byte 2 */
       val_c |= (val_s >> 2);
-      str[j++] = val_c;
+      output_buffer[j++] = val_c;
       /* 1st 2 bits of output byte 3 */
       val_c = (uint8_t)(val_s << 6);
       val_c &= 0xD0;
     } else {
       /* Last 6 bits of output byte 3 */
       val_c |= val_s;
-      str[j++] = val_c;
+      output_buffer[j++] = val_c;
     }
   }
 
+  return j;
+}
+
+int
+oc_base64_decode(uint8_t *str, size_t len)
+{
+  int j = oc_base64_decode_v1(OC_BASE64_ENCODING_STD, true, str, len, str, len);
+  if (j < 0) {
+    return j;
+  }
+
   /* zero out the remaining bytes */
-  for (i = j; i < len; i++) {
+  for (size_t i = j; i < len; i++) {
     str[i] = 0;
   }