From cdc5b28dd07f090c980109dd7606682b05fcc445 Mon Sep 17 00:00:00 2001 From: Tomo Suzuki Date: Wed, 15 May 2024 15:18:12 -0400 Subject: [PATCH 1/7] fix: Base64 decoding to discard newline characters --- .../google/api/client/util/Base64Test.java | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java b/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java index 0ee174f9f..a94c88056 100644 --- a/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java +++ b/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java @@ -62,4 +62,45 @@ public void test_encodeBase64URLSafe_withNull_shouldReturnNull() { public void test_encodeBase64_withNull_shouldReturnNull() { assertNull(Base64.encodeBase64(null)); } + + public void test_decodeBase64_newline_character_invalid_length() { + // The RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-3.3) states that a + // specification referring to the Base64 encoding may state that it ignores characters outside + // the base alphabet. + + // In Base64 encoding, 3 characters (24 bits) are converted to 4 of 6-bits, each of which is + // converted to a byte (a character). + // Base64encode("abc") => "YWJj" (4 characters) + // Base64encode("def") => "ZGVm" (4 characters) + // Adding a new line character between them. This should be discarded. + String encodedString = "YWJj\nZGVm"; + + // This is a reference implementation by Apache Commons Codec. It discards the new line + // characters. + assertEquals( + "abcdef", + new String( + org.apache.commons.codec.binary.Base64.decodeBase64(encodedString), + StandardCharsets.UTF_8)); + // This is our implementation + assertEquals("abcdef", new String(Base64.decodeBase64(encodedString), StandardCharsets.UTF_8)); + } + + public void test_decodeBase64_newline_character_between() { + // In Base64 encoding, 2 characters (16 bits) are converted to 3 of 6-bits plus the padding + // character ('="). + // Base64encode("ab") => "YWI=" (3 characters + padding character) + // Adding a new line character that should be discarded between them + String encodedString = "YW\nI="; + + // This is a reference implementation by Apache Commons Codec. It discards the new line + // characters. + assertEquals( + "ab", + new String( + org.apache.commons.codec.binary.Base64.decodeBase64(encodedString), + StandardCharsets.UTF_8)); + // This is our implementation + assertEquals("ab", new String(Base64.decodeBase64(encodedString), StandardCharsets.UTF_8)); + } } From 0a4bd6497d7002f7b6fc5ff27c9f26b2bc81f569 Mon Sep 17 00:00:00 2001 From: Tomo Suzuki Date: Wed, 15 May 2024 15:28:19 -0400 Subject: [PATCH 2/7] implementation --- .../main/java/com/google/api/client/util/Base64.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/google-http-client/src/main/java/com/google/api/client/util/Base64.java b/google-http-client/src/main/java/com/google/api/client/util/Base64.java index 9225cd5dd..c4267d77a 100644 --- a/google-http-client/src/main/java/com/google/api/client/util/Base64.java +++ b/google-http-client/src/main/java/com/google/api/client/util/Base64.java @@ -26,6 +26,10 @@ */ @Deprecated public class Base64 { + // Special decoders that discards the new line character so that the behavior matches what + // we had with Apache Commmons Codec's decodeBase64. + private static final BaseEncoding BASE64 = BaseEncoding.base64().withSeparator("\n", 64); + private static final BaseEncoding BASE64URL = BaseEncoding.base64Url().withSeparator("\n", 64); /** * Encodes binary data using the base64 algorithm but does not chunk the output. @@ -92,6 +96,9 @@ public static byte[] decodeBase64(byte[] base64Data) { * Decodes a Base64 String into octets. Note that this method handles both URL-safe and * non-URL-safe base 64 encoded strings. * + *

For the compatibility with the old version that used Apache Commons Coded's decodeBase64, + * this method discards new line characters and trailing whitespaces. + * * @param base64String String containing Base64 data or {@code null} for {@code null} result * @return Array containing decoded data or {@code null} for {@code null} input */ @@ -100,10 +107,10 @@ public static byte[] decodeBase64(String base64String) { return null; } try { - return BaseEncoding.base64().decode(base64String); + return BASE64.decode(base64String); } catch (IllegalArgumentException e) { if (e.getCause() instanceof DecodingException) { - return BaseEncoding.base64Url().decode(base64String.trim()); + return BASE64URL.decode(base64String.trim()); } throw e; } From 67b40c8a1ac532a4a6e5a8b969dff87243662947 Mon Sep 17 00:00:00 2001 From: Tomo Suzuki Date: Wed, 15 May 2024 17:38:44 -0400 Subject: [PATCH 3/7] Added explanation of the 2nd argument of withSeparator --- .../java/com/google/api/client/util/Base64.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/google-http-client/src/main/java/com/google/api/client/util/Base64.java b/google-http-client/src/main/java/com/google/api/client/util/Base64.java index c4267d77a..44c38feb3 100644 --- a/google-http-client/src/main/java/com/google/api/client/util/Base64.java +++ b/google-http-client/src/main/java/com/google/api/client/util/Base64.java @@ -26,10 +26,13 @@ */ @Deprecated public class Base64 { - // Special decoders that discards the new line character so that the behavior matches what - // we had with Apache Commmons Codec's decodeBase64. - private static final BaseEncoding BASE64 = BaseEncoding.base64().withSeparator("\n", 64); - private static final BaseEncoding BASE64URL = BaseEncoding.base64Url().withSeparator("\n", 64); + // Guava's Base64 (https://datatracker.ietf.org/doc/html/rfc4648#section-4) decoders. When + // decoding, they discard the new line character so that the behavior matches what we had with + // Apache Commons Codec's decodeBase64. When encoding, they would insert a new line character + // every 64 (the 2nd argument of withSeparator method) characters. They are not used for encoding. + private static final BaseEncoding BASE64_DECODER = BaseEncoding.base64().withSeparator("\n", 64); + private static final BaseEncoding BASE64URL_DECODER = + BaseEncoding.base64Url().withSeparator("\n", 64); /** * Encodes binary data using the base64 algorithm but does not chunk the output. @@ -107,10 +110,10 @@ public static byte[] decodeBase64(String base64String) { return null; } try { - return BASE64.decode(base64String); + return BASE64_DECODER.decode(base64String); } catch (IllegalArgumentException e) { if (e.getCause() instanceof DecodingException) { - return BASE64URL.decode(base64String.trim()); + return BASE64URL_DECODER.decode(base64String.trim()); } throw e; } From 5cee049d101272170980d4fb4355cd5d6aa01a99 Mon Sep 17 00:00:00 2001 From: Tomo Suzuki Date: Wed, 15 May 2024 17:39:07 -0400 Subject: [PATCH 4/7] Commented out the reference implementation in test --- .../google/api/client/util/Base64Test.java | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java b/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java index a94c88056..da4312f07 100644 --- a/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java +++ b/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java @@ -77,11 +77,12 @@ public void test_decodeBase64_newline_character_invalid_length() { // This is a reference implementation by Apache Commons Codec. It discards the new line // characters. - assertEquals( - "abcdef", - new String( - org.apache.commons.codec.binary.Base64.decodeBase64(encodedString), - StandardCharsets.UTF_8)); + // assertEquals( + // "abcdef", + // new String( + // org.apache.commons.codec.binary.Base64.decodeBase64(encodedString), + // StandardCharsets.UTF_8)); + // This is our implementation assertEquals("abcdef", new String(Base64.decodeBase64(encodedString), StandardCharsets.UTF_8)); } @@ -95,11 +96,12 @@ public void test_decodeBase64_newline_character_between() { // This is a reference implementation by Apache Commons Codec. It discards the new line // characters. - assertEquals( - "ab", - new String( - org.apache.commons.codec.binary.Base64.decodeBase64(encodedString), - StandardCharsets.UTF_8)); + // assertEquals( + // "ab", + // new String( + // org.apache.commons.codec.binary.Base64.decodeBase64(encodedString), + // StandardCharsets.UTF_8)); + // This is our implementation assertEquals("ab", new String(Base64.decodeBase64(encodedString), StandardCharsets.UTF_8)); } From 4e2d7b7753e6bba673978629cb26638714abe4bd Mon Sep 17 00:00:00 2001 From: Tomo Suzuki Date: Wed, 15 May 2024 18:33:31 -0400 Subject: [PATCH 5/7] adding test case for "+" character and new line character --- .../google/api/client/util/Base64Test.java | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java b/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java index da4312f07..417a82c2f 100644 --- a/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java +++ b/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java @@ -14,6 +14,8 @@ package com.google.api.client.util; +import static com.google.common.truth.Truth.assertThat; + import java.nio.charset.StandardCharsets; import junit.framework.TestCase; @@ -83,11 +85,13 @@ public void test_decodeBase64_newline_character_invalid_length() { // org.apache.commons.codec.binary.Base64.decodeBase64(encodedString), // StandardCharsets.UTF_8)); - // This is our implementation + // This is our implementation. Before the + // https://github.com/googleapis/google-http-java-client/pull/1941/, it was throwing + // IllegalArgumentException("Invalid length 9"). assertEquals("abcdef", new String(Base64.decodeBase64(encodedString), StandardCharsets.UTF_8)); } - public void test_decodeBase64_newline_character_between() { + public void test_decodeBase64_newline_character() { // In Base64 encoding, 2 characters (16 bits) are converted to 3 of 6-bits plus the padding // character ('="). // Base64encode("ab") => "YWI=" (3 characters + padding character) @@ -102,7 +106,20 @@ public void test_decodeBase64_newline_character_between() { // org.apache.commons.codec.binary.Base64.decodeBase64(encodedString), // StandardCharsets.UTF_8)); - // This is our implementation + // This is our implementation. Before the + // https://github.com/googleapis/google-http-java-client/pull/1941/, it was throwing + // IllegalArgumentException("Unrecognized character: 0xa"). assertEquals("ab", new String(Base64.decodeBase64(encodedString), StandardCharsets.UTF_8)); } + + public void test_decodeBase64__plus_and_newline_characters() { + // The plus sign is 62 in the Base64 table. So it's a valid character in an encoded strings. + // https://datatracker.ietf.org/doc/html/rfc4648#section-4 + String encodedString = "+\nw=="; + + byte[] actual = Base64.decodeBase64(encodedString); + // Before the https://github.com/googleapis/google-http-java-client/pull/1941/, it was throwing + // IllegalArgumentException("Unrecognized character: +"). + assertThat(actual).isEqualTo(new byte[] {(byte) 0xfb}); + } } From 2c5772d791ff07ed4b7670287b5413c1e406fd02 Mon Sep 17 00:00:00 2001 From: Tomo Suzuki Date: Wed, 15 May 2024 23:42:25 -0400 Subject: [PATCH 6/7] clarified comment --- .../src/main/java/com/google/api/client/util/Base64.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google-http-client/src/main/java/com/google/api/client/util/Base64.java b/google-http-client/src/main/java/com/google/api/client/util/Base64.java index 44c38feb3..178bc4829 100644 --- a/google-http-client/src/main/java/com/google/api/client/util/Base64.java +++ b/google-http-client/src/main/java/com/google/api/client/util/Base64.java @@ -28,8 +28,8 @@ public class Base64 { // Guava's Base64 (https://datatracker.ietf.org/doc/html/rfc4648#section-4) decoders. When // decoding, they discard the new line character so that the behavior matches what we had with - // Apache Commons Codec's decodeBase64. When encoding, they would insert a new line character - // every 64 (the 2nd argument of withSeparator method) characters. They are not used for encoding. + // Apache Commons Codec's decodeBase64. + // The 2nd argument of the withSeparator method, "64", does not have any effect in decoding. private static final BaseEncoding BASE64_DECODER = BaseEncoding.base64().withSeparator("\n", 64); private static final BaseEncoding BASE64URL_DECODER = BaseEncoding.base64Url().withSeparator("\n", 64); From be28670e7a03105ca5213651fa8d78b78e957986 Mon Sep 17 00:00:00 2001 From: Tomo Suzuki Date: Thu, 16 May 2024 12:54:06 -0400 Subject: [PATCH 7/7] test case with the slash character --- .../google/api/client/util/Base64Test.java | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java b/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java index 417a82c2f..218dd1f1b 100644 --- a/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java +++ b/google-http-client/src/test/java/com/google/api/client/util/Base64Test.java @@ -106,20 +106,31 @@ public void test_decodeBase64_newline_character() { // org.apache.commons.codec.binary.Base64.decodeBase64(encodedString), // StandardCharsets.UTF_8)); - // This is our implementation. Before the + // This is our implementation. Before the fix // https://github.com/googleapis/google-http-java-client/pull/1941/, it was throwing // IllegalArgumentException("Unrecognized character: 0xa"). assertEquals("ab", new String(Base64.decodeBase64(encodedString), StandardCharsets.UTF_8)); } - public void test_decodeBase64__plus_and_newline_characters() { - // The plus sign is 62 in the Base64 table. So it's a valid character in an encoded strings. + public void test_decodeBase64_plus_and_newline_characters() { + // The plus sign is 62 in the Base64 table. So it's a valid character in encoded strings. // https://datatracker.ietf.org/doc/html/rfc4648#section-4 String encodedString = "+\nw=="; byte[] actual = Base64.decodeBase64(encodedString); - // Before the https://github.com/googleapis/google-http-java-client/pull/1941/, it was throwing - // IllegalArgumentException("Unrecognized character: +"). + // Before the fix https://github.com/googleapis/google-http-java-client/pull/1941/, it was + // throwing IllegalArgumentException("Unrecognized character: +"). assertThat(actual).isEqualTo(new byte[] {(byte) 0xfb}); } + + public void test_decodeBase64_slash_and_newline_characters() { + // The slash sign is 63 in the Base64 table. So it's a valid character in encoded strings. + // https://datatracker.ietf.org/doc/html/rfc4648#section-4 + String encodedString = "/\nw=="; + + byte[] actual = Base64.decodeBase64(encodedString); + // Before the fix https://github.com/googleapis/google-http-java-client/pull/1941/, it was + // throwing IllegalArgumentException("Unrecognized character: /"). + assertThat(actual).isEqualTo(new byte[] {(byte) 0xff}); + } }