corazawaf · M4tteoP · Jan 3, 2024 · Dec 13, 2023 · Dec 15, 2023 · Dec 18, 2023
@@ -3,18 +3,82 @@
 
 package transformations
 
-import (
-	"encoding/base64"
+import "strings"
 
-	stringsutil "github.com/corazawaf/coraza/v3/internal/strings"
-)
+var base64DecMap = []byte{
+	127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
+	127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
+	127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
+	127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
+	127, 127, 127, 62, 127, 127, 127, 63, 52, 53,
+	54, 55, 56, 57, 58, 59, 60, 61, 127, 127,
+	127, 64, 127, 127, 127, 0, 1, 2, 3, 4,
+	5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+	15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+	25, 127, 127, 127, 127, 127, 127, 26, 27, 28,
+	29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
+	39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+	49, 50, 51, 127, 127, 127, 127, 127,
+}
 
 // base64decode decodes a Base64-encoded string.
+// Padding is optional.
+// Partial decoding is returned up to the first invalid character (if any).
+// New line characters (\r and \n) are ignored.
+// Note: a custom base64 decoder is used in order to return partial decoding when an error arises. It
+// would be possible to use the standard library only relying on undocumented behaviors of the decoder.
+// For more context, see https://github.com/corazawaf/coraza/pull/940
 func base64decode(data string) (string, bool, error) {
-	dec, err := base64.StdEncoding.DecodeString(data)
-	if err != nil {
-		// Forgiving implementation, which ignores invalid characters
-		return data, false, nil
+	res := doBase64decode(data)
+	return res, true, nil
+}
+
+func doBase64decode(src string) string {
+	slen := len(src)
+	if slen == 0 {
+		return src
+	}
+
+	var n, x int
+	var dst strings.Builder
+	dst.Grow(slen)
+
+	for i := 0; i < slen; i++ {
+		currChar := src[i]
+		// new line characters are ignored.
+		if currChar == '\r' || currChar == '\n' {
+			continue
+		}
+		// If invalid character or padding reached, we stop decoding
+		if currChar == '=' || currChar == ' ' || currChar > 127 {
+			break
+		}
+		decodedChar := base64DecMap[currChar]
+		// Another condition of invalid character
+		if decodedChar == 127 {
+			break
+		}
+
+		x = (x << 6) | int(decodedChar&0x3F)
+		n++
+		if n == 4 {
+			dst.WriteByte(byte(x >> 16))
+			dst.WriteByte(byte(x >> 8))
+			dst.WriteByte(byte(x))
+			n = 0
+			x = 0
+		}
 	}
-	return stringsutil.WrapUnsafe(dec), true, nil
+
+	// Handle any remaining characters
+	if n == 2 {
+		x <<= 12
+		dst.WriteByte(byte(x >> 16))
+	} else if n == 3 {
+		x <<= 6
+		dst.WriteByte(byte(x >> 16))
+		dst.WriteByte(byte(x >> 8))
+	}
+
+	return dst.String()
 }
@@ -10,17 +10,173 @@ import (
 	"testing"
 )
 
-var b64DecodeTests = []string{
-	"VGVzdENhc2U=",
-	"P.HNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==",
-	"VGVzdABDYXNl",
+var b64DecodeTests = []struct {
+	name     string
+	input    string
+	expected string
+}{
+	{
+		name:     "Valid",
+		input:    "VGVzdENhc2U=",
+		expected: "TestCase",
+	},
+	{
+		name:     "Valid with \u0000",
+		input:    "VGVzdABDYXNl",
+		expected: "Test\x00Case",
+	},
+	{
+		name:     "Valid without padding",
+		input:    "VGVzdENhc2U",
+		expected: "TestCase",
+	},
+	{
+		name:     "Valid without longer padding",
+		input:    "PA==",
+		expected: "<",
+	},
+	{
+		name:     "valid <TEST>",
+		input:    "PFRFU1Q+",
+		expected: "<TEST>",
+	},
+	{
+		name:     "Malformed base64 encoding",
+		input:    "PHNjcmlwd",
+		expected: "<scrip",
+	},
+	{
+		name:     "decoded up to the space (invalid character)",
+		input:    "PFR FU1Q+",
+		expected: "<T",
+	},
+	{
+		name:     "decoded up to the dot (invalid character)",
+		input:    "P.HNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==",
+		expected: "", // Only the P character does not result in a printable character conversion.
+	},
+	{
+		name:     "decoded up to the dot (invalid character)",
+		input:    "PHNjcmlwd.D5hbGVydCgxKTwvc2NyaXB0Pg==",
+		expected: "<scrip",
+	},
+	{
+		name:     "decoded up to the dot (invalid character)",
+		input:    "PHNjcmlwdD.5hbGVydCgxKTwvc2NyaXB0Pg==",
+		expected: "<script",
+	},
+	{
+		name:     "decoded up to the dash (invalid character for base64, only valid for Base64url)",
+		input:    "PFRFU1Q-",
+		expected: "<TEST",
+	},
+	// The following tests are from the golang base64 decoder tests
+	// Source: https://github.com/golang/go/blob/c95fe91d0715dc0a8d55ac80a80f383c3635548b/src/encoding/base64/base64_test.go#L25C4-L49
+	{
+		name:     "golang test - RFC 3548 examples",
+		input:    "FPucA9l+",
+		expected: "\x14\xfb\x9c\x03\xd9\x7e",
+	},
+	{
+		name:     "golang test - RFC 3548 examples",
+		input:    "FPucA9k=",
+		expected: "\x14\xfb\x9c\x03\xd9",
+	},
+	{
+		name:     "golang test - RFC 3548 examples",
+		input:    "FPucAw==",
+		expected: "\x14\xfb\x9c\x03",
+	},
+	{
+		name:     "golang test - RFC 4648 examples",
+		input:    "",
+		expected: "",
+	},
+	{
+		name:     "golang test - RFC 4648 examples",
+		input:    "Zg==",
+		expected: "f",
+	},
+	{
+		name:     "golang test - RFC 4648 examples",
+		input:    "Zm8=",
+		expected: "fo",
+	},
+	{
+		name:     "golang test - RFC 4648 examples",
+		input:    "Zm9v",
+		expected: "foo",
+	},
+	{
+		name:     "golang test - RFC 4648 examples",
+		input:    "Zm9vYg==",
+		expected: "foob",
+	},
+	{
+		name:     "golang test - RFC 4648 examples",
+		input:    "Zm9vYmE=",
+		expected: "fooba",
+	},
+	{
+		name:     "golang test - RFC 4648 examples",
+		input:    "Zm9vYmFy",
+		expected: "foobar",
+	},
+	{
+		name:     "golang test - Wikipedia examples",
+		input:    "c3VyZS4=",
+		expected: "sure.",
+	},
+	{
+		name:     "golang test - Wikipedia examples",
+		input:    "c3VyZQ==",
+		expected: "sure",
+	},
+	{
+		name:     "golang test - Wikipedia examples",
+		input:    "c3Vy",
+		expected: "sur",
+	},
+	{
+		name:     "golang test - Wikipedia examples",
+		input:    "c3U=",
+		expected: "su",
+	},
+	{
+		name:     "golang test - Wikipedia examples",
+		input:    "bGVhc3VyZS4=",
+		expected: "leasure.",
+	},
+	{
+		name:     "golang test - Wikipedia examples",
+		input:    "ZWFzdXJlLg==",
+		expected: "easure.",
+	},
+	{
+		name:     "golang test - Wikipedia examples",
+		input:    "YXN1cmUu",
+		expected: "asure.",
+	},
 }
 
+func TestBase64Decode(t *testing.T) {
+	for _, tt := range b64DecodeTests {
+		t.Run(tt.name, func(t *testing.T) {
+			actual, _, err := base64decode(tt.input)
+			if err != nil {
+				t.Errorf("Unexpected error: %v", err)
+			}
+			if actual != tt.expected {
+				t.Errorf("Expected %q, but got %q", tt.expected, actual)
+			}
+		})
+	}
+}
 func BenchmarkB64Decode(b *testing.B) {
 	for _, tt := range b64DecodeTests {
-		b.Run(tt, func(b *testing.B) {
+		b.Run(tt.input, func(b *testing.B) {
 			for i := 0; i < b.N; i++ {
-				_, _, err := base64decode(tt)
+				_, _, err := base64decode(tt.input)
 				if err != nil {
 					b.Error(err)
 				}
@@ -31,7 +187,7 @@ func BenchmarkB64Decode(b *testing.B) {
 
 func FuzzB64Decode(f *testing.F) {
 	for _, tc := range b64DecodeTests {
-		f.Add(tc)
+		f.Add(tc.input)
 	}
 	f.Fuzz(func(t *testing.T, tc string) {
 		data, _, err := base64decode(tc)