Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: more forgiving base64 transformation [custom implementation] #944

Merged
merged 10 commits into from
Jan 3, 2024
79 changes: 70 additions & 9 deletions internal/transformations/base64decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,79 @@

package transformations

import (
"encoding/base64"
import "strings"

stringsutil "github.com/corazawaf/coraza/v3/internal/strings"
)
var base64DecMap = []byte{
127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 62, 127, 127, 127, 63, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 127, 127,
127, 64, 127, 127, 127, 0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 127, 127, 127, 127, 127, 127, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
49, 50, 51, 127, 127, 127, 127, 127,
}

// base64decode decodes a Base64-encoded string.
// Padding is optional.
// Partial decoding is returned up to the first invalid character (if any).
// New line characters (\r and \n) are ignored.
func base64decode(data string) (string, bool, error) {
dec, err := base64.StdEncoding.DecodeString(data)
if err != nil {
// Forgiving implementation, which ignores invalid characters
return data, false, nil
res := doBase64decode(data)
return res, true, nil
}

func doBase64decode(src string) string {
slen := len(src)
if slen == 0 {
return src
}

var n, x int
var dst strings.Builder
dst.Grow(slen)

for i := 0; i < slen; i++ {
currChar := src[i]
// new line characters are ignored.
if currChar == '\r' || currChar == '\n' {
continue
}
// If invalid character or padding reached, we stop decoding
if currChar == '=' || currChar == ' ' || currChar > 127 {
break
}
decodedChar := base64DecMap[currChar]
// Another condition of invalid character
if decodedChar == 127 {
break
}

x = (x << 6) | int(decodedChar&0x3F)
n++
if n == 4 {
dst.WriteByte(byte(x >> 16))
dst.WriteByte(byte(x >> 8))
dst.WriteByte(byte(x))
n = 0
x = 0
}
}
return stringsutil.WrapUnsafe(dec), true, nil

// Handle any remaining characters
if n == 2 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it still make sense to executer these when we break above on illegal character?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Early returning dst.String() when we break above on illegal character (so not when padding reached) leads to some failing tests:

--- FAIL: TestBase64Decode (0.00s)
    --- FAIL: TestBase64Decode/decoded_up_to_the_space_(invalid_character) (0.00s)
        /Users/matteopace/Repo/coraza/internal/transformations/base64decode_test.go:83: Expected "<T", but got ""
    --- FAIL: TestBase64Decode/decoded_up_to_the_dot_(invalid_character)#02 (0.00s)
        /Users/matteopace/Repo/coraza/internal/transformations/base64decode_test.go:83: Expected "<script", but got "<scrip"
    --- FAIL: TestBase64Decode/decoded_up_to_the_dash_(invalid_character_for_base64,_only_valid_for_Base64url) (0.00s)
        /Users/matteopace/Repo/coraza/internal/transformations/base64decode_test.go:83: Expected "<TEST", but got "<TE"

Trailing characters require to be rearranged even if that case, as if the end of the string was reached

x <<= 12
dst.WriteByte(byte(x >> 16))
} else if n == 3 {
x <<= 6
dst.WriteByte(byte(x >> 16))
dst.WriteByte(byte(x >> 8))
}

return dst.String()
}
170 changes: 163 additions & 7 deletions internal/transformations/base64decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,173 @@ import (
"testing"
)

var b64DecodeTests = []string{
"VGVzdENhc2U=",
"P.HNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==",
"VGVzdABDYXNl",
var b64DecodeTests = []struct {
name string
input string
expected string
}{
{
name: "Valid",
input: "VGVzdENhc2U=",
expected: "TestCase",
},
{
name: "Valid with \u0000",
input: "VGVzdABDYXNl",
expected: "Test\x00Case",
},
{
name: "Valid without padding",
input: "VGVzdENhc2U",
expected: "TestCase",
},
{
name: "Valid without longer padding",
input: "PA==",
expected: "<",
},
{
name: "valid <TEST>",
input: "PFRFU1Q+",
expected: "<TEST>",
},
{
name: "Malformed base64 encoding",
input: "PHNjcmlwd",
expected: "<scrip",
},
{
name: "decoded up to the space (invalid character)",
input: "PFR FU1Q+",
expected: "<T",
},
{
name: "decoded up to the dot (invalid character)",
input: "P.HNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==",
expected: "", // Only the P character does not result in a printable character conversion.
},
{
name: "decoded up to the dot (invalid character)",
input: "PHNjcmlwd.D5hbGVydCgxKTwvc2NyaXB0Pg==",
expected: "<scrip",
},
{
name: "decoded up to the dot (invalid character)",
input: "PHNjcmlwdD.5hbGVydCgxKTwvc2NyaXB0Pg==",
expected: "<script",
},
{
name: "decoded up to the dash (invalid character for base64, only valid for Base64url)",
input: "PFRFU1Q-",
expected: "<TEST",
},
// The following tests are from the golang base64 decoder tests
// Source: https://github.com/golang/go/blob/master/src/encoding/base64/base64_test.go
{
name: "golang test - RFC 3548 examples",
input: "FPucA9l+",
expected: "\x14\xfb\x9c\x03\xd9\x7e",
},
{
name: "golang test - RFC 3548 examples",
input: "FPucA9k=",
expected: "\x14\xfb\x9c\x03\xd9",
},
{
name: "golang test - RFC 3548 examples",
input: "FPucAw==",
expected: "\x14\xfb\x9c\x03",
},
{
name: "golang test - RFC 4648 examples",
input: "",
expected: "",
},
{
name: "golang test - RFC 4648 examples",
input: "Zg==",
expected: "f",
},
{
name: "golang test - RFC 4648 examples",
input: "Zm8=",
expected: "fo",
},
{
name: "golang test - RFC 4648 examples",
input: "Zm9v",
expected: "foo",
},
{
name: "golang test - RFC 4648 examples",
input: "Zm9vYg==",
expected: "foob",
},
{
name: "golang test - RFC 4648 examples",
input: "Zm9vYmE=",
expected: "fooba",
},
{
name: "golang test - RFC 4648 examples",
input: "Zm9vYmFy",
expected: "foobar",
},
{
name: "golang test - Wikipedia examples",
input: "c3VyZS4=",
expected: "sure.",
},
{
name: "golang test - Wikipedia examples",
input: "c3VyZQ==",
expected: "sure",
},
{
name: "golang test - Wikipedia examples",
input: "c3Vy",
expected: "sur",
},
{
name: "golang test - Wikipedia examples",
input: "c3U=",
expected: "su",
},
{
name: "golang test - Wikipedia examples",
input: "bGVhc3VyZS4=",
expected: "leasure.",
},
{
name: "golang test - Wikipedia examples",
input: "ZWFzdXJlLg==",
expected: "easure.",
},
{
name: "golang test - Wikipedia examples",
input: "YXN1cmUu",
expected: "asure.",
},
}

func TestBase64Decode(t *testing.T) {
for _, tt := range b64DecodeTests {
t.Run(tt.name, func(t *testing.T) {
actual, _, err := base64decode(tt.input)
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
if actual != tt.expected {
t.Errorf("Expected %q, but got %q", tt.expected, actual)
}
})
}
}
func BenchmarkB64Decode(b *testing.B) {
for _, tt := range b64DecodeTests {
b.Run(tt, func(b *testing.B) {
b.Run(tt.input, func(b *testing.B) {
for i := 0; i < b.N; i++ {
_, _, err := base64decode(tt)
_, _, err := base64decode(tt.input)
if err != nil {
b.Error(err)
}
Expand All @@ -31,7 +187,7 @@ func BenchmarkB64Decode(b *testing.B) {

func FuzzB64Decode(f *testing.F) {
for _, tc := range b64DecodeTests {
f.Add(tc)
f.Add(tc.input)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interestingly the fuzz test was still set up for this forgiving version, that's nice

}
f.Fuzz(func(t *testing.T, tc string) {
data, _, err := base64decode(tc)
Expand Down
Loading