encoding/base32: Decoder with NoPadding doesn't fill the passed buffer #65166

thehowl · 2024-01-19T15:25:56Z

Go version

go 1.21.6

Output of `go env` in your module/workspace:

go playground

What did you do?

Here's a proof of concept: playground

package main

import (
	"encoding/base32"
	"fmt"
	"strings"
)

var enc = base32.StdEncoding.WithPadding(base32.NoPadding)

func main() {
	const fullString = "foobarbazqux"
	for i := 1; i <= len(fullString); i++ {
		runTest(fullString[:i], 0)
	}
}

func runTest(str string, extra int) {
	// Encode to base32
	encoded := enc.EncodeToString([]byte(str))

	// Decode back
	dec := base32.NewDecoder(enc, strings.NewReader(encoded))
	decLen := enc.DecodedLen(len(encoded))
	buf := make([]byte, decLen+extra)
	n, err := dec.Read(buf)
	if err != nil {
		panic(err)
	}

	// output result of test
	ch := "OK  "
	if str != string(buf[:n]) {
		ch = "FAIL"
	}
	fmt.Printf("%s runTest(%-13s %d): %-25q -> decLen: %02d -> %02d %-10q\n", ch, str+",", extra, encoded, decLen, n, string(buf[:n]))
}

Here's a second example, which shows how the success of the tests change based on whether we add extra length to the destination buffer: https://go.dev/play/p/A28R-myUHjT

What did you see happen?

First playground link output

OK   runTest(f,            0): "MY"                      -> decLen: 01 -> 01 "f"       
OK   runTest(fo,           0): "MZXQ"                    -> decLen: 02 -> 02 "fo"      
OK   runTest(foo,          0): "MZXW6"                   -> decLen: 03 -> 03 "foo"     
OK   runTest(foob,         0): "MZXW6YQ"                 -> decLen: 04 -> 04 "foob"    
OK   runTest(fooba,        0): "MZXW6YTB"                -> decLen: 05 -> 05 "fooba"   
FAIL runTest(foobar,       0): "MZXW6YTBOI"              -> decLen: 06 -> 05 "fooba"   
FAIL runTest(foobarb,      0): "MZXW6YTBOJRA"            -> decLen: 07 -> 05 "fooba"   
FAIL runTest(foobarba,     0): "MZXW6YTBOJRGC"           -> decLen: 08 -> 05 "fooba"   
FAIL runTest(foobarbaz,    0): "MZXW6YTBOJRGC6Q"         -> decLen: 09 -> 05 "fooba"   
OK   runTest(foobarbazq,   0): "MZXW6YTBOJRGC6TR"        -> decLen: 10 -> 10 "foobarbazq"
FAIL runTest(foobarbazqu,  0): "MZXW6YTBOJRGC6TROU"      -> decLen: 11 -> 10 "foobarbazq"
FAIL runTest(foobarbazqux, 0): "MZXW6YTBOJRGC6TROV4A"    -> decLen: 12 -> 10 "foobarbazq"

Second playground link output

OK   runTest(f,            0): "MY"                      -> decLen: 01 -> 01 "f"       
OK   runTest(fo,           0): "MZXQ"                    -> decLen: 02 -> 02 "fo"      
OK   runTest(foo,          0): "MZXW6"                   -> decLen: 03 -> 03 "foo"     
OK   runTest(foob,         0): "MZXW6YQ"                 -> decLen: 04 -> 04 "foob"    
OK   runTest(fooba,        0): "MZXW6YTB"                -> decLen: 05 -> 05 "fooba"   
FAIL runTest(foobar,       0): "MZXW6YTBOI"              -> decLen: 06 -> 05 "fooba"   
FAIL runTest(foobarb,      0): "MZXW6YTBOJRA"            -> decLen: 07 -> 05 "fooba"   
FAIL runTest(foobarba,     0): "MZXW6YTBOJRGC"           -> decLen: 08 -> 05 "fooba"   
FAIL runTest(foobarbaz,    0): "MZXW6YTBOJRGC6Q"         -> decLen: 09 -> 05 "fooba"   
OK   runTest(foobarbazq,   0): "MZXW6YTBOJRGC6TR"        -> decLen: 10 -> 10 "foobarbazq"
FAIL runTest(foobarbazqu,  0): "MZXW6YTBOJRGC6TROU"      -> decLen: 11 -> 10 "foobarbazq"
FAIL runTest(foobarbazqux, 0): "MZXW6YTBOJRGC6TROV4A"    -> decLen: 12 -> 10 "foobarbazq"

OK   runTest(f,            1): "MY"                      -> decLen: 01 -> 01 "f"       
OK   runTest(fo,           1): "MZXQ"                    -> decLen: 02 -> 02 "fo"      
OK   runTest(foo,          1): "MZXW6"                   -> decLen: 03 -> 03 "foo"     
OK   runTest(foob,         1): "MZXW6YQ"                 -> decLen: 04 -> 04 "foob"    
OK   runTest(fooba,        1): "MZXW6YTB"                -> decLen: 05 -> 05 "fooba"   
FAIL runTest(foobar,       1): "MZXW6YTBOI"              -> decLen: 06 -> 05 "fooba"   
FAIL runTest(foobarb,      1): "MZXW6YTBOJRA"            -> decLen: 07 -> 05 "fooba"   
FAIL runTest(foobarba,     1): "MZXW6YTBOJRGC"           -> decLen: 08 -> 05 "fooba"   
OK   runTest(foobarbaz,    1): "MZXW6YTBOJRGC6Q"         -> decLen: 09 -> 09 "foobarbaz"
OK   runTest(foobarbazq,   1): "MZXW6YTBOJRGC6TR"        -> decLen: 10 -> 10 "foobarbazq"
FAIL runTest(foobarbazqu,  1): "MZXW6YTBOJRGC6TROU"      -> decLen: 11 -> 10 "foobarbazq"
FAIL runTest(foobarbazqux, 1): "MZXW6YTBOJRGC6TROV4A"    -> decLen: 12 -> 10 "foobarbazq"

OK   runTest(f,            2): "MY"                      -> decLen: 01 -> 01 "f"       
OK   runTest(fo,           2): "MZXQ"                    -> decLen: 02 -> 02 "fo"      
OK   runTest(foo,          2): "MZXW6"                   -> decLen: 03 -> 03 "foo"     
OK   runTest(foob,         2): "MZXW6YQ"                 -> decLen: 04 -> 04 "foob"    
OK   runTest(fooba,        2): "MZXW6YTB"                -> decLen: 05 -> 05 "fooba"   
FAIL runTest(foobar,       2): "MZXW6YTBOI"              -> decLen: 06 -> 05 "fooba"   
FAIL runTest(foobarb,      2): "MZXW6YTBOJRA"            -> decLen: 07 -> 05 "fooba"   
OK   runTest(foobarba,     2): "MZXW6YTBOJRGC"           -> decLen: 08 -> 08 "foobarba"
OK   runTest(foobarbaz,    2): "MZXW6YTBOJRGC6Q"         -> decLen: 09 -> 09 "foobarbaz"
OK   runTest(foobarbazq,   2): "MZXW6YTBOJRGC6TR"        -> decLen: 10 -> 10 "foobarbazq"
FAIL runTest(foobarbazqu,  2): "MZXW6YTBOJRGC6TROU"      -> decLen: 11 -> 10 "foobarbazq"
FAIL runTest(foobarbazqux, 2): "MZXW6YTBOJRGC6TROV4A"    -> decLen: 12 -> 10 "foobarbazq"

OK   runTest(f,            3): "MY"                      -> decLen: 01 -> 01 "f"       
OK   runTest(fo,           3): "MZXQ"                    -> decLen: 02 -> 02 "fo"      
OK   runTest(foo,          3): "MZXW6"                   -> decLen: 03 -> 03 "foo"     
OK   runTest(foob,         3): "MZXW6YQ"                 -> decLen: 04 -> 04 "foob"    
OK   runTest(fooba,        3): "MZXW6YTB"                -> decLen: 05 -> 05 "fooba"   
FAIL runTest(foobar,       3): "MZXW6YTBOI"              -> decLen: 06 -> 05 "fooba"   
OK   runTest(foobarb,      3): "MZXW6YTBOJRA"            -> decLen: 07 -> 07 "foobarb" 
OK   runTest(foobarba,     3): "MZXW6YTBOJRGC"           -> decLen: 08 -> 08 "foobarba"
OK   runTest(foobarbaz,    3): "MZXW6YTBOJRGC6Q"         -> decLen: 09 -> 09 "foobarbaz"
OK   runTest(foobarbazq,   3): "MZXW6YTBOJRGC6TR"        -> decLen: 10 -> 10 "foobarbazq"
FAIL runTest(foobarbazqu,  3): "MZXW6YTBOJRGC6TROU"      -> decLen: 11 -> 10 "foobarbazq"
OK   runTest(foobarbazqux, 3): "MZXW6YTBOJRGC6TROV4A"    -> decLen: 12 -> 12 "foobarbazqux"

OK   runTest(f,            4): "MY"                      -> decLen: 01 -> 01 "f"       
OK   runTest(fo,           4): "MZXQ"                    -> decLen: 02 -> 02 "fo"      
OK   runTest(foo,          4): "MZXW6"                   -> decLen: 03 -> 03 "foo"     
OK   runTest(foob,         4): "MZXW6YQ"                 -> decLen: 04 -> 04 "foob"    
OK   runTest(fooba,        4): "MZXW6YTB"                -> decLen: 05 -> 05 "fooba"   
OK   runTest(foobar,       4): "MZXW6YTBOI"              -> decLen: 06 -> 06 "foobar"  
OK   runTest(foobarb,      4): "MZXW6YTBOJRA"            -> decLen: 07 -> 07 "foobarb" 
OK   runTest(foobarba,     4): "MZXW6YTBOJRGC"           -> decLen: 08 -> 08 "foobarba"
OK   runTest(foobarbaz,    4): "MZXW6YTBOJRGC6Q"         -> decLen: 09 -> 09 "foobarbaz"
OK   runTest(foobarbazq,   4): "MZXW6YTBOJRGC6TR"        -> decLen: 10 -> 10 "foobarbazq"
OK   runTest(foobarbazqu,  4): "MZXW6YTBOJRGC6TROU"      -> decLen: 11 -> 11 "foobarbazqu"
OK   runTest(foobarbazqux, 4): "MZXW6YTBOJRGC6TROV4A"    -> decLen: 12 -> 12 "foobarbazqux"

OK   runTest(f,            5): "MY"                      -> decLen: 01 -> 01 "f"       
OK   runTest(fo,           5): "MZXQ"                    -> decLen: 02 -> 02 "fo"      
OK   runTest(foo,          5): "MZXW6"                   -> decLen: 03 -> 03 "foo"     
OK   runTest(foob,         5): "MZXW6YQ"                 -> decLen: 04 -> 04 "foob"    
OK   runTest(fooba,        5): "MZXW6YTB"                -> decLen: 05 -> 05 "fooba"   
OK   runTest(foobar,       5): "MZXW6YTBOI"              -> decLen: 06 -> 06 "foobar"  
OK   runTest(foobarb,      5): "MZXW6YTBOJRA"            -> decLen: 07 -> 07 "foobarb" 
OK   runTest(foobarba,     5): "MZXW6YTBOJRGC"           -> decLen: 08 -> 08 "foobarba"
OK   runTest(foobarbaz,    5): "MZXW6YTBOJRGC6Q"         -> decLen: 09 -> 09 "foobarbaz"
OK   runTest(foobarbazq,   5): "MZXW6YTBOJRGC6TR"        -> decLen: 10 -> 10 "foobarbazq"
OK   runTest(foobarbazqu,  5): "MZXW6YTBOJRGC6TROU"      -> decLen: 11 -> 11 "foobarbazqu"
OK   runTest(foobarbazqux, 5): "MZXW6YTBOJRGC6TROV4A"    -> decLen: 12 -> 12 "foobarbazqux"

This specifically happens when using the decoder with a NoPadding encoding. It appears that package base32 underestimates the amount of data it can write to the buffer when using NoPadding.

When using Decoder and trying to read a chunk of data using a buffer, appropriately sized to what is described as "the maximum length in bytes of the decoded data", the decoder doesn't attempt to fill the entire buffer.

This seems to come from the following piece of code:

	// Read a chunk.
	nn := len(p) / 5 * 8
	if nn < 8 {
		nn = 8
	}
	if nn > len(d.buf) {
		nn = len(d.buf)
	}

nn is the number of bytes that is attempted to be read later, upper bounded by the decoder's internal buffer (d.buf). It seems that the amount of bytes that should be read from the encoded input is calculated based on a copy of the EncodedLen code, which fails to take into account the difference for NoPadding.

What did you expect to see?

I expected that using Decoder to decode data into a buffer not to be dependent on the size of the input buffer when it is already of the size provided by DecodedLen, unless of course this was upper bounded by the decoder's internal buffer.

This has not created a production issue for me, but it seems to be based on a reasonable expectation (albeit a specific use case where Decode with the buffer + DecodedLen would probably be a better option).

The text was updated successfully, but these errors were encountered:

thehowl · 2024-01-19T15:35:46Z

Something similar seems to also be happening also on package encoding/base64, albeit the results are a bit different (and unaffected by adding extra bytes to the input buffer): https://go.dev/play/p/Sqadb_z_aUi

This one at least does not vary depending on the decoded size. I'll wait for this issue to be triaged before creating a new issue and investigate for base64.

dsnet · 2024-01-21T01:46:05Z

This has been failing as far back as Go 1.11, so not a regression (of relevance for upcoming Go 1.22 release).

cherrymui · 2024-01-23T17:21:41Z

cc @rsc

If unpadded content was passed, in some occassions content was omitted, because the division result was floored. Ceiling it makes sure all content is always read. Fixes golang#65166 Signed-off-by: Niklas Ott <[email protected]>

gopherbot · 2024-04-24T14:18:09Z

Change https://go.dev/cl/581415 mentions this issue: encoding/base32: fix buffer without padding

gopherbot · 2024-04-24T14:18:10Z

Change https://go.dev/cl/581416 mentions this issue: encoding/base64: fix buffer without padding

thehowl changed the title ~~encoding/base32: Decoder with NoPadding doesn't fill the passed buffers~~ encoding/base32: Decoder with NoPadding doesn't fill the passed buffer Jan 19, 2024

This comment was marked as outdated.

Sign in to view

cherrymui added the NeedsInvestigation Someone must examine and confirm this is a valid issue and not a duplicate of an existing one. label Jan 23, 2024

cherrymui added this to the Backlog milestone Jan 23, 2024

zivkovicmilos mentioned this issue Jan 29, 2024

Minutes: Core Staff Weekly Syncs [every Monday] gnolang/meetings#36

Open

dmitshur added NeedsFix The path to resolution is known, but the work has not been done. and removed NeedsInvestigation Someone must examine and confirm this is a valid issue and not a duplicate of an existing one. labels May 13, 2024

dmitshur modified the milestones: Backlog, Go1.23 May 13, 2024

gopherbot closed this as completed in c7c578c May 14, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

encoding/base32: Decoder with NoPadding doesn't fill the passed buffer #65166

encoding/base32: Decoder with NoPadding doesn't fill the passed buffer #65166

thehowl commented Jan 19, 2024 •

edited

Loading

thehowl commented Jan 19, 2024

This comment was marked as outdated.

dsnet commented Jan 21, 2024 •

edited

Loading

cherrymui commented Jan 23, 2024

gopherbot commented Apr 24, 2024

gopherbot commented Apr 24, 2024

encoding/base32: Decoder with NoPadding doesn't fill the passed buffer #65166

encoding/base32: Decoder with NoPadding doesn't fill the passed buffer #65166

Comments

thehowl commented Jan 19, 2024 • edited Loading

Go version

Output of go env in your module/workspace:

What did you do?

What did you see happen?

What did you expect to see?

thehowl commented Jan 19, 2024

This comment was marked as outdated.

dsnet commented Jan 21, 2024 • edited Loading

cherrymui commented Jan 23, 2024

gopherbot commented Apr 24, 2024

gopherbot commented Apr 24, 2024

thehowl commented Jan 19, 2024 •

edited

Loading

Output of `go env` in your module/workspace:

dsnet commented Jan 21, 2024 •

edited

Loading