Skip to content

Commit

Permalink
archive/tar: detect truncated files
Browse files Browse the repository at this point in the history
Motivation:
* Reader.skipUnread never reports io.ErrUnexpectedEOF. This is strange
given that io.ErrUnexpectedEOF is given through Reader.Read if the
user manually reads the file.
* Reader.skipUnread fails to detect truncated files since io.Seeker
is lazy about reporting errors. Thus, the behavior of Reader differs
whether the input io.Reader also satisfies io.Seeker or not.

To solve this, we seek to one before the end of the data section and
always rely on at least one call to io.CopyN. If the tr.r satisfies
io.Seeker, this is guarunteed to never read more than blockSize.

Fixes #12557

Change-Id: I0ddddfc6bed0d74465cb7e7a02b26f1de7a7a279
Reviewed-on: https://go-review.googlesource.com/15175
Reviewed-by: Brad Fitzpatrick <[email protected]>
Run-TryBot: Brad Fitzpatrick <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
  • Loading branch information
dsnet authored and ianlancetaylor committed Nov 6, 2015
1 parent 6083bd6 commit e3b615f
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 38 deletions.
54 changes: 45 additions & 9 deletions src/archive/tar/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -446,16 +446,45 @@ func (tr *Reader) octal(b []byte) int64 {
return int64(x)
}

// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
func (tr *Reader) skipUnread() {
nr := tr.numBytes() + tr.pad // number of bytes to skip
// skipUnread skips any unread bytes in the existing file entry, as well as any
// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
// encountered in the data portion; it is okay to hit io.EOF in the padding.
//
// Note that this function still works properly even when sparse files are being
// used since numBytes returns the bytes remaining in the underlying io.Reader.
func (tr *Reader) skipUnread() error {
dataSkip := tr.numBytes() // Number of data bytes to skip
totalSkip := dataSkip + tr.pad // Total number of bytes to skip
tr.curr, tr.pad = nil, 0
if sr, ok := tr.r.(io.Seeker); ok {
if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
return

// If possible, Seek to the last byte before the end of the data section.
// Do this because Seek is often lazy about reporting errors; this will mask
// the fact that the tar stream may be truncated. We can rely on the
// io.CopyN done shortly afterwards to trigger any IO errors.
var seekSkipped int64 // Number of bytes skipped via Seek
if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
// io.Seeker, but calling Seek always returns an error and performs
// no action. Thus, we try an innocent seek to the current position
// to see if Seek is really supported.
pos1, err := sr.Seek(0, os.SEEK_CUR)
if err == nil {
// Seek seems supported, so perform the real Seek.
pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
if err != nil {
tr.err = err
return tr.err
}
seekSkipped = pos2 - pos1
}
}
_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)

var copySkipped int64 // Number of bytes skipped via CopyN
copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
tr.err = io.ErrUnexpectedEOF
}
return tr.err
}

func (tr *Reader) verifyChecksum(header []byte) bool {
Expand All @@ -468,18 +497,25 @@ func (tr *Reader) verifyChecksum(header []byte) bool {
return given == unsigned || given == signed
}

// readHeader reads the next block header and assumes that the underlying reader
// is already aligned to a block boundary.
//
// The err will be set to io.EOF only when one of the following occurs:
// * Exactly 0 bytes are read and EOF is hit.
// * Exactly 1 block of zeros is read and EOF is hit.
// * At least 2 blocks of zeros are read.
func (tr *Reader) readHeader() *Header {
header := tr.hdrBuff[:]
copy(header, zeroBlock)

if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
return nil
return nil // io.EOF is okay here
}

// Two blocks of zero bytes marks the end of the archive.
if bytes.Equal(header, zeroBlock[0:blockSize]) {
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
return nil
return nil // io.EOF is okay here
}
if bytes.Equal(header, zeroBlock[0:blockSize]) {
tr.err = io.EOF
Expand Down
156 changes: 127 additions & 29 deletions src/archive/tar/reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,35 +422,6 @@ func TestPartialRead(t *testing.T) {
}
}

func TestNonSeekable(t *testing.T) {
test := gnuTarTest
f, err := os.Open(test.file)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
defer f.Close()

type readerOnly struct {
io.Reader
}
tr := NewReader(readerOnly{f})
nread := 0

for ; ; nread++ {
_, err := tr.Next()
if err == io.EOF {
break
}
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
}

if nread != len(test.headers) {
t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread)
}
}

func TestParsePAXHeader(t *testing.T) {
paxTests := [][3]string{
{"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths
Expand Down Expand Up @@ -803,3 +774,130 @@ func TestUninitializedRead(t *testing.T) {
}

}

type reader struct{ io.Reader }
type readSeeker struct{ io.ReadSeeker }
type readBadSeeker struct{ io.ReadSeeker }

func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") }

// TestReadTruncation test the ending condition on various truncated files and
// that truncated files are still detected even if the underlying io.Reader
// satisfies io.Seeker.
func TestReadTruncation(t *testing.T) {
var ss []string
for _, p := range []string{
"testdata/gnu.tar",
"testdata/ustar-file-reg.tar",
"testdata/pax-path-hdr.tar",
"testdata/sparse-formats.tar",
} {
buf, err := ioutil.ReadFile(p)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
ss = append(ss, string(buf))
}

data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3]
data2 += strings.Repeat("\x00", 10*512)
trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes

var vectors = []struct {
input string // Input stream
cnt int // Expected number of headers read
err error // Expected error outcome
}{
{"", 0, io.EOF}, // Empty file is a "valid" tar file
{data1[:511], 0, io.ErrUnexpectedEOF},
{data1[:512], 1, io.ErrUnexpectedEOF},
{data1[:1024], 1, io.EOF},
{data1[:1536], 2, io.ErrUnexpectedEOF},
{data1[:2048], 2, io.EOF},
{data1, 2, io.EOF},
{data1[:2048] + data2[:1536], 3, io.EOF},
{data2[:511], 0, io.ErrUnexpectedEOF},
{data2[:512], 1, io.ErrUnexpectedEOF},
{data2[:1195], 1, io.ErrUnexpectedEOF},
{data2[:1196], 1, io.EOF}, // Exact end of data and start of padding
{data2[:1200], 1, io.EOF},
{data2[:1535], 1, io.EOF},
{data2[:1536], 1, io.EOF}, // Exact end of padding
{data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF},
{data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF},
{data2[:1536] + trash, 1, ErrHeader},
{data2[:2048], 1, io.EOF}, // Exactly 1 empty block
{data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF},
{data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF},
{data2[:2048] + trash, 1, ErrHeader},
{data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream)
{data2[:2560] + trash[:1], 1, io.EOF},
{data2[:2560] + trash[:511], 1, io.EOF},
{data2[:2560] + trash, 1, io.EOF},
{data2[:3072], 1, io.EOF},
{pax, 0, io.EOF}, // PAX header without data is a "valid" tar file
{pax + trash[:1], 0, io.ErrUnexpectedEOF},
{pax + trash[:511], 0, io.ErrUnexpectedEOF},
{sparse[:511], 0, io.ErrUnexpectedEOF},
// TODO(dsnet): This should pass, but currently fails.
// {sparse[:512], 0, io.ErrUnexpectedEOF},
{sparse[:3584], 1, io.EOF},
{sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header
{sparse[:9216], 1, io.EOF},
{sparse[:9728], 2, io.ErrUnexpectedEOF},
{sparse[:10240], 2, io.EOF},
{sparse[:11264], 2, io.ErrUnexpectedEOF},
{sparse, 5, io.EOF},
{sparse + trash, 5, io.EOF},
}

for i, v := range vectors {
for j := 0; j < 6; j++ {
var tr *Reader
var s1, s2 string

switch j {
case 0:
tr = NewReader(&reader{strings.NewReader(v.input)})
s1, s2 = "io.Reader", "auto"
case 1:
tr = NewReader(&reader{strings.NewReader(v.input)})
s1, s2 = "io.Reader", "manual"
case 2:
tr = NewReader(&readSeeker{strings.NewReader(v.input)})
s1, s2 = "io.ReadSeeker", "auto"
case 3:
tr = NewReader(&readSeeker{strings.NewReader(v.input)})
s1, s2 = "io.ReadSeeker", "manual"
case 4:
tr = NewReader(&readBadSeeker{strings.NewReader(v.input)})
s1, s2 = "ReadBadSeeker", "auto"
case 5:
tr = NewReader(&readBadSeeker{strings.NewReader(v.input)})
s1, s2 = "ReadBadSeeker", "manual"
}

var cnt int
var err error
for {
if _, err = tr.Next(); err != nil {
break
}
cnt++
if s2 == "manual" {
if _, err = io.Copy(ioutil.Discard, tr); err != nil {
break
}
}
}
if err != v.err {
t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v",
i, s1, s2, err, v.err)
}
if cnt != v.cnt {
t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers",
i, s1, s2, cnt, v.cnt)
}
}
}
}
Binary file added src/archive/tar/testdata/pax-path-hdr.tar
Binary file not shown.
Binary file added src/archive/tar/testdata/ustar-file-reg.tar
Binary file not shown.

0 comments on commit e3b615f

Please sign in to comment.