From e3b615fd6c633a05a5d4d46cc0345fdfb82c28e6 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 02:30:29 -0700 Subject: [PATCH] archive/tar: detect truncated files Motivation: * Reader.skipUnread never reports io.ErrUnexpectedEOF. This is strange given that io.ErrUnexpectedEOF is given through Reader.Read if the user manually reads the file. * Reader.skipUnread fails to detect truncated files since io.Seeker is lazy about reporting errors. Thus, the behavior of Reader differs whether the input io.Reader also satisfies io.Seeker or not. To solve this, we seek to one before the end of the data section and always rely on at least one call to io.CopyN. If the tr.r satisfies io.Seeker, this is guarunteed to never read more than blockSize. Fixes #12557 Change-Id: I0ddddfc6bed0d74465cb7e7a02b26f1de7a7a279 Reviewed-on: https://go-review.googlesource.com/15175 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- src/archive/tar/reader.go | 54 +++++-- src/archive/tar/reader_test.go | 156 ++++++++++++++++---- src/archive/tar/testdata/pax-path-hdr.tar | Bin 0 -> 1024 bytes src/archive/tar/testdata/ustar-file-reg.tar | Bin 0 -> 1536 bytes 4 files changed, 172 insertions(+), 38 deletions(-) create mode 100644 src/archive/tar/testdata/pax-path-hdr.tar create mode 100644 src/archive/tar/testdata/ustar-file-reg.tar diff --git a/src/archive/tar/reader.go b/src/archive/tar/reader.go index b2c45fd38841d0..4af5807b72a6cb 100644 --- a/src/archive/tar/reader.go +++ b/src/archive/tar/reader.go @@ -446,16 +446,45 @@ func (tr *Reader) octal(b []byte) int64 { return int64(x) } -// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. -func (tr *Reader) skipUnread() { - nr := tr.numBytes() + tr.pad // number of bytes to skip +// skipUnread skips any unread bytes in the existing file entry, as well as any +// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is +// encountered in the data portion; it is okay to hit io.EOF in the padding. +// +// Note that this function still works properly even when sparse files are being +// used since numBytes returns the bytes remaining in the underlying io.Reader. +func (tr *Reader) skipUnread() error { + dataSkip := tr.numBytes() // Number of data bytes to skip + totalSkip := dataSkip + tr.pad // Total number of bytes to skip tr.curr, tr.pad = nil, 0 - if sr, ok := tr.r.(io.Seeker); ok { - if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { - return + + // If possible, Seek to the last byte before the end of the data section. + // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the tar stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + var seekSkipped int64 // Number of bytes skipped via Seek + if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, os.SEEK_CUR) + if err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR) + if err != nil { + tr.err = err + return tr.err + } + seekSkipped = pos2 - pos1 } } - _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr) + + var copySkipped int64 // Number of bytes skipped via CopyN + copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) + if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { + tr.err = io.ErrUnexpectedEOF + } + return tr.err } func (tr *Reader) verifyChecksum(header []byte) bool { @@ -468,18 +497,25 @@ func (tr *Reader) verifyChecksum(header []byte) bool { return given == unsigned || given == signed } +// readHeader reads the next block header and assumes that the underlying reader +// is already aligned to a block boundary. +// +// The err will be set to io.EOF only when one of the following occurs: +// * Exactly 0 bytes are read and EOF is hit. +// * Exactly 1 block of zeros is read and EOF is hit. +// * At least 2 blocks of zeros are read. func (tr *Reader) readHeader() *Header { header := tr.hdrBuff[:] copy(header, zeroBlock) if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { - return nil + return nil // io.EOF is okay here } // Two blocks of zero bytes marks the end of the archive. if bytes.Equal(header, zeroBlock[0:blockSize]) { if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { - return nil + return nil // io.EOF is okay here } if bytes.Equal(header, zeroBlock[0:blockSize]) { tr.err = io.EOF diff --git a/src/archive/tar/reader_test.go b/src/archive/tar/reader_test.go index 4a6d1a9e9f275b..f8b344da6e13c6 100644 --- a/src/archive/tar/reader_test.go +++ b/src/archive/tar/reader_test.go @@ -422,35 +422,6 @@ func TestPartialRead(t *testing.T) { } } -func TestNonSeekable(t *testing.T) { - test := gnuTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - type readerOnly struct { - io.Reader - } - tr := NewReader(readerOnly{f}) - nread := 0 - - for ; ; nread++ { - _, err := tr.Next() - if err == io.EOF { - break - } - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - } - - if nread != len(test.headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread) - } -} - func TestParsePAXHeader(t *testing.T) { paxTests := [][3]string{ {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths @@ -803,3 +774,130 @@ func TestUninitializedRead(t *testing.T) { } } + +type reader struct{ io.Reader } +type readSeeker struct{ io.ReadSeeker } +type readBadSeeker struct{ io.ReadSeeker } + +func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") } + +// TestReadTruncation test the ending condition on various truncated files and +// that truncated files are still detected even if the underlying io.Reader +// satisfies io.Seeker. +func TestReadTruncation(t *testing.T) { + var ss []string + for _, p := range []string{ + "testdata/gnu.tar", + "testdata/ustar-file-reg.tar", + "testdata/pax-path-hdr.tar", + "testdata/sparse-formats.tar", + } { + buf, err := ioutil.ReadFile(p) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + ss = append(ss, string(buf)) + } + + data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3] + data2 += strings.Repeat("\x00", 10*512) + trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes + + var vectors = []struct { + input string // Input stream + cnt int // Expected number of headers read + err error // Expected error outcome + }{ + {"", 0, io.EOF}, // Empty file is a "valid" tar file + {data1[:511], 0, io.ErrUnexpectedEOF}, + {data1[:512], 1, io.ErrUnexpectedEOF}, + {data1[:1024], 1, io.EOF}, + {data1[:1536], 2, io.ErrUnexpectedEOF}, + {data1[:2048], 2, io.EOF}, + {data1, 2, io.EOF}, + {data1[:2048] + data2[:1536], 3, io.EOF}, + {data2[:511], 0, io.ErrUnexpectedEOF}, + {data2[:512], 1, io.ErrUnexpectedEOF}, + {data2[:1195], 1, io.ErrUnexpectedEOF}, + {data2[:1196], 1, io.EOF}, // Exact end of data and start of padding + {data2[:1200], 1, io.EOF}, + {data2[:1535], 1, io.EOF}, + {data2[:1536], 1, io.EOF}, // Exact end of padding + {data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash, 1, ErrHeader}, + {data2[:2048], 1, io.EOF}, // Exactly 1 empty block + {data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash, 1, ErrHeader}, + {data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream) + {data2[:2560] + trash[:1], 1, io.EOF}, + {data2[:2560] + trash[:511], 1, io.EOF}, + {data2[:2560] + trash, 1, io.EOF}, + {data2[:3072], 1, io.EOF}, + {pax, 0, io.EOF}, // PAX header without data is a "valid" tar file + {pax + trash[:1], 0, io.ErrUnexpectedEOF}, + {pax + trash[:511], 0, io.ErrUnexpectedEOF}, + {sparse[:511], 0, io.ErrUnexpectedEOF}, + // TODO(dsnet): This should pass, but currently fails. + // {sparse[:512], 0, io.ErrUnexpectedEOF}, + {sparse[:3584], 1, io.EOF}, + {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header + {sparse[:9216], 1, io.EOF}, + {sparse[:9728], 2, io.ErrUnexpectedEOF}, + {sparse[:10240], 2, io.EOF}, + {sparse[:11264], 2, io.ErrUnexpectedEOF}, + {sparse, 5, io.EOF}, + {sparse + trash, 5, io.EOF}, + } + + for i, v := range vectors { + for j := 0; j < 6; j++ { + var tr *Reader + var s1, s2 string + + switch j { + case 0: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "auto" + case 1: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "manual" + case 2: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "auto" + case 3: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "manual" + case 4: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "auto" + case 5: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "manual" + } + + var cnt int + var err error + for { + if _, err = tr.Next(); err != nil { + break + } + cnt++ + if s2 == "manual" { + if _, err = io.Copy(ioutil.Discard, tr); err != nil { + break + } + } + } + if err != v.err { + t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v", + i, s1, s2, err, v.err) + } + if cnt != v.cnt { + t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers", + i, s1, s2, cnt, v.cnt) + } + } + } +} diff --git a/src/archive/tar/testdata/pax-path-hdr.tar b/src/archive/tar/testdata/pax-path-hdr.tar new file mode 100644 index 0000000000000000000000000000000000000000..ab8fc325b26159f4fed6bfb59fe5f616d35fec74 GIT binary patch literal 1024 zcmXR&EXmL>$=5GRO-#v6r43~O0Sq{30|OI7m>ft6gMqP;fsrYLLIndIKxuJFViC}K xO07co9Hr*bNx!kNLIE%d*akR880v$Gocz3WU67b=USe)47oFTOYR$le004hRKE?n5 literal 0 HcmV?d00001 diff --git a/src/archive/tar/testdata/ustar-file-reg.tar b/src/archive/tar/testdata/ustar-file-reg.tar new file mode 100644 index 0000000000000000000000000000000000000000..c84fa27ffb8613d20f7ac9690cd59827b04028f6 GIT binary patch literal 1536 zcmdUuJ(Hq95QhCJmlMG7O>hA!%P7hKg3OW)iVh<3F#{;S{=CgM+^V}b=Pyosep=F7x+*OI&?Q6F7LxR?fb`B^0 zttmJo<+m$~$Msw9KQ_wfqfW1sDJ#zsWq25)8&TsKn~+7*oF6~$0+nD?L2C$TBQM>$ zcQqo7Eo8w%PL1H9D9KY4`f7Ku6*oaxOv|7S1ZpTT=%sbGS#L2%*Uxm5P}U`mG$%9I zIRuF>849Ugh}k{mmgLJGtca9XnA{r2KBJ`fRXOnPqEZjWtz4z5Mq_`uZWX)VuFVko z#$DNd>@Saj1w+|ef@dPC=g!5Kb4c+mQ$bcu#R_I=;>D)V&agmv_`vpStP-sQh!z(| z5{9v2$DGKS|DrLqZ8y7?ox@|a-R^30zMeK388O@+r+cK=9NdZ)ow#}n{kwf`tD4=t zR9Oz?v}2QNv&rDR7u$%4%_>%5(k#={r!rZ(5WA5+U_Rz+w6{_kV7C!KK2e+5VuS-5 zWLRhpI#>Iq%|mhyZxDfRHCi7gC+R&sD4|k;~pXMNUD{^NPC5(KX~`*cXkwAurz(+XVNg z1P`yNv%FZy)9`uTGCWUJ^@>e2&T3O`W@re{ZN S9gn%XW0CXwKYp`+7X1&(X!Xnh literal 0 HcmV?d00001