From 39deb33defe40958701baff41ed6d93b24058b85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Kvalsvik?= Date: Mon, 4 Mar 2019 14:11:20 +0100 Subject: [PATCH] Avoid crash on too large padbyte values Some files have a rather nonsensical padbyte field following a segment, with it being as large as the full segment, including the header. When chopping padbytes off actual record information, this needs to be taken into account in order to avoid overflow, or realloc-to-negative. --- lib/src/io.cpp | 11 +++++++++-- python/data/padbytes-large-as-record.dlis | Bin 0 -> 180 bytes python/tests/test_core.py | 10 ++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 python/data/padbytes-large-as-record.dlis diff --git a/lib/src/io.cpp b/lib/src/io.cpp index 7316046e8..d31a654b6 100644 --- a/lib/src/io.cpp +++ b/lib/src/io.cpp @@ -279,8 +279,15 @@ record& stream::at( int i, record& rec ) noexcept (false) { this->fs.seekg( tell ); - const auto chop = []( std::vector< char >& vec, int bytes ) { - vec.erase( vec.end() - bytes, vec.end() ); + const auto chop = [](std::vector< char >& vec, int bytes) { + const int size = vec.size(); + const int new_size = (std::max)(0, size - bytes); + + if (size - bytes < 0) { + // TODO: user-warning + // const auto msg = "at::chop() would remove more bytes than read"; + } + vec.resize(new_size); }; while (true) { diff --git a/python/data/padbytes-large-as-record.dlis b/python/data/padbytes-large-as-record.dlis new file mode 100644 index 0000000000000000000000000000000000000000..abb66953e53217be4251e3505aa53ebc2e1baa8d GIT binary patch literal 180 OcmZSB(#SBxz!m`5_5tbu literal 0 HcmV?d00001 diff --git a/python/tests/test_core.py b/python/tests/test_core.py index 7c4d339b8..beb5e0044 100644 --- a/python/tests/test_core.py +++ b/python/tests/test_core.py @@ -278,3 +278,13 @@ def test_load_pre_vrl_garbage(): def test_load_file_with_broken_utf8(): with dlisio.load('data/broken-degree-symbol.dlis') as f: pass + +def 
test_padbytes_as_large_as_record(): + # 180-byte long explicit record with padding, and padbytes are set to 180 + # (leaving the resulting len(data) == 0) + f = dlisio.open('data/padbytes-large-as-record.dlis') + f.reindex([0], [180]) + + rec = f.extract([0])[0] + assert rec.explicit + assert len(memoryview(rec)) == 0