diff --git a/datalad_next/itertools/decode_bytes.py b/datalad_next/itertools/decode_bytes.py index 18ff3a16..bb2cca63 100644 --- a/datalad_next/itertools/decode_bytes.py +++ b/datalad_next/itertools/decode_bytes.py @@ -103,7 +103,7 @@ def handle_decoding_error(position: int, else: return ( position + exc.end, - joined_data[:position + exc.start].decode(encoding) + joined_data[position:position + exc.start].decode(encoding) + joined_data[position + exc.start:position + exc.end].decode( encoding, errors='backslashreplace' diff --git a/datalad_next/itertools/tests/test_decode_bytes.py b/datalad_next/itertools/tests/test_decode_bytes.py index a463cc46..6139f7ca 100644 --- a/datalad_next/itertools/tests/test_decode_bytes.py +++ b/datalad_next/itertools/tests/test_decode_bytes.py @@ -35,3 +35,8 @@ def test_no_empty_strings(): # check that empty strings are not yielded r = tuple(decode_bytes([b'\xc3', b'\xb6'])) assert r == ('ö',) + + +def test_multiple_errors(): + r = ''.join(decode_bytes([b'08 War \xaf No \xaf More \xaf Trouble.shn.mp3'])) + assert r == '08 War \\xaf No \\xaf More \\xaf Trouble.shn.mp3'