From 01237957fb66900188e7139a23cdda9d5d422a7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Grobol?=
Date: Thu, 2 Feb 2023 21:34:01 +0000
Subject: [PATCH] Enable encoding detection for the txt parser

As of now, the txt parser reads files in text mode as UTF-8 and fails
with other encodings. This makes it return a bytes object, leaving the
base `decode` to figure out the encoding and act accordingly.
---
 textract/parsers/txt_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/textract/parsers/txt_parser.py b/textract/parsers/txt_parser.py
index 94cbe251..2b76fd4a 100644
--- a/textract/parsers/txt_parser.py
+++ b/textract/parsers/txt_parser.py
@@ -5,5 +5,5 @@ class Parser(BaseParser):
     """Parse ``.txt`` files"""
 
     def extract(self, filename, **kwargs):
-        with open(filename) as stream:
+        with open(filename, "rb") as stream:
             return stream.read()