From 63a233f72b777234b8bd096ed5449b8e82e3b5a1 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Mon, 24 Apr 2023 11:08:36 -0700 Subject: [PATCH 01/11] fix json load encoding error --- DESCRIPTION.md | 4 ++++ src/snowflake/connector/result_batch.py | 3 +++ src/snowflake/connector/version.py | 2 +- test/integ/test_cursor.py | 13 +++++++++++++ 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION.md b/DESCRIPTION.md index c21efcac2..c65df46c5 100644 --- a/DESCRIPTION.md +++ b/DESCRIPTION.md @@ -8,6 +8,10 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne # Release Notes +- v3.0.4(Unreleased) + + - Fixed a bug that JSON formatted returning data is decoded in wrong encoding. + - v3.0.3(April 20, 2023) - Fixed a bug that prints error in logs for GET command on GCS. diff --git a/src/snowflake/connector/result_batch.py b/src/snowflake/connector/result_batch.py index a90c467b3..646b37802 100644 --- a/src/snowflake/connector/result_batch.py +++ b/src/snowflake/connector/result_batch.py @@ -423,6 +423,9 @@ def _load(self, response: Response) -> list: Unfortunately there's not type hint for this. For context: https://github.com/python/typing/issues/182 """ + # SNOW-787480, response.apparent_encoding is unreliable, chardet.detect can be wrong + # we set encoding to be utf-8 if encoding is not specified in the response header + response.encoding = response.encoding or "utf-8" read_data = response.text return json.loads("".join(["[", read_data, "]"])) diff --git a/src/snowflake/connector/version.py b/src/snowflake/connector/version.py index 146b15a63..f7e2ea2da 100644 --- a/src/snowflake/connector/version.py +++ b/src/snowflake/connector/version.py @@ -1,3 +1,3 @@ # Update this for the versions # Don't change the forth version number from None -VERSION = (3, 0, 3, None) +VERSION = (3, 0, 4, None) diff --git a/test/integ/test_cursor.py b/test/integ/test_cursor.py index 2b8335a65..a45c40023 100644 --- a/test/integ/test_cursor.py +++ b/test/integ/test_cursor.py @@ -1618,3 +1618,16 @@ def test_multi_statement_failure(conn_cnx): CLIENT_VERSION, (type(None), str), ) + + +def test_encoding_utf8_for_json_load(conn_cnx): + # SNOW-787480, if not explicitly setting utf-8 encoding, the data will be + # detected encoding as windows-1250 by chardet.detect + # which is wrong, with the utf-8 fix, we can get the correct decoded data + with conn_cnx() as con, con.cursor() as cur: + cur.execute("alter session set python_connector_query_result_format='JSON'") + ret = cur.execute( + """select '"",' || '"",' || '"",' || '"",' || '"",' || 'Ofigràfic' || '"",' from TABLE(GENERATOR(ROWCOUNT => 5000)) v;""" + ).fetchall() + assert len(ret) == 5000 + assert ret[0] == ('"","","","","",Ofigràfic"",',) From 1777d5ceed7c8e4b776626c90beb7ec6252d7831 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Mon, 24 Apr 2023 15:16:57 -0700 Subject: [PATCH 02/11] update implementation --- src/snowflake/connector/result_batch.py | 10 ++++++++-- test/integ/test_cursor.py | 9 +++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/snowflake/connector/result_batch.py b/src/snowflake/connector/result_batch.py index 646b37802..e03db2bd1 100644 --- a/src/snowflake/connector/result_batch.py +++ b/src/snowflake/connector/result_batch.py @@ -424,9 +424,15 @@ def _load(self, response: Response) -> list: For context: https://github.com/python/typing/issues/182 """ # SNOW-787480, response.apparent_encoding is unreliable, chardet.detect can be wrong + # we try decoding as utf-8 first, # we set encoding to be utf-8 if encoding is not specified in the response header - response.encoding = response.encoding or "utf-8" - read_data = response.text + try: + read_data = str(response.content, "utf-8", errors="strict") + except UnicodeError: + logger.debug( + f"utf-8 decoding failed and fell back to automatic decoder for result batch id: {self.id}" + ) + read_data = response.text return json.loads("".join(["[", read_data, "]"])) def _parse( diff --git a/test/integ/test_cursor.py b/test/integ/test_cursor.py index a45c40023..8f069c59a 100644 --- a/test/integ/test_cursor.py +++ b/test/integ/test_cursor.py @@ -53,6 +53,7 @@ class ResultMetadata(NamedTuple): ) from snowflake.connector.sqlstate import SQLSTATE_FEATURE_NOT_SUPPORTED from snowflake.connector.telemetry import TelemetryField +from snowflake.connector.vendored.requests import Response try: from snowflake.connector.util_text import random_string @@ -1624,6 +1625,14 @@ def test_encoding_utf8_for_json_load(conn_cnx): # SNOW-787480, if not explicitly setting utf-8 encoding, the data will be # detected encoding as windows-1250 by chardet.detect # which is wrong, with the utf-8 fix, we can get the correct decoded data + + local_result_batch = JSONResultBatch(None, None, None, None, None, None) + resp = Response() + resp._content = '{"key": "á"}'.encode("latin1") + assert local_result_batch._load(resp) == [ + {"key": "с"} + ] # it is expected to be wrong + with conn_cnx() as con, con.cursor() as cur: cur.execute("alter session set python_connector_query_result_format='JSON'") ret = cur.execute( From 32a7d6dfcf6df36cbbf0e1c82d2ffbf30465a893 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Mon, 24 Apr 2023 15:19:43 -0700 Subject: [PATCH 03/11] update comment --- src/snowflake/connector/result_batch.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/snowflake/connector/result_batch.py b/src/snowflake/connector/result_batch.py index e03db2bd1..e86f14b92 100644 --- a/src/snowflake/connector/result_batch.py +++ b/src/snowflake/connector/result_batch.py @@ -423,9 +423,9 @@ def _load(self, response: Response) -> list: Unfortunately there's not type hint for this. For context: https://github.com/python/typing/issues/182 """ - # SNOW-787480, response.apparent_encoding is unreliable, chardet.detect can be wrong - # we try decoding as utf-8 first, - # we set encoding to be utf-8 if encoding is not specified in the response header + # SNOW-787480, response.apparent_encoding is unreliable, chardet.detect can be wrong which is used by + # response.text to decode content. + # Instead, we try decoding as utf-8 first, if we hit UnicodeError, we fall back to the auto-detection. try: read_data = str(response.content, "utf-8", errors="strict") except UnicodeError: From fe98585753d3dcb22101e339baeb9fcf3953696c Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Mon, 24 Apr 2023 15:36:46 -0700 Subject: [PATCH 04/11] skip old driver --- test/integ/test_cursor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/integ/test_cursor.py b/test/integ/test_cursor.py index 8f069c59a..6694a7fd7 100644 --- a/test/integ/test_cursor.py +++ b/test/integ/test_cursor.py @@ -53,7 +53,6 @@ class ResultMetadata(NamedTuple): ) from snowflake.connector.sqlstate import SQLSTATE_FEATURE_NOT_SUPPORTED from snowflake.connector.telemetry import TelemetryField -from snowflake.connector.vendored.requests import Response try: from snowflake.connector.util_text import random_string @@ -1621,12 +1620,15 @@ def test_multi_statement_failure(conn_cnx): ) +@pytest.mark.skipolddriver def test_encoding_utf8_for_json_load(conn_cnx): # SNOW-787480, if not explicitly setting utf-8 encoding, the data will be # detected encoding as windows-1250 by chardet.detect # which is wrong, with the utf-8 fix, we can get the correct decoded data local_result_batch = JSONResultBatch(None, None, None, None, None, None) + from snowflake.connector.vendored.requests import Response + resp = Response() resp._content = '{"key": "á"}'.encode("latin1") assert local_result_batch._load(resp) == [ From df7f23a0d6d80be3be1e01e4c36c3482e31924d3 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Tue, 25 Apr 2023 17:43:53 -0700 Subject: [PATCH 05/11] change implementation --- DESCRIPTION.md | 2 +- src/snowflake/connector/connection.py | 7 +++++++ src/snowflake/connector/result_batch.py | 26 +++++++++++++++---------- src/snowflake/connector/version.py | 2 +- test/integ/test_cursor.py | 25 +++++++++++------------- 5 files changed, 36 insertions(+), 26 deletions(-) diff --git a/DESCRIPTION.md b/DESCRIPTION.md index c65df46c5..bf47d9da9 100644 --- a/DESCRIPTION.md +++ b/DESCRIPTION.md @@ -10,7 +10,7 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne - v3.0.4(Unreleased) - - Fixed a bug that JSON formatted returning data is decoded in wrong encoding. + - Added the json_result_force_utf8_decoding connection parameter to force decoding JSON content in utf-8 when the result format is JSON. - v3.0.3(April 20, 2023) diff --git a/src/snowflake/connector/connection.py b/src/snowflake/connector/connection.py index 23c8d3f4f..a122b7e76 100644 --- a/src/snowflake/connector/connection.py +++ b/src/snowflake/connector/connection.py @@ -204,6 +204,10 @@ def DefaultConverterClass() -> type: True, bool, ), # Whether to log imported packages in telemetry + "json_result_force_utf8_decoding": ( + False, + bool, + ), # Whether to force the JSON content to be decoded in utf-8, it only works when result format is JSON } APPLICATION_RE = re.compile(r"[\w\d_]+") @@ -265,6 +269,9 @@ class SnowflakeConnection: enable_connection_diag: when true, clients will generate a connectivity diagnostic report. connection_diag_log_path: path to location to create diag report with enable_connection_diag. connection_diag_whitelist_path: path to a whitelist.json file to test with enable_connection_diag. + json_result_force_utf8_decoding: When true, json result will be decoded in utf-8, + when false, the encoding of the content is auto-detected. Default value is false. + This parameter only works when the result format is JSON. """ OCSP_ENV_LOCK = Lock() diff --git a/src/snowflake/connector/result_batch.py b/src/snowflake/connector/result_batch.py index e86f14b92..791d7e0d7 100644 --- a/src/snowflake/connector/result_batch.py +++ b/src/snowflake/connector/result_batch.py @@ -132,6 +132,7 @@ def remote_chunk_info(c: dict[str, Any]) -> RemoteChunkInfo: schema, column_converters, cursor._use_dict_result, + json_result_force_utf8_decoding=cursor._connection._json_result_force_utf8_decoding, ) for c in chunks ] @@ -384,6 +385,8 @@ def __init__( schema: Sequence[ResultMetadata], column_converters: Sequence[tuple[str, SnowflakeConverterType]], use_dict_result: bool, + *, + json_result_force_utf8_decoding: bool = False, ) -> None: super().__init__( rowcount, @@ -392,6 +395,7 @@ def __init__( schema, use_dict_result, ) + self._json_result_force_utf8_decoding = json_result_force_utf8_decoding self.column_converters = column_converters @classmethod @@ -420,18 +424,20 @@ def _load(self, response: Response) -> list: Returns: Whatever ``json.loads`` return, but in a list. - Unfortunately there's not type hint for this. + Unfortunately there's no type hint for this. For context: https://github.com/python/typing/issues/182 """ - # SNOW-787480, response.apparent_encoding is unreliable, chardet.detect can be wrong which is used by - # response.text to decode content. - # Instead, we try decoding as utf-8 first, if we hit UnicodeError, we fall back to the auto-detection. - try: - read_data = str(response.content, "utf-8", errors="strict") - except UnicodeError: - logger.debug( - f"utf-8 decoding failed and fell back to automatic decoder for result batch id: {self.id}" - ) + # if users specify how to decode the data, we decode the bytes using the specified encoding + if self._json_result_force_utf8_decoding: + try: + read_data = str(response.content, "utf-8", errors="strict") + except Exception as exc: + err_msg = f"failed to decode json result content due to error {exc!r}" + logger.error(err_msg) + raise Error(msg=err_msg) + else: + # note: SNOW-787480 response.apparent_encoding is unreliable, chardet.detect can be wrong which is used by + # response.text to decode content read_data = response.text return json.loads("".join(["[", read_data, "]"])) diff --git a/src/snowflake/connector/version.py b/src/snowflake/connector/version.py index f7e2ea2da..146b15a63 100644 --- a/src/snowflake/connector/version.py +++ b/src/snowflake/connector/version.py @@ -1,3 +1,3 @@ # Update this for the versions # Don't change the forth version number from None -VERSION = (3, 0, 4, None) +VERSION = (3, 0, 3, None) diff --git a/test/integ/test_cursor.py b/test/integ/test_cursor.py index 6694a7fd7..81f741194 100644 --- a/test/integ/test_cursor.py +++ b/test/integ/test_cursor.py @@ -1621,21 +1621,18 @@ def test_multi_statement_failure(conn_cnx): @pytest.mark.skipolddriver -def test_encoding_utf8_for_json_load(conn_cnx): - # SNOW-787480, if not explicitly setting utf-8 encoding, the data will be - # detected encoding as windows-1250 by chardet.detect - # which is wrong, with the utf-8 fix, we can get the correct decoded data - - local_result_batch = JSONResultBatch(None, None, None, None, None, None) - from snowflake.connector.vendored.requests import Response - - resp = Response() - resp._content = '{"key": "á"}'.encode("latin1") - assert local_result_batch._load(resp) == [ - {"key": "с"} - ] # it is expected to be wrong - +def test_decoding_utf8_for_json_result(conn_cnx): + # SNOW-787480, if not explicitly setting utf-8 decoding, the data will be + # detected decoding as windows-1250 by chardet.detect with conn_cnx() as con, con.cursor() as cur: + cur.execute("alter session set python_connector_query_result_format='JSON'") + ret = cur.execute( + """select '"",' || '"",' || '"",' || '"",' || '"",' || 'Ofigràfic' || '"",' from TABLE(GENERATOR(ROWCOUNT => 5000)) v;""" + ).fetchall() + assert len(ret) == 5000 + assert ret[0] == ('"","","","","",OfigrĂ\xa0fic"",',) + + with conn_cnx(json_result_force_utf8_decoding=True) as con, con.cursor() as cur: cur.execute("alter session set python_connector_query_result_format='JSON'") ret = cur.execute( """select '"",' || '"",' || '"",' || '"",' || '"",' || 'Ofigràfic' || '"",' from TABLE(GENERATOR(ROWCOUNT => 5000)) v;""" From 8737215baed5b74883f7c0bd1551083e6966ac90 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Tue, 25 Apr 2023 17:48:39 -0700 Subject: [PATCH 06/11] link gh issue --- src/snowflake/connector/result_batch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snowflake/connector/result_batch.py b/src/snowflake/connector/result_batch.py index 791d7e0d7..b0c40be2c 100644 --- a/src/snowflake/connector/result_batch.py +++ b/src/snowflake/connector/result_batch.py @@ -437,7 +437,7 @@ def _load(self, response: Response) -> list: raise Error(msg=err_msg) else: # note: SNOW-787480 response.apparent_encoding is unreliable, chardet.detect can be wrong which is used by - # response.text to decode content + # response.text to decode content, check issue: https://github.com/chardet/chardet/issues/148 read_data = response.text return json.loads("".join(["[", read_data, "]"])) From 1e1449e2fe8e34e448f6a494384348df7282dcc4 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Wed, 26 Apr 2023 11:39:02 -0700 Subject: [PATCH 07/11] update tests --- test/integ/test_cursor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integ/test_cursor.py b/test/integ/test_cursor.py index 81f741194..a71ece2ca 100644 --- a/test/integ/test_cursor.py +++ b/test/integ/test_cursor.py @@ -1630,7 +1630,7 @@ def test_decoding_utf8_for_json_result(conn_cnx): """select '"",' || '"",' || '"",' || '"",' || '"",' || 'Ofigràfic' || '"",' from TABLE(GENERATOR(ROWCOUNT => 5000)) v;""" ).fetchall() assert len(ret) == 5000 - assert ret[0] == ('"","","","","",OfigrĂ\xa0fic"",',) + assert ret[0] != ('"","","","","",Ofigràfic"",',) with conn_cnx(json_result_force_utf8_decoding=True) as con, con.cursor() as cur: cur.execute("alter session set python_connector_query_result_format='JSON'") From e26874074d2bcb2f253fd4b36650751c06ee62d8 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Thu, 27 Apr 2023 16:10:55 -0700 Subject: [PATCH 08/11] update tests --- src/snowflake/connector/connection.py | 4 ++-- test/integ/test_cursor.py | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/snowflake/connector/connection.py b/src/snowflake/connector/connection.py index a122b7e76..c84f8c450 100644 --- a/src/snowflake/connector/connection.py +++ b/src/snowflake/connector/connection.py @@ -207,7 +207,7 @@ def DefaultConverterClass() -> type: "json_result_force_utf8_decoding": ( False, bool, - ), # Whether to force the JSON content to be decoded in utf-8, it only works when result format is JSON + ), # Whether to force the JSON content to be decoded in utf-8, it is only effective when result format is JSON } APPLICATION_RE = re.compile(r"[\w\d_]+") @@ -271,7 +271,7 @@ class SnowflakeConnection: connection_diag_whitelist_path: path to a whitelist.json file to test with enable_connection_diag. json_result_force_utf8_decoding: When true, json result will be decoded in utf-8, when false, the encoding of the content is auto-detected. Default value is false. - This parameter only works when the result format is JSON. + This parameter is only effective when the result format is JSON. """ OCSP_ENV_LOCK = Lock() diff --git a/test/integ/test_cursor.py b/test/integ/test_cursor.py index a71ece2ca..8133aae7c 100644 --- a/test/integ/test_cursor.py +++ b/test/integ/test_cursor.py @@ -1630,7 +1630,13 @@ def test_decoding_utf8_for_json_result(conn_cnx): """select '"",' || '"",' || '"",' || '"",' || '"",' || 'Ofigràfic' || '"",' from TABLE(GENERATOR(ROWCOUNT => 5000)) v;""" ).fetchall() assert len(ret) == 5000 - assert ret[0] != ('"","","","","",Ofigràfic"",',) + # This test case is tricky, for most of the test cases, the decoding is incorrect and can could be different + # on different platforms, however, due to randomness, in rare cases the decoding is indeed utf-8, + # the backend behavior is flaky + assert ret[0] in ( + ('"","","","","",OfigrĂ\xa0fic"",',), + ('"","","","","",Ofigràfic"",',), + ) with conn_cnx(json_result_force_utf8_decoding=True) as con, con.cursor() as cur: cur.execute("alter session set python_connector_query_result_format='JSON'") From cfef7fcad07550efd1d10f9ab2aaef5e5643c29f Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Tue, 2 May 2023 10:28:05 -0700 Subject: [PATCH 09/11] review feedback --- test/integ/test_cursor.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/test/integ/test_cursor.py b/test/integ/test_cursor.py index 8133aae7c..452552805 100644 --- a/test/integ/test_cursor.py +++ b/test/integ/test_cursor.py @@ -1624,24 +1624,25 @@ def test_multi_statement_failure(conn_cnx): def test_decoding_utf8_for_json_result(conn_cnx): # SNOW-787480, if not explicitly setting utf-8 decoding, the data will be # detected decoding as windows-1250 by chardet.detect - with conn_cnx() as con, con.cursor() as cur: - cur.execute("alter session set python_connector_query_result_format='JSON'") - ret = cur.execute( - """select '"",' || '"",' || '"",' || '"",' || '"",' || 'Ofigràfic' || '"",' from TABLE(GENERATOR(ROWCOUNT => 5000)) v;""" - ).fetchall() + with conn_cnx( + session_parameters={"python_connector_query_result_format": "JSON"} + ) as con, con.cursor() as cur: + sql = """select '"",' || '"",' || '"",' || '"",' || '"",' || 'Ofigràfic' || '"",' from TABLE(GENERATOR(ROWCOUNT => 5000)) v;""" + ret = cur.execute(sql).fetchall() assert len(ret) == 5000 # This test case is tricky, for most of the test cases, the decoding is incorrect and can could be different # on different platforms, however, due to randomness, in rare cases the decoding is indeed utf-8, # the backend behavior is flaky assert ret[0] in ( - ('"","","","","",OfigrĂ\xa0fic"",',), - ('"","","","","",Ofigràfic"",',), + ('"","","","","",OfigrĂ\xa0fic"",',), # AWS Mac Cloud + ('"","","","","",OfigrÃ\xa0fic"",',), # GCP Mac and Linux Cloud + ('"","","","","",Ofigr\xc3\\xa0fic"",',), # GCP Windows Cloud ) - with conn_cnx(json_result_force_utf8_decoding=True) as con, con.cursor() as cur: - cur.execute("alter session set python_connector_query_result_format='JSON'") - ret = cur.execute( - """select '"",' || '"",' || '"",' || '"",' || '"",' || 'Ofigràfic' || '"",' from TABLE(GENERATOR(ROWCOUNT => 5000)) v;""" - ).fetchall() + with conn_cnx( + session_parameters={"python_connector_query_result_format": "JSON"}, + json_result_force_utf8_decoding=True, + ) as con, con.cursor() as cur: + ret = cur.execute(sql).fetchall() assert len(ret) == 5000 assert ret[0] == ('"","","","","",Ofigràfic"",',) From 1331bc64c04ad7c5e6506a0b5f989b72dffb0be3 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Tue, 2 May 2023 12:02:52 -0700 Subject: [PATCH 10/11] update tests --- test/integ/test_cursor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/integ/test_cursor.py b/test/integ/test_cursor.py index 452552805..d1fd9e395 100644 --- a/test/integ/test_cursor.py +++ b/test/integ/test_cursor.py @@ -1634,9 +1634,12 @@ def test_decoding_utf8_for_json_result(conn_cnx): # on different platforms, however, due to randomness, in rare cases the decoding is indeed utf-8, # the backend behavior is flaky assert ret[0] in ( - ('"","","","","",OfigrĂ\xa0fic"",',), # AWS Mac Cloud + ('"","","","","",OfigrĂ\xa0fic"",',), # AWS Cloud ('"","","","","",OfigrÃ\xa0fic"",',), # GCP Mac and Linux Cloud ('"","","","","",Ofigr\xc3\\xa0fic"",',), # GCP Windows Cloud + ( + '"","","","","",Ofigràfic"",', + ), # regression environment gets the correct decoding ) with conn_cnx( From 98fea131f7156c8a2fb36a9414d530a7844d0da6 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Tue, 2 May 2023 13:20:40 -0700 Subject: [PATCH 11/11] update tests --- test/integ/test_cursor.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/integ/test_cursor.py b/test/integ/test_cursor.py index d1fd9e395..f090dfde5 100644 --- a/test/integ/test_cursor.py +++ b/test/integ/test_cursor.py @@ -51,6 +51,7 @@ class ResultMetadata(NamedTuple): ER_FAILED_TO_REWRITE_MULTI_ROW_INSERT, ER_NOT_POSITIVE_SIZE, ) +from snowflake.connector.errors import Error from snowflake.connector.sqlstate import SQLSTATE_FEATURE_NOT_SUPPORTED from snowflake.connector.telemetry import TelemetryField @@ -1649,3 +1650,11 @@ def test_decoding_utf8_for_json_result(conn_cnx): ret = cur.execute(sql).fetchall() assert len(ret) == 5000 assert ret[0] == ('"","","","","",Ofigràfic"",',) + + result_batch = JSONResultBatch( + None, None, None, None, None, False, json_result_force_utf8_decoding=True + ) + mock_resp = mock.Mock() + mock_resp.content = "À".encode("latin1") + with pytest.raises(Error): + result_batch._load(mock_resp)