Azure · iscai-msft · Aug 3, 2021 · Jul 27, 2021 · Jul 27, 2021 · Jul 27, 2021
@@ -6,7 +6,9 @@
 
 - Cut hard dependency on requests library
 
-### Breaking Changes
+### Breaking Changes in the Provisional `azure.core.rest` package
+
+- `azure.core.rest` will no longer try to guess the `charset` when it cannot be extracted from the `HttpResponse`'s `Content-Type` header. This removes our dependency on `chardet`.
 
 ### Key Bugs Fixed
 

@@ -50,11 +50,6 @@
     from urlparse import urlparse  # type: ignore
 except ImportError:
     from urllib.parse import urlparse
-try:
-    import cchardet as chardet
-except ImportError:  # pragma: no cover
-    import chardet  # type: ignore
-from ..exceptions import ResponseNotReadError
 
 ################################### TYPES SECTION #########################
 
@@ -285,22 +280,21 @@ def from_pipeline_transport_request_helper(request_class, pipeline_transport_req
     )
 
 def get_charset_encoding(response):
+    # type: (...) -> Optional[str]
     content_type = response.headers.get("Content-Type")
 
     if not content_type:
         return None
     _, params = cgi.parse_header(content_type)
     encoding = params.get('charset') # -> utf-8
-    if encoding is None:
-        if content_type in ("application/json", "application/rdap+json"):
-            # RFC 7159 states that the default encoding is UTF-8.
-            # RFC 7483 defines application/rdap+json
-            encoding = "utf-8"
-        else:
-            try:
-                encoding = chardet.detect(response.content)["encoding"]
-            except ResponseNotReadError:
-                pass
     if encoding is None or not lookup_encoding(encoding):
         return None
     return encoding
+
+def decode_to_text(encoding, content):
+    # type: (Optional[str], bytes) -> str
+    if encoding == "utf-8":
+        encoding = "utf-8-sig"
+    if encoding:
+        return content.decode(encoding)
+    return codecs.getincrementaldecoder("utf-8-sig")(errors="replace").decode(content)
@@ -42,6 +42,7 @@
     to_pipeline_transport_request_helper,
     from_pipeline_transport_request_helper,
     get_charset_encoding,
+    decode_to_text,
 )
 from ..exceptions import ResponseNotReadError
 if TYPE_CHECKING:
@@ -205,6 +206,7 @@ def __init__(self, **kwargs):
         self._json = None  # this is filled in ContentDecodePolicy, when we deserialize
         self._connection_data_block_size = None  # type: Optional[int]
         self._content = None  # type: Optional[bytes]
+        self._text = None  # type: Optional[str]
 
     @property
     def url(self):
@@ -215,13 +217,18 @@ def url(self):
     @property
     def encoding(self):
         # type: (...) -> Optional[str]
-        """Returns the response encoding. By default, is specified
-        by the response Content-Type header.
+        """Returns the response encoding.
+
+        :return: The response encoding. We either return the encoding set by the user,
+         or try extracting the encoding from the response's content type. If all fails,
+         we return `None`.
+        :rtype: optional[str]
         """
         try:
             return self._encoding
         except AttributeError:
-            return get_charset_encoding(self)
+            self._encoding = get_charset_encoding(self)  # type: Optional[str]
+            return self._encoding
 
     @encoding.setter
     def encoding(self, value):
@@ -233,10 +240,13 @@ def encoding(self, value):
     def text(self):
         # type: (...) -> str
         """Returns the response body as a string"""
-        encoding = self.encoding
-        if encoding == "utf-8" or encoding is None:
-            encoding = "utf-8-sig"
-        return self.content.decode(encoding)
+        if self._text is None:
+            content = self.content
+            if not content:
+                self._text = ""
+            else:
+                self._text = decode_to_text(self.encoding, self.content)
+        return self._text
 
     def json(self):
         # type: (...) -> Any

@@ -55,7 +55,8 @@
     format_parameters,
     to_pipeline_transport_request_helper,
     from_pipeline_transport_request_helper,
-    get_charset_encoding
+    get_charset_encoding,
+    decode_to_text,
 )
 from ._helpers_py3 import set_content_body
 from ..exceptions import ResponseNotReadError
@@ -235,6 +236,7 @@ def __init__(
         self._connection_data_block_size = None
         self._json = None  # this is filled in ContentDecodePolicy, when we deserialize
         self._content = None  # type: Optional[bytes]
+        self._text = None  # type: Optional[str]
 
     @property
     def url(self) -> str:
@@ -243,13 +245,18 @@ def url(self) -> str:
 
     @property
     def encoding(self) -> Optional[str]:
-        """Returns the response encoding. By default, is specified
-        by the response Content-Type header.
+        """Returns the response encoding.
+
+        :return: The response encoding. We either return the encoding set by the user,
+         or try extracting the encoding from the response's content type. If all fails,
+         we return `None`.
+        :rtype: optional[str]
         """
         try:
             return self._encoding
         except AttributeError:
-            return get_charset_encoding(self)
+            self._encoding: Optional[str] = get_charset_encoding(self)
+            return self._encoding
 
     @encoding.setter
     def encoding(self, value: str) -> None:
@@ -259,10 +266,13 @@ def encoding(self, value: str) -> None:
     @property
     def text(self) -> str:
         """Returns the response body as a string"""
-        encoding = self.encoding
-        if encoding == "utf-8" or encoding is None:
-            encoding = "utf-8-sig"
-        return self.content.decode(encoding)
+        if self._text is None:
+            content = self.content
+            if not content:
+                self._text = ""
+            else:
+                self._text = decode_to_text(self.encoding, self.content)
+        return self._text
 
     def json(self) -> Any:
         """Returns the whole body as a json object.

@@ -149,7 +149,7 @@ async def test_response_no_charset_with_ascii_content(send_request):
 
     assert response.headers["Content-Type"] == "text/plain"
     assert response.status_code == 200
-    assert response.encoding == 'ascii'
+    assert response.encoding is None
     content = await response.read()
     assert content == b"Hello, world!"
     assert response.text == "Hello, world!"
@@ -165,8 +165,8 @@ async def test_response_no_charset_with_iso_8859_1_content(send_request):
         request=HttpRequest("GET", "/encoding/iso-8859-1"),
     )
     await response.read()
-    assert response.text == u"Accented: Österreich"
-    assert response.encoding == 'ISO-8859-1'
+    assert response.text == "Accented: �sterreich" # aiohttp behaves differently from requests here
+    assert response.encoding is None
 
 # NOTE: aiohttp isn't liking this
 # @pytest.mark.asyncio
@@ -187,7 +187,7 @@ async def test_json(send_request):
     )
     await response.read()
     assert response.json() == {"greeting": "hello", "recipient": "world"}
-    assert response.encoding == 'utf-8'
+    assert response.encoding is None
 
 @pytest.mark.asyncio
 async def test_json_with_specified_encoding(send_request):

@@ -138,7 +138,7 @@ def test_response_no_charset_with_ascii_content(send_request):
 
     assert response.headers["Content-Type"] == "text/plain"
     assert response.status_code == 200
-    assert response.encoding == 'ascii'
+    assert response.encoding is None
     assert response.text == "Hello, world!"
 
 
@@ -151,7 +151,7 @@ def test_response_no_charset_with_iso_8859_1_content(send_request):
         request=HttpRequest("GET", "/encoding/iso-8859-1"),
     )
     assert response.text == u"Accented: Österreich"
-    assert response.encoding == 'ISO-8859-1'
+    assert response.encoding is None
 
 def test_response_set_explicit_encoding(send_request):
     # Deliberately incorrect charset
@@ -168,7 +168,7 @@ def test_json(send_request):
         request=HttpRequest("GET", "/basic/json"),
     )
     assert response.json() == {"greeting": "hello", "recipient": "world"}
-    assert response.encoding == 'utf-8-sig'  # for requests, we use utf-8-sig instead of utf-8 bc of requests behavior
+    assert response.encoding is None
 
 def test_json_with_specified_encoding(send_request):
     response = send_request(