From 85e128b2d91db1ec90df2be0503e1d061c3e2de7 Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sat, 28 Jan 2023 20:41:07 +0000 Subject: [PATCH 1/6] Upgrade platformdirs to 2.6.2 --- news/platformdirs.vendor.rst | 2 +- src/pip/_vendor/platformdirs/__init__.py | 12 +++++++----- src/pip/_vendor/platformdirs/unix.py | 4 ++-- src/pip/_vendor/platformdirs/version.py | 8 ++++---- src/pip/_vendor/vendor.txt | 2 +- 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/news/platformdirs.vendor.rst b/news/platformdirs.vendor.rst index 04ee05723b1..5c15bfbd9d5 100644 --- a/news/platformdirs.vendor.rst +++ b/news/platformdirs.vendor.rst @@ -1 +1 @@ -Upgrade platformdirs to 2.5.3 +Upgrade platformdirs to 2.6.2 diff --git a/src/pip/_vendor/platformdirs/__init__.py b/src/pip/_vendor/platformdirs/__init__.py index 9d513dcf177..82d907163c7 100644 --- a/src/pip/_vendor/platformdirs/__init__.py +++ b/src/pip/_vendor/platformdirs/__init__.py @@ -7,13 +7,15 @@ import os import sys from pathlib import Path -from typing import TYPE_CHECKING -if TYPE_CHECKING: - from pip._vendor.typing_extensions import Literal # pragma: no cover +if sys.version_info >= (3, 8): # pragma: no cover (py38+) + from typing import Literal +else: # pragma: no cover (py38+) + from pip._vendor.typing_extensions import Literal from .api import PlatformDirsABC -from .version import __version__, __version_info__ +from .version import __version__ +from .version import __version_tuple__ as __version_info__ def _set_platform_dir_class() -> type[PlatformDirsABC]: @@ -26,7 +28,7 @@ def _set_platform_dir_class() -> type[PlatformDirsABC]: if os.getenv("ANDROID_DATA") == "/data" and os.getenv("ANDROID_ROOT") == "/system": - if os.getenv("SHELL") is not None: + if os.getenv("SHELL") or os.getenv("PREFIX"): return Result from pip._vendor.platformdirs.android import _android_folder diff --git a/src/pip/_vendor/platformdirs/unix.py b/src/pip/_vendor/platformdirs/unix.py index 2fbd4d4f367..9aca5a03054 100644 --- a/src/pip/_vendor/platformdirs/unix.py +++ b/src/pip/_vendor/platformdirs/unix.py @@ -107,9 +107,9 @@ def user_state_dir(self) -> str: @property def user_log_dir(self) -> str: """ - :return: log directory tied to the user, same as `user_data_dir` if not opinionated else ``log`` in it + :return: log directory tied to the user, same as `user_state_dir` if not opinionated else ``log`` in it """ - path = self.user_cache_dir + path = self.user_state_dir if self.opinion: path = os.path.join(path, "log") return path diff --git a/src/pip/_vendor/platformdirs/version.py b/src/pip/_vendor/platformdirs/version.py index 6361dbf9c07..9f6eb98e8f0 100644 --- a/src/pip/_vendor/platformdirs/version.py +++ b/src/pip/_vendor/platformdirs/version.py @@ -1,4 +1,4 @@ -"""Version information""" - -__version__ = "2.5.3" -__version_info__ = (2, 5, 3) +# file generated by setuptools_scm +# don't change, don't track in version control +__version__ = version = '2.6.2' +__version_tuple__ = version_tuple = (2, 6, 2) diff --git a/src/pip/_vendor/vendor.txt b/src/pip/_vendor/vendor.txt index 26afe72d198..282f627a6a5 100644 --- a/src/pip/_vendor/vendor.txt +++ b/src/pip/_vendor/vendor.txt @@ -4,7 +4,7 @@ distlib==0.3.6 distro==1.8.0 msgpack==1.0.4 packaging==21.3 -platformdirs==2.5.3 +platformdirs==2.6.2 pyparsing==3.0.9 pyproject-hooks==1.0.0 requests==2.28.1 From fb17ee1e914e6601ae96399443e3dc4a552f9d0c Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sat, 28 Jan 2023 20:41:19 +0000 Subject: [PATCH 2/6] Upgrade requests to 2.28.2 --- news/requests.vendor.rst | 1 + 
src/pip/_vendor/requests/__init__.py | 4 ++-- src/pip/_vendor/requests/__version__.py | 6 +++--- src/pip/_vendor/requests/models.py | 2 +- src/pip/_vendor/vendor.txt | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) create mode 100644 news/requests.vendor.rst diff --git a/news/requests.vendor.rst b/news/requests.vendor.rst new file mode 100644 index 00000000000..9f91985c70c --- /dev/null +++ b/news/requests.vendor.rst @@ -0,0 +1 @@ +Upgrade requests to 2.28.2 diff --git a/src/pip/_vendor/requests/__init__.py b/src/pip/_vendor/requests/__init__.py index 9e97059d1db..a4776248038 100644 --- a/src/pip/_vendor/requests/__init__.py +++ b/src/pip/_vendor/requests/__init__.py @@ -77,8 +77,8 @@ def check_compatibility(urllib3_version, chardet_version, charset_normalizer_ver elif charset_normalizer_version: major, minor, patch = charset_normalizer_version.split(".")[:3] major, minor, patch = int(major), int(minor), int(patch) - # charset_normalizer >= 2.0.0 < 3.0.0 - assert (2, 0, 0) <= (major, minor, patch) < (3, 0, 0) + # charset_normalizer >= 2.0.0 < 4.0.0 + assert (2, 0, 0) <= (major, minor, patch) < (4, 0, 0) else: raise Exception("You need either charset_normalizer or chardet installed") diff --git a/src/pip/_vendor/requests/__version__.py b/src/pip/_vendor/requests/__version__.py index e725ada6550..69be3dec741 100644 --- a/src/pip/_vendor/requests/__version__.py +++ b/src/pip/_vendor/requests/__version__.py @@ -5,10 +5,10 @@ __title__ = "requests" __description__ = "Python HTTP for Humans." __url__ = "https://requests.readthedocs.io" -__version__ = "2.28.1" -__build__ = 0x022801 +__version__ = "2.28.2" +__build__ = 0x022802 __author__ = "Kenneth Reitz" __author_email__ = "me@kennethreitz.org" __license__ = "Apache 2.0" -__copyright__ = "Copyright 2022 Kenneth Reitz" +__copyright__ = "Copyright Kenneth Reitz" __cake__ = "\u2728 \U0001f370 \u2728" diff --git a/src/pip/_vendor/requests/models.py b/src/pip/_vendor/requests/models.py index b45e8103258..76e6f199c00 100644 --- a/src/pip/_vendor/requests/models.py +++ b/src/pip/_vendor/requests/models.py @@ -438,7 +438,7 @@ def prepare_url(self, url, params): if not scheme: raise MissingSchema( f"Invalid URL {url!r}: No scheme supplied. " - f"Perhaps you meant http://{url}?" + f"Perhaps you meant https://{url}?" 
) if not host: diff --git a/src/pip/_vendor/vendor.txt b/src/pip/_vendor/vendor.txt index 282f627a6a5..f466ed0712d 100644 --- a/src/pip/_vendor/vendor.txt +++ b/src/pip/_vendor/vendor.txt @@ -7,7 +7,7 @@ packaging==21.3 platformdirs==2.6.2 pyparsing==3.0.9 pyproject-hooks==1.0.0 -requests==2.28.1 +requests==2.28.2 certifi==2022.09.24 chardet==5.0.0 idna==3.4 From 1c110bede610c211f284b09058cbd72b1dd9ed2c Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sat, 28 Jan 2023 20:41:31 +0000 Subject: [PATCH 3/6] Upgrade certifi to 2022.12.7 --- news/certifi.vendor.rst | 1 + src/pip/_vendor/certifi/__init__.py | 2 +- src/pip/_vendor/certifi/cacert.pem | 181 ---------------------------- src/pip/_vendor/vendor.txt | 2 +- 4 files changed, 3 insertions(+), 183 deletions(-) create mode 100644 news/certifi.vendor.rst diff --git a/news/certifi.vendor.rst b/news/certifi.vendor.rst new file mode 100644 index 00000000000..f02ba9f4187 --- /dev/null +++ b/news/certifi.vendor.rst @@ -0,0 +1 @@ +Upgrade certifi to 2022.12.7 diff --git a/src/pip/_vendor/certifi/__init__.py b/src/pip/_vendor/certifi/__init__.py index af4bcc1510f..a3546f12555 100644 --- a/src/pip/_vendor/certifi/__init__.py +++ b/src/pip/_vendor/certifi/__init__.py @@ -1,4 +1,4 @@ from .core import contents, where __all__ = ["contents", "where"] -__version__ = "2022.09.24" +__version__ = "2022.12.07" diff --git a/src/pip/_vendor/certifi/cacert.pem b/src/pip/_vendor/certifi/cacert.pem index 40051551137..df9e4e3c755 100644 --- a/src/pip/_vendor/certifi/cacert.pem +++ b/src/pip/_vendor/certifi/cacert.pem @@ -636,37 +636,6 @@ BA6+C4OmF4O5MBKgxTMVBbkN+8cFduPYSo38NBejxiEovjBFMR7HeL5YYTisO+IB ZQ== -----END CERTIFICATE----- -# Issuer: CN=Network Solutions Certificate Authority O=Network Solutions L.L.C. -# Subject: CN=Network Solutions Certificate Authority O=Network Solutions L.L.C. 
-# Label: "Network Solutions Certificate Authority" -# Serial: 116697915152937497490437556386812487904 -# MD5 Fingerprint: d3:f3:a6:16:c0:fa:6b:1d:59:b1:2d:96:4d:0e:11:2e -# SHA1 Fingerprint: 74:f8:a3:c3:ef:e7:b3:90:06:4b:83:90:3c:21:64:60:20:e5:df:ce -# SHA256 Fingerprint: 15:f0:ba:00:a3:ac:7a:f3:ac:88:4c:07:2b:10:11:a0:77:bd:77:c0:97:f4:01:64:b2:f8:59:8a:bd:83:86:0c ------BEGIN CERTIFICATE----- -MIID5jCCAs6gAwIBAgIQV8szb8JcFuZHFhfjkDFo4DANBgkqhkiG9w0BAQUFADBi -MQswCQYDVQQGEwJVUzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMu -MTAwLgYDVQQDEydOZXR3b3JrIFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3Jp -dHkwHhcNMDYxMjAxMDAwMDAwWhcNMjkxMjMxMjM1OTU5WjBiMQswCQYDVQQGEwJV -UzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMuMTAwLgYDVQQDEydO -ZXR3b3JrIFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwggEiMA0GCSqG -SIb3DQEBAQUAA4IBDwAwggEKAoIBAQDkvH6SMG3G2I4rC7xGzuAnlt7e+foS0zwz -c7MEL7xxjOWftiJgPl9dzgn/ggwbmlFQGiaJ3dVhXRncEg8tCqJDXRfQNJIg6nPP -OCwGJgl6cvf6UDL4wpPTaaIjzkGxzOTVHzbRijr4jGPiFFlp7Q3Tf2vouAPlT2rl -mGNpSAW+Lv8ztumXWWn4Zxmuk2GWRBXTcrA/vGp97Eh/jcOrqnErU2lBUzS1sLnF -BgrEsEX1QV1uiUV7PTsmjHTC5dLRfbIR1PtYMiKagMnc/Qzpf14Dl847ABSHJ3A4 -qY5usyd2mFHgBeMhqxrVhSI8KbWaFsWAqPS7azCPL0YCorEMIuDTAgMBAAGjgZcw -gZQwHQYDVR0OBBYEFCEwyfsA106Y2oeqKtCnLrFAMadMMA4GA1UdDwEB/wQEAwIB -BjAPBgNVHRMBAf8EBTADAQH/MFIGA1UdHwRLMEkwR6BFoEOGQWh0dHA6Ly9jcmwu -bmV0c29sc3NsLmNvbS9OZXR3b3JrU29sdXRpb25zQ2VydGlmaWNhdGVBdXRob3Jp -dHkuY3JsMA0GCSqGSIb3DQEBBQUAA4IBAQC7rkvnt1frf6ott3NHhWrB5KUd5Oc8 -6fRZZXe1eltajSU24HqXLjjAV2CDmAaDn7l2em5Q4LqILPxFzBiwmZVRDuwduIj/ -h1AcgsLj4DKAv6ALR8jDMe+ZZzKATxcheQxpXN5eNK4CtSbqUN9/GGUsyfJj4akH -/nxxH2szJGoeBfcFaMBqEssuXmHLrijTfsK0ZpEmXzwuJF/LWA/rKOyvEZbz3Htv -wKeI8lN3s2Berq4o2jUsbzRF0ybh3uxbTydrFny9RAQYgrOJeRcQcT16ohZO9QHN -pGxlaKFJdlxDydi8NmdspZS11My5vWo1ViHe2MPr+8ukYEywVaCge1ey ------END CERTIFICATE----- - # Issuer: CN=COMODO ECC Certification Authority O=COMODO CA Limited # Subject: CN=COMODO ECC Certification Authority O=COMODO CA Limited # Label: "COMODO ECC Certification Authority" @@ -2204,46 +2173,6 @@ KoZIzj0EAwMDaAAwZQIxAOVpEslu28YxuglB4Zf4+/2a4n0Sye18ZNPLBSWLVtmg xwy8p2Fp8fc74SrL+SvzZpA3 -----END CERTIFICATE----- -# Issuer: CN=Staat der Nederlanden EV Root CA O=Staat der Nederlanden -# Subject: CN=Staat der Nederlanden EV Root CA O=Staat der Nederlanden -# Label: "Staat der Nederlanden EV Root CA" -# Serial: 10000013 -# MD5 Fingerprint: fc:06:af:7b:e8:1a:f1:9a:b4:e8:d2:70:1f:c0:f5:ba -# SHA1 Fingerprint: 76:e2:7e:c1:4f:db:82:c1:c0:a6:75:b5:05:be:3d:29:b4:ed:db:bb -# SHA256 Fingerprint: 4d:24:91:41:4c:fe:95:67:46:ec:4c:ef:a6:cf:6f:72:e2:8a:13:29:43:2f:9d:8a:90:7a:c4:cb:5d:ad:c1:5a ------BEGIN CERTIFICATE----- -MIIFcDCCA1igAwIBAgIEAJiWjTANBgkqhkiG9w0BAQsFADBYMQswCQYDVQQGEwJO -TDEeMBwGA1UECgwVU3RhYXQgZGVyIE5lZGVybGFuZGVuMSkwJwYDVQQDDCBTdGFh -dCBkZXIgTmVkZXJsYW5kZW4gRVYgUm9vdCBDQTAeFw0xMDEyMDgxMTE5MjlaFw0y -MjEyMDgxMTEwMjhaMFgxCzAJBgNVBAYTAk5MMR4wHAYDVQQKDBVTdGFhdCBkZXIg -TmVkZXJsYW5kZW4xKTAnBgNVBAMMIFN0YWF0IGRlciBOZWRlcmxhbmRlbiBFViBS -b290IENBMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA48d+ifkkSzrS -M4M1LGns3Amk41GoJSt5uAg94JG6hIXGhaTK5skuU6TJJB79VWZxXSzFYGgEt9nC -UiY4iKTWO0Cmws0/zZiTs1QUWJZV1VD+hq2kY39ch/aO5ieSZxeSAgMs3NZmdO3d -Z//BYY1jTw+bbRcwJu+r0h8QoPnFfxZpgQNH7R5ojXKhTbImxrpsX23Wr9GxE46p -rfNeaXUmGD5BKyF/7otdBwadQ8QpCiv8Kj6GyzyDOvnJDdrFmeK8eEEzduG/L13l -pJhQDBXd4Pqcfzho0LKmeqfRMb1+ilgnQ7O6M5HTp5gVXJrm0w912fxBmJc+qiXb -j5IusHsMX/FjqTf5m3VpTCgmJdrV8hJwRVXj33NeN/UhbJCONVrJ0yPr08C+eKxC -KFhmpUZtcALXEPlLVPxdhkqHz3/KRawRWrUgUY0viEeXOcDPusBCAUCZSCELa6fS 
-/ZbV0b5GnUngC6agIk440ME8MLxwjyx1zNDFjFE7PZQIZCZhfbnDZY8UnCHQqv0X -cgOPvZuM5l5Tnrmd74K74bzickFbIZTTRTeU0d8JOV3nI6qaHcptqAqGhYqCvkIH -1vI4gnPah1vlPNOePqc7nvQDs/nxfRN0Av+7oeX6AHkcpmZBiFxgV6YuCcS6/ZrP -px9Aw7vMWgpVSzs4dlG4Y4uElBbmVvMCAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB -/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFP6rAJCYniT8qcwaivsnuL8wbqg7 -MA0GCSqGSIb3DQEBCwUAA4ICAQDPdyxuVr5Os7aEAJSrR8kN0nbHhp8dB9O2tLsI -eK9p0gtJ3jPFrK3CiAJ9Brc1AsFgyb/E6JTe1NOpEyVa/m6irn0F3H3zbPB+po3u -2dfOWBfoqSmuc0iH55vKbimhZF8ZE/euBhD/UcabTVUlT5OZEAFTdfETzsemQUHS -v4ilf0X8rLiltTMMgsT7B/Zq5SWEXwbKwYY5EdtYzXc7LMJMD16a4/CrPmEbUCTC -wPTxGfARKbalGAKb12NMcIxHowNDXLldRqANb/9Zjr7dn3LDWyvfjFvO5QxGbJKy -CqNMVEIYFRIYvdr8unRu/8G2oGTYqV9Vrp9canaW2HNnh/tNf1zuacpzEPuKqf2e -vTY4SUmH9A4U8OmHuD+nT3pajnnUk+S7aFKErGzp85hwVXIy+TSrK0m1zSBi5Dp6 -Z2Orltxtrpfs/J92VoguZs9btsmksNcFuuEnL5O7Jiqik7Ab846+HUCjuTaPPoIa -Gl6I6lD4WeKDRikL40Rc4ZW2aZCaFG+XroHPaO+Zmr615+F/+PoTRxZMzG0IQOeL -eG9QgkRQP2YGiqtDhFZKDyAthg710tvSeopLzaXoTvFeJiUBWSOgftL2fiFX1ye8 -FVdMpEbB4IMeDExNH08GGeL5qPQ6gqGyeUN51q1veieQA6TqJIc/2b3Z6fJfUEkc -7uzXLg== ------END CERTIFICATE----- - # Issuer: CN=IdenTrust Commercial Root CA 1 O=IdenTrust # Subject: CN=IdenTrust Commercial Root CA 1 O=IdenTrust # Label: "IdenTrust Commercial Root CA 1" @@ -2851,116 +2780,6 @@ T8p+ck0LcIymSLumoRT2+1hEmRSuqguTaaApJUqlyyvdimYHFngVV3Eb7PVHhPOe MTd61X8kreS8/f3MboPoDKi3QWwH3b08hpcv0g== -----END CERTIFICATE----- -# Issuer: CN=TrustCor RootCert CA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority -# Subject: CN=TrustCor RootCert CA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority -# Label: "TrustCor RootCert CA-1" -# Serial: 15752444095811006489 -# MD5 Fingerprint: 6e:85:f1:dc:1a:00:d3:22:d5:b2:b2:ac:6b:37:05:45 -# SHA1 Fingerprint: ff:bd:cd:e7:82:c8:43:5e:3c:6f:26:86:5c:ca:a8:3a:45:5b:c3:0a -# SHA256 Fingerprint: d4:0e:9c:86:cd:8f:e4:68:c1:77:69:59:f4:9e:a7:74:fa:54:86:84:b6:c4:06:f3:90:92:61:f4:dc:e2:57:5c ------BEGIN CERTIFICATE----- -MIIEMDCCAxigAwIBAgIJANqb7HHzA7AZMA0GCSqGSIb3DQEBCwUAMIGkMQswCQYD -VQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEk -MCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5U -cnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxHzAdBgNVBAMMFlRydXN0Q29y -IFJvb3RDZXJ0IENBLTEwHhcNMTYwMjA0MTIzMjE2WhcNMjkxMjMxMTcyMzE2WjCB -pDELMAkGA1UEBhMCUEExDzANBgNVBAgMBlBhbmFtYTEUMBIGA1UEBwwLUGFuYW1h -IENpdHkxJDAiBgNVBAoMG1RydXN0Q29yIFN5c3RlbXMgUy4gZGUgUi5MLjEnMCUG -A1UECwweVHJ1c3RDb3IgQ2VydGlmaWNhdGUgQXV0aG9yaXR5MR8wHQYDVQQDDBZU -cnVzdENvciBSb290Q2VydCBDQS0xMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB -CgKCAQEAv463leLCJhJrMxnHQFgKq1mqjQCj/IDHUHuO1CAmujIS2CNUSSUQIpid -RtLByZ5OGy4sDjjzGiVoHKZaBeYei0i/mJZ0PmnK6bV4pQa81QBeCQryJ3pS/C3V -seq0iWEk8xoT26nPUu0MJLq5nux+AHT6k61sKZKuUbS701e/s/OojZz0JEsq1pme -9J7+wH5COucLlVPat2gOkEz7cD+PSiyU8ybdY2mplNgQTsVHCJCZGxdNuWxu72CV -EY4hgLW9oHPY0LJ3xEXqWib7ZnZ2+AYfYW0PVcWDtxBWcgYHpfOxGgMFZA6dWorW -hnAbJN7+KIor0Gqw/Hqi3LJ5DotlDwIDAQABo2MwYTAdBgNVHQ4EFgQU7mtJPHo/ -DeOxCbeKyKsZn3MzUOcwHwYDVR0jBBgwFoAU7mtJPHo/DeOxCbeKyKsZn3MzUOcw -DwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQAD -ggEBACUY1JGPE+6PHh0RU9otRCkZoB5rMZ5NDp6tPVxBb5UrJKF5mDo4Nvu7Zp5I -/5CQ7z3UuJu0h3U/IJvOcs+hVcFNZKIZBqEHMwwLKeXx6quj7LUKdJDHfXLy11yf -ke+Ri7fc7Waiz45mO7yfOgLgJ90WmMCV1Aqk5IGadZQ1nJBfiDcGrVmVCrDRZ9MZ -yonnMlo2HD6CqFqTvsbQZJG2z9m2GM/bftJlo6bEjhcxwft+dtvTheNYsnd6djts -L1Ac59v2Z3kf9YKVmgenFK+P3CghZwnS1k1aHBkcjndcw5QkPTJrS37UeJSDvjdN -zl/HHk484IkzlQsPpTLWPFp5LBk= ------END CERTIFICATE----- - -# Issuer: CN=TrustCor RootCert CA-2 O=TrustCor Systems S. de R.L. 
OU=TrustCor Certificate Authority -# Subject: CN=TrustCor RootCert CA-2 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority -# Label: "TrustCor RootCert CA-2" -# Serial: 2711694510199101698 -# MD5 Fingerprint: a2:e1:f8:18:0b:ba:45:d5:c7:41:2a:bb:37:52:45:64 -# SHA1 Fingerprint: b8:be:6d:cb:56:f1:55:b9:63:d4:12:ca:4e:06:34:c7:94:b2:1c:c0 -# SHA256 Fingerprint: 07:53:e9:40:37:8c:1b:d5:e3:83:6e:39:5d:ae:a5:cb:83:9e:50:46:f1:bd:0e:ae:19:51:cf:10:fe:c7:c9:65 ------BEGIN CERTIFICATE----- -MIIGLzCCBBegAwIBAgIIJaHfyjPLWQIwDQYJKoZIhvcNAQELBQAwgaQxCzAJBgNV -BAYTAlBBMQ8wDQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5MSQw -IgYDVQQKDBtUcnVzdENvciBTeXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRy -dXN0Q29yIENlcnRpZmljYXRlIEF1dGhvcml0eTEfMB0GA1UEAwwWVHJ1c3RDb3Ig -Um9vdENlcnQgQ0EtMjAeFw0xNjAyMDQxMjMyMjNaFw0zNDEyMzExNzI2MzlaMIGk -MQswCQYDVQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEg -Q2l0eTEkMCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYD -VQQLDB5UcnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxHzAdBgNVBAMMFlRy -dXN0Q29yIFJvb3RDZXJ0IENBLTIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK -AoICAQCnIG7CKqJiJJWQdsg4foDSq8GbZQWU9MEKENUCrO2fk8eHyLAnK0IMPQo+ -QVqedd2NyuCb7GgypGmSaIwLgQ5WoD4a3SwlFIIvl9NkRvRUqdw6VC0xK5mC8tkq -1+9xALgxpL56JAfDQiDyitSSBBtlVkxs1Pu2YVpHI7TYabS3OtB0PAx1oYxOdqHp -2yqlO/rOsP9+aij9JxzIsekp8VduZLTQwRVtDr4uDkbIXvRR/u8OYzo7cbrPb1nK -DOObXUm4TOJXsZiKQlecdu/vvdFoqNL0Cbt3Nb4lggjEFixEIFapRBF37120Hape -az6LMvYHL1cEksr1/p3C6eizjkxLAjHZ5DxIgif3GIJ2SDpxsROhOdUuxTTCHWKF -3wP+TfSvPd9cW436cOGlfifHhi5qjxLGhF5DUVCcGZt45vz27Ud+ez1m7xMTiF88 -oWP7+ayHNZ/zgp6kPwqcMWmLmaSISo5uZk3vFsQPeSghYA2FFn3XVDjxklb9tTNM -g9zXEJ9L/cb4Qr26fHMC4P99zVvh1Kxhe1fVSntb1IVYJ12/+CtgrKAmrhQhJ8Z3 -mjOAPF5GP/fDsaOGM8boXg25NSyqRsGFAnWAoOsk+xWq5Gd/bnc/9ASKL3x74xdh -8N0JqSDIvgmk0H5Ew7IwSjiqqewYmgeCK9u4nBit2uBGF6zPXQIDAQABo2MwYTAd -BgNVHQ4EFgQU2f4hQG6UnrybPZx9mCAZ5YwwYrIwHwYDVR0jBBgwFoAU2f4hQG6U -nrybPZx9mCAZ5YwwYrIwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYw -DQYJKoZIhvcNAQELBQADggIBAJ5Fngw7tu/hOsh80QA9z+LqBrWyOrsGS2h60COX -dKcs8AjYeVrXWoSK2BKaG9l9XE1wxaX5q+WjiYndAfrs3fnpkpfbsEZC89NiqpX+ -MWcUaViQCqoL7jcjx1BRtPV+nuN79+TMQjItSQzL/0kMmx40/W5ulop5A7Zv2wnL -/V9lFDfhOPXzYRZY5LVtDQsEGz9QLX+zx3oaFoBg+Iof6Rsqxvm6ARppv9JYx1RX -CI/hOWB3S6xZhBqI8d3LT3jX5+EzLfzuQfogsL7L9ziUwOHQhQ+77Sxzq+3+knYa -ZH9bDTMJBzN7Bj8RpFxwPIXAz+OQqIN3+tvmxYxoZxBnpVIt8MSZj3+/0WvitUfW -2dCFmU2Umw9Lje4AWkcdEQOsQRivh7dvDDqPys/cA8GiCcjl/YBeyGBCARsaU1q7 -N6a3vLqE6R5sGtRk2tRD/pOLS/IseRYQ1JMLiI+h2IYURpFHmygk71dSTlxCnKr3 -Sewn6EAes6aJInKc9Q0ztFijMDvd1GpUk74aTfOTlPf8hAs/hCBcNANExdqtvArB -As8e5ZTZ845b2EzwnexhF7sUMlQMAimTHpKG9n/v55IFDlndmQguLvqcAFLTxWYp -5KeXRKQOKIETNcX2b2TmQcTVL8w0RSXPQQCWPUouwpaYT05KnJe32x+SMsj/D1Fu -1uwJ ------END CERTIFICATE----- - -# Issuer: CN=TrustCor ECA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority -# Subject: CN=TrustCor ECA-1 O=TrustCor Systems S. de R.L. 
OU=TrustCor Certificate Authority -# Label: "TrustCor ECA-1" -# Serial: 9548242946988625984 -# MD5 Fingerprint: 27:92:23:1d:0a:f5:40:7c:e9:e6:6b:9d:d8:f5:e7:6c -# SHA1 Fingerprint: 58:d1:df:95:95:67:6b:63:c0:f0:5b:1c:17:4d:8b:84:0b:c8:78:bd -# SHA256 Fingerprint: 5a:88:5d:b1:9c:01:d9:12:c5:75:93:88:93:8c:af:bb:df:03:1a:b2:d4:8e:91:ee:15:58:9b:42:97:1d:03:9c ------BEGIN CERTIFICATE----- -MIIEIDCCAwigAwIBAgIJAISCLF8cYtBAMA0GCSqGSIb3DQEBCwUAMIGcMQswCQYD -VQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEk -MCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5U -cnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxFzAVBgNVBAMMDlRydXN0Q29y -IEVDQS0xMB4XDTE2MDIwNDEyMzIzM1oXDTI5MTIzMTE3MjgwN1owgZwxCzAJBgNV -BAYTAlBBMQ8wDQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5MSQw -IgYDVQQKDBtUcnVzdENvciBTeXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRy -dXN0Q29yIENlcnRpZmljYXRlIEF1dGhvcml0eTEXMBUGA1UEAwwOVHJ1c3RDb3Ig -RUNBLTEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDPj+ARtZ+odnbb -3w9U73NjKYKtR8aja+3+XzP4Q1HpGjORMRegdMTUpwHmspI+ap3tDvl0mEDTPwOA -BoJA6LHip1GnHYMma6ve+heRK9jGrB6xnhkB1Zem6g23xFUfJ3zSCNV2HykVh0A5 -3ThFEXXQmqc04L/NyFIduUd+Dbi7xgz2c1cWWn5DkR9VOsZtRASqnKmcp0yJF4Ou -owReUoCLHhIlERnXDH19MURB6tuvsBzvgdAsxZohmz3tQjtQJvLsznFhBmIhVE5/ -wZ0+fyCMgMsq2JdiyIMzkX2woloPV+g7zPIlstR8L+xNxqE6FXrntl019fZISjZF -ZtS6mFjBAgMBAAGjYzBhMB0GA1UdDgQWBBREnkj1zG1I1KBLf/5ZJC+Dl5mahjAf -BgNVHSMEGDAWgBREnkj1zG1I1KBLf/5ZJC+Dl5mahjAPBgNVHRMBAf8EBTADAQH/ -MA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAQEABT41XBVwm8nHc2Fv -civUwo/yQ10CzsSUuZQRg2dd4mdsdXa/uwyqNsatR5Nj3B5+1t4u/ukZMjgDfxT2 -AHMsWbEhBuH7rBiVDKP/mZb3Kyeb1STMHd3BOuCYRLDE5D53sXOpZCz2HAF8P11F -hcCF5yWPldwX8zyfGm6wyuMdKulMY/okYWLW2n62HGz1Ah3UKt1VkOsqEUc8Ll50 -soIipX1TH0XsJ5F95yIW6MBoNtjG8U+ARDL54dHRHareqKucBK+tIA5kmE2la8BI -WJZpTdwHjFGTot+fDz2LYLSCjaoITmJF4PkL0uDgPFveXHEnJcLmA4GLEFPjx1Wi -tJ/X5g== ------END CERTIFICATE----- - # Issuer: CN=SSL.com Root Certification Authority RSA O=SSL Corporation # Subject: CN=SSL.com Root Certification Authority RSA O=SSL Corporation # Label: "SSL.com Root Certification Authority RSA" diff --git a/src/pip/_vendor/vendor.txt b/src/pip/_vendor/vendor.txt index f466ed0712d..cd42578f4d3 100644 --- a/src/pip/_vendor/vendor.txt +++ b/src/pip/_vendor/vendor.txt @@ -8,7 +8,7 @@ platformdirs==2.6.2 pyparsing==3.0.9 pyproject-hooks==1.0.0 requests==2.28.2 - certifi==2022.09.24 + certifi==2022.12.7 chardet==5.0.0 idna==3.4 urllib3==1.26.12 From be20a75c108b5db5ca0dc097e6f46a3ebccfd48a Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sat, 28 Jan 2023 20:41:43 +0000 Subject: [PATCH 4/6] Upgrade chardet to 5.1.0 --- news/chardet.vendor.rst | 1 + src/pip/_vendor/chardet.pyi | 1 - src/pip/_vendor/chardet/__init__.py | 36 +++- src/pip/_vendor/chardet/big5prober.py | 6 +- src/pip/_vendor/chardet/chardistribution.py | 54 +++--- src/pip/_vendor/chardet/charsetgroupprober.py | 31 ++-- src/pip/_vendor/chardet/charsetprober.py | 35 ++-- src/pip/_vendor/chardet/cli/chardetect.py | 42 ++++- src/pip/_vendor/chardet/codingstatemachine.py | 16 +- .../_vendor/chardet/codingstatemachinedict.py | 19 ++ src/pip/_vendor/chardet/cp949prober.py | 6 +- src/pip/_vendor/chardet/enums.py | 9 +- src/pip/_vendor/chardet/escprober.py | 26 +-- src/pip/_vendor/chardet/escsm.py | 9 +- src/pip/_vendor/chardet/eucjpprober.py | 19 +- src/pip/_vendor/chardet/euckrprober.py | 6 +- src/pip/_vendor/chardet/euctwprober.py | 6 +- src/pip/_vendor/chardet/gb2312prober.py | 6 +- src/pip/_vendor/chardet/hebrewprober.py | 56 +++--- src/pip/_vendor/chardet/johabprober.py | 6 +- 
src/pip/_vendor/chardet/jpcntx.py | 31 ++-- src/pip/_vendor/chardet/latin1prober.py | 18 +- src/pip/_vendor/chardet/macromanprober.py | 162 ++++++++++++++++++ src/pip/_vendor/chardet/mbcharsetprober.py | 32 ++-- src/pip/_vendor/chardet/mbcsgroupprober.py | 3 +- src/pip/_vendor/chardet/mbcssm.py | 23 +-- src/pip/_vendor/chardet/metadata/languages.py | 37 ++-- src/pip/_vendor/chardet/py.typed | 0 src/pip/_vendor/chardet/resultdict.py | 16 ++ src/pip/_vendor/chardet/sbcharsetprober.py | 52 +++--- src/pip/_vendor/chardet/sbcsgroupprober.py | 2 +- src/pip/_vendor/chardet/sjisprober.py | 19 +- src/pip/_vendor/chardet/universaldetector.py | 68 ++++++-- src/pip/_vendor/chardet/utf1632prober.py | 32 ++-- src/pip/_vendor/chardet/utf8prober.py | 16 +- src/pip/_vendor/chardet/version.py | 4 +- src/pip/_vendor/vendor.txt | 2 +- 37 files changed, 620 insertions(+), 287 deletions(-) create mode 100644 news/chardet.vendor.rst delete mode 100644 src/pip/_vendor/chardet.pyi create mode 100644 src/pip/_vendor/chardet/codingstatemachinedict.py create mode 100644 src/pip/_vendor/chardet/macromanprober.py create mode 100644 src/pip/_vendor/chardet/py.typed create mode 100644 src/pip/_vendor/chardet/resultdict.py diff --git a/news/chardet.vendor.rst b/news/chardet.vendor.rst new file mode 100644 index 00000000000..5aceb6c5e6f --- /dev/null +++ b/news/chardet.vendor.rst @@ -0,0 +1 @@ +Upgrade chardet to 5.1.0 diff --git a/src/pip/_vendor/chardet.pyi b/src/pip/_vendor/chardet.pyi deleted file mode 100644 index 29e87e33157..00000000000 --- a/src/pip/_vendor/chardet.pyi +++ /dev/null @@ -1 +0,0 @@ -from chardet import * \ No newline at end of file diff --git a/src/pip/_vendor/chardet/__init__.py b/src/pip/_vendor/chardet/__init__.py index e91ad61822c..fe581623d89 100644 --- a/src/pip/_vendor/chardet/__init__.py +++ b/src/pip/_vendor/chardet/__init__.py @@ -15,19 +15,29 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import List, Union + +from .charsetgroupprober import CharSetGroupProber +from .charsetprober import CharSetProber from .enums import InputState +from .resultdict import ResultDict from .universaldetector import UniversalDetector from .version import VERSION, __version__ __all__ = ["UniversalDetector", "detect", "detect_all", "__version__", "VERSION"] -def detect(byte_str): +def detect( + byte_str: Union[bytes, bytearray], should_rename_legacy: bool = False +) -> ResultDict: """ Detect the encoding of the given byte string. :param byte_str: The byte sequence to examine. :type byte_str: ``bytes`` or ``bytearray`` + :param should_rename_legacy: Should we rename legacy encodings + to their more modern equivalents? + :type should_rename_legacy: ``bool`` """ if not isinstance(byte_str, bytearray): if not isinstance(byte_str, bytes): @@ -35,12 +45,16 @@ def detect(byte_str): f"Expected object of type bytes or bytearray, got: {type(byte_str)}" ) byte_str = bytearray(byte_str) - detector = UniversalDetector() + detector = UniversalDetector(should_rename_legacy=should_rename_legacy) detector.feed(byte_str) return detector.close() -def detect_all(byte_str, ignore_threshold=False): +def detect_all( + byte_str: Union[bytes, bytearray], + ignore_threshold: bool = False, + should_rename_legacy: bool = False, +) -> List[ResultDict]: """ Detect all the possible encodings of the given byte string. @@ -50,6 +64,9 @@ def detect_all(byte_str, ignore_threshold=False): ``UniversalDetector.MINIMUM_THRESHOLD`` in results. 
:type ignore_threshold: ``bool`` + :param should_rename_legacy: Should we rename legacy encodings + to their more modern equivalents? + :type should_rename_legacy: ``bool`` """ if not isinstance(byte_str, bytearray): if not isinstance(byte_str, bytes): @@ -58,15 +75,15 @@ def detect_all(byte_str, ignore_threshold=False): ) byte_str = bytearray(byte_str) - detector = UniversalDetector() + detector = UniversalDetector(should_rename_legacy=should_rename_legacy) detector.feed(byte_str) detector.close() if detector.input_state == InputState.HIGH_BYTE: - results = [] - probers = [] + results: List[ResultDict] = [] + probers: List[CharSetProber] = [] for prober in detector.charset_probers: - if hasattr(prober, "probers"): + if isinstance(prober, CharSetGroupProber): probers.extend(p for p in prober.probers) else: probers.append(prober) @@ -80,6 +97,11 @@ def detect_all(byte_str, ignore_threshold=False): charset_name = detector.ISO_WIN_MAP.get( lower_charset_name, charset_name ) + # Rename legacy encodings with superset encodings if asked + if should_rename_legacy: + charset_name = detector.LEGACY_MAP.get( + charset_name.lower(), charset_name + ) results.append( { "encoding": charset_name, diff --git a/src/pip/_vendor/chardet/big5prober.py b/src/pip/_vendor/chardet/big5prober.py index e4dfa7aa02a..ef09c60e327 100644 --- a/src/pip/_vendor/chardet/big5prober.py +++ b/src/pip/_vendor/chardet/big5prober.py @@ -32,16 +32,16 @@ class Big5Prober(MultiByteCharSetProber): - def __init__(self): + def __init__(self) -> None: super().__init__() self.coding_sm = CodingStateMachine(BIG5_SM_MODEL) self.distribution_analyzer = Big5DistributionAnalysis() self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "Big5" @property - def language(self): + def language(self) -> str: return "Chinese" diff --git a/src/pip/_vendor/chardet/chardistribution.py b/src/pip/_vendor/chardet/chardistribution.py index 27b4a293911..176cb996408 100644 --- a/src/pip/_vendor/chardet/chardistribution.py +++ b/src/pip/_vendor/chardet/chardistribution.py @@ -25,6 +25,8 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Tuple, Union + from .big5freq import ( BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE, @@ -59,22 +61,22 @@ class CharDistributionAnalysis: SURE_NO = 0.01 MINIMUM_DATA_THRESHOLD = 3 - def __init__(self): + def __init__(self) -> None: # Mapping table to get frequency order from char order (get from # GetOrder()) - self._char_to_freq_order = tuple() - self._table_size = None # Size of above table + self._char_to_freq_order: Tuple[int, ...] = tuple() + self._table_size = 0 # Size of above table # This is a constant value which varies from language to language, # used in calculating confidence. See # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html # for further detail. 
- self.typical_distribution_ratio = None - self._done = None - self._total_chars = None - self._freq_chars = None + self.typical_distribution_ratio = 0.0 + self._done = False + self._total_chars = 0 + self._freq_chars = 0 self.reset() - def reset(self): + def reset(self) -> None: """reset analyser, clear any state""" # If this flag is set to True, detection is done and conclusion has # been made @@ -83,7 +85,7 @@ def reset(self): # The number of characters whose frequency order is less than 512 self._freq_chars = 0 - def feed(self, char, char_len): + def feed(self, char: Union[bytes, bytearray], char_len: int) -> None: """feed a character with known length""" if char_len == 2: # we only care about 2-bytes character in our distribution analysis @@ -97,7 +99,7 @@ def feed(self, char, char_len): if 512 > self._char_to_freq_order[order]: self._freq_chars += 1 - def get_confidence(self): + def get_confidence(self) -> float: """return confidence based on existing data""" # if we didn't receive any character in our consideration range, # return negative answer @@ -114,12 +116,12 @@ def get_confidence(self): # normalize confidence (we don't want to be 100% sure) return self.SURE_YES - def got_enough_data(self): + def got_enough_data(self) -> bool: # It is not necessary to receive all data to draw conclusion. # For charset detection, certain amount of data is enough return self._total_chars > self.ENOUGH_DATA_THRESHOLD - def get_order(self, _): + def get_order(self, _: Union[bytes, bytearray]) -> int: # We do not handle characters based on the original encoding string, # but convert this encoding string to a number, here called order. # This allows multiple encodings of a language to share one frequency @@ -128,13 +130,13 @@ def get_order(self, _): class EUCTWDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): + def __init__(self) -> None: super().__init__() self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER self._table_size = EUCTW_TABLE_SIZE self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for euc-TW encoding, we are interested # first byte range: 0xc4 -- 0xfe # second byte range: 0xa1 -- 0xfe @@ -146,13 +148,13 @@ def get_order(self, byte_str): class EUCKRDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): + def __init__(self) -> None: super().__init__() self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER self._table_size = EUCKR_TABLE_SIZE self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for euc-KR encoding, we are interested # first byte range: 0xb0 -- 0xfe # second byte range: 0xa1 -- 0xfe @@ -164,13 +166,13 @@ def get_order(self, byte_str): class JOHABDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): + def __init__(self) -> None: super().__init__() self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER self._table_size = EUCKR_TABLE_SIZE self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: first_char = byte_str[0] if 0x88 <= first_char < 0xD4: code = first_char * 256 + byte_str[1] @@ -179,13 +181,13 @@ def get_order(self, byte_str): class GB2312DistributionAnalysis(CharDistributionAnalysis): - def __init__(self): + def __init__(self) -> None: super().__init__() self._char_to_freq_order = 
GB2312_CHAR_TO_FREQ_ORDER self._table_size = GB2312_TABLE_SIZE self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for GB2312 encoding, we are interested # first byte range: 0xb0 -- 0xfe # second byte range: 0xa1 -- 0xfe @@ -197,13 +199,13 @@ def get_order(self, byte_str): class Big5DistributionAnalysis(CharDistributionAnalysis): - def __init__(self): + def __init__(self) -> None: super().__init__() self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER self._table_size = BIG5_TABLE_SIZE self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for big5 encoding, we are interested # first byte range: 0xa4 -- 0xfe # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe @@ -217,13 +219,13 @@ def get_order(self, byte_str): class SJISDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): + def __init__(self) -> None: super().__init__() self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER self._table_size = JIS_TABLE_SIZE self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for sjis encoding, we are interested # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe @@ -242,13 +244,13 @@ def get_order(self, byte_str): class EUCJPDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): + def __init__(self) -> None: super().__init__() self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER self._table_size = JIS_TABLE_SIZE self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for euc-JP encoding, we are interested # first byte range: 0xa0 -- 0xfe # second byte range: 0xa1 -- 0xfe diff --git a/src/pip/_vendor/chardet/charsetgroupprober.py b/src/pip/_vendor/chardet/charsetgroupprober.py index 778ff332bbd..6def56b4a75 100644 --- a/src/pip/_vendor/chardet/charsetgroupprober.py +++ b/src/pip/_vendor/chardet/charsetgroupprober.py @@ -25,29 +25,30 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import List, Optional, Union + from .charsetprober import CharSetProber -from .enums import ProbingState +from .enums import LanguageFilter, ProbingState class CharSetGroupProber(CharSetProber): - def __init__(self, lang_filter=None): + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: super().__init__(lang_filter=lang_filter) self._active_num = 0 - self.probers = [] - self._best_guess_prober = None + self.probers: List[CharSetProber] = [] + self._best_guess_prober: Optional[CharSetProber] = None - def reset(self): + def reset(self) -> None: super().reset() self._active_num = 0 for prober in self.probers: - if prober: - prober.reset() - prober.active = True - self._active_num += 1 + prober.reset() + prober.active = True + self._active_num += 1 self._best_guess_prober = None @property - def charset_name(self): + def charset_name(self) -> Optional[str]: if not self._best_guess_prober: self.get_confidence() if not self._best_guess_prober: @@ -55,17 +56,15 @@ def charset_name(self): return self._best_guess_prober.charset_name @property - def language(self): + def language(self) -> Optional[str]: if not self._best_guess_prober: self.get_confidence() 
if not self._best_guess_prober: return None return self._best_guess_prober.language - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: for prober in self.probers: - if not prober: - continue if not prober.active: continue state = prober.feed(byte_str) @@ -83,7 +82,7 @@ def feed(self, byte_str): return self.state return self.state - def get_confidence(self): + def get_confidence(self) -> float: state = self.state if state == ProbingState.FOUND_IT: return 0.99 @@ -92,8 +91,6 @@ def get_confidence(self): best_conf = 0.0 self._best_guess_prober = None for prober in self.probers: - if not prober: - continue if not prober.active: self.logger.debug("%s not active", prober.charset_name) continue diff --git a/src/pip/_vendor/chardet/charsetprober.py b/src/pip/_vendor/chardet/charsetprober.py index 9f1afd999c1..a103ca11356 100644 --- a/src/pip/_vendor/chardet/charsetprober.py +++ b/src/pip/_vendor/chardet/charsetprober.py @@ -28,8 +28,9 @@ import logging import re +from typing import Optional, Union -from .enums import ProbingState +from .enums import LanguageFilter, ProbingState INTERNATIONAL_WORDS_PATTERN = re.compile( b"[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?" @@ -40,35 +41,40 @@ class CharSetProber: SHORTCUT_THRESHOLD = 0.95 - def __init__(self, lang_filter=None): - self._state = None + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: + self._state = ProbingState.DETECTING + self.active = True self.lang_filter = lang_filter self.logger = logging.getLogger(__name__) - def reset(self): + def reset(self) -> None: self._state = ProbingState.DETECTING @property - def charset_name(self): + def charset_name(self) -> Optional[str]: return None - def feed(self, byte_str): + @property + def language(self) -> Optional[str]: + raise NotImplementedError + + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: raise NotImplementedError @property - def state(self): + def state(self) -> ProbingState: return self._state - def get_confidence(self): + def get_confidence(self) -> float: return 0.0 @staticmethod - def filter_high_byte_only(buf): + def filter_high_byte_only(buf: Union[bytes, bytearray]) -> bytes: buf = re.sub(b"([\x00-\x7F])+", b" ", buf) return buf @staticmethod - def filter_international_words(buf): + def filter_international_words(buf: Union[bytes, bytearray]) -> bytearray: """ We define three types of bytes: alphabet: english alphabets [a-zA-Z] @@ -102,7 +108,7 @@ def filter_international_words(buf): return filtered @staticmethod - def remove_xml_tags(buf): + def remove_xml_tags(buf: Union[bytes, bytearray]) -> bytes: """ Returns a copy of ``buf`` that retains only the sequences of English alphabet and high byte characters that are not between <> characters. 
@@ -117,10 +123,13 @@ def remove_xml_tags(buf):
 
         for curr, buf_char in enumerate(buf):
             # Check if we're coming out of or entering an XML tag
-            if buf_char == b">":
+
+            # https://github.com/python/typeshed/issues/8182
+            if buf_char == b">":  # type: ignore[comparison-overlap]
                 prev = curr + 1
                 in_tag = False
-            elif buf_char == b"<":
+            # https://github.com/python/typeshed/issues/8182
+            elif buf_char == b"<":  # type: ignore[comparison-overlap]
                 if curr > prev and not in_tag:
                     # Keep everything after last non-extended-ASCII,
                     # non-alphabetic character
diff --git a/src/pip/_vendor/chardet/cli/chardetect.py b/src/pip/_vendor/chardet/cli/chardetect.py
index 7926fa37e38..43f6e144f67 100644
--- a/src/pip/_vendor/chardet/cli/chardetect.py
+++ b/src/pip/_vendor/chardet/cli/chardetect.py
@@ -15,12 +15,18 @@
 
 import argparse
 import sys
+from typing import Iterable, List, Optional
 
 from .. import __version__
 from ..universaldetector import UniversalDetector
 
 
-def description_of(lines, name="stdin"):
+def description_of(
+    lines: Iterable[bytes],
+    name: str = "stdin",
+    minimal: bool = False,
+    should_rename_legacy: bool = False,
+) -> Optional[str]:
     """
     Return a string describing the probable encoding of a file or
     list of strings.
@@ -29,8 +35,11 @@ def description_of(lines, name="stdin"):
     :type lines: Iterable of bytes
     :param name: Name of file or collection of lines
     :type name: str
+    :param should_rename_legacy: Should we rename legacy encodings to
+        their more modern equivalents?
+    :type should_rename_legacy: ``bool``
     """
-    u = UniversalDetector()
+    u = UniversalDetector(should_rename_legacy=should_rename_legacy)
     for line in lines:
         line = bytearray(line)
         u.feed(line)
@@ -39,12 +48,14 @@ def description_of(lines, name="stdin"):
             break
     u.close()
     result = u.result
+    if minimal:
+        return result["encoding"]
     if result["encoding"]:
         return f'{name}: {result["encoding"]} with confidence {result["confidence"]}'
     return f"{name}: no result"
 
 
-def main(argv=None):
+def main(argv: Optional[List[str]] = None) -> None:
     """
     Handles command line arguments and gets things started.
 
@@ -54,17 +65,28 @@ def main(argv=None):
     """
     # Get command line arguments
     parser = argparse.ArgumentParser(
-        description="Takes one or more file paths and reports their detected \
-        encodings"
+        description=(
+            "Takes one or more file paths and reports their detected encodings"
+        )
     )
     parser.add_argument(
         "input",
-        help="File whose encoding we would like to determine. \
-        (default: stdin)",
+        help="File whose encoding we would like to determine. 
(default: stdin)", type=argparse.FileType("rb"), nargs="*", default=[sys.stdin.buffer], ) + parser.add_argument( + "--minimal", + help="Print only the encoding to standard output", + action="store_true", + ) + parser.add_argument( + "-l", + "--legacy", + help="Rename legacy encodings to more modern ones.", + action="store_true", + ) parser.add_argument( "--version", action="version", version=f"%(prog)s {__version__}" ) @@ -79,7 +101,11 @@ def main(argv=None): "--help\n", file=sys.stderr, ) - print(description_of(f, f.name)) + print( + description_of( + f, f.name, minimal=args.minimal, should_rename_legacy=args.legacy + ) + ) if __name__ == "__main__": diff --git a/src/pip/_vendor/chardet/codingstatemachine.py b/src/pip/_vendor/chardet/codingstatemachine.py index d3e3e825d6d..8ed4a8773b8 100644 --- a/src/pip/_vendor/chardet/codingstatemachine.py +++ b/src/pip/_vendor/chardet/codingstatemachine.py @@ -27,6 +27,7 @@ import logging +from .codingstatemachinedict import CodingStateMachineDict from .enums import MachineState @@ -53,18 +54,19 @@ class CodingStateMachine: encoding from consideration from here on. """ - def __init__(self, sm): + def __init__(self, sm: CodingStateMachineDict) -> None: self._model = sm self._curr_byte_pos = 0 self._curr_char_len = 0 - self._curr_state = None + self._curr_state = MachineState.START + self.active = True self.logger = logging.getLogger(__name__) self.reset() - def reset(self): + def reset(self) -> None: self._curr_state = MachineState.START - def next_state(self, c): + def next_state(self, c: int) -> int: # for each byte we get its class # if it is first byte, we also get byte length byte_class = self._model["class_table"][c] @@ -77,12 +79,12 @@ def next_state(self, c): self._curr_byte_pos += 1 return self._curr_state - def get_current_charlen(self): + def get_current_charlen(self) -> int: return self._curr_char_len - def get_coding_state_machine(self): + def get_coding_state_machine(self) -> str: return self._model["name"] @property - def language(self): + def language(self) -> str: return self._model["language"] diff --git a/src/pip/_vendor/chardet/codingstatemachinedict.py b/src/pip/_vendor/chardet/codingstatemachinedict.py new file mode 100644 index 00000000000..7a3c4c7e3fe --- /dev/null +++ b/src/pip/_vendor/chardet/codingstatemachinedict.py @@ -0,0 +1,19 @@ +from typing import TYPE_CHECKING, Tuple + +if TYPE_CHECKING: + # TypedDict was introduced in Python 3.8. + # + # TODO: Remove the else block and TYPE_CHECKING check when dropping support + # for Python 3.7. + from typing import TypedDict + + class CodingStateMachineDict(TypedDict, total=False): + class_table: Tuple[int, ...] + class_factor: int + state_table: Tuple[int, ...] + char_len_table: Tuple[int, ...] 
+ name: str + language: str # Optional key + +else: + CodingStateMachineDict = dict diff --git a/src/pip/_vendor/chardet/cp949prober.py b/src/pip/_vendor/chardet/cp949prober.py index 28a1f3dbb57..fa7307ed898 100644 --- a/src/pip/_vendor/chardet/cp949prober.py +++ b/src/pip/_vendor/chardet/cp949prober.py @@ -32,7 +32,7 @@ class CP949Prober(MultiByteCharSetProber): - def __init__(self): + def __init__(self) -> None: super().__init__() self.coding_sm = CodingStateMachine(CP949_SM_MODEL) # NOTE: CP949 is a superset of EUC-KR, so the distribution should be @@ -41,9 +41,9 @@ def __init__(self): self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "CP949" @property - def language(self): + def language(self) -> str: return "Korean" diff --git a/src/pip/_vendor/chardet/enums.py b/src/pip/_vendor/chardet/enums.py index 32a77e76c61..5e3e1982336 100644 --- a/src/pip/_vendor/chardet/enums.py +++ b/src/pip/_vendor/chardet/enums.py @@ -4,6 +4,8 @@ :author: Dan Blanchard (dan.blanchard@gmail.com) """ +from enum import Enum, Flag + class InputState: """ @@ -15,12 +17,13 @@ class InputState: HIGH_BYTE = 2 -class LanguageFilter: +class LanguageFilter(Flag): """ This enum represents the different language filters we can apply to a ``UniversalDetector``. """ + NONE = 0x00 CHINESE_SIMPLIFIED = 0x01 CHINESE_TRADITIONAL = 0x02 JAPANESE = 0x04 @@ -31,7 +34,7 @@ class LanguageFilter: CJK = CHINESE | JAPANESE | KOREAN -class ProbingState: +class ProbingState(Enum): """ This enum represents the different states a prober can be in. """ @@ -62,7 +65,7 @@ class SequenceLikelihood: POSITIVE = 3 @classmethod - def get_num_categories(cls): + def get_num_categories(cls) -> int: """:returns: The number of likelihood categories in the enum.""" return 4 diff --git a/src/pip/_vendor/chardet/escprober.py b/src/pip/_vendor/chardet/escprober.py index d9926115dad..fd713830d36 100644 --- a/src/pip/_vendor/chardet/escprober.py +++ b/src/pip/_vendor/chardet/escprober.py @@ -25,6 +25,8 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Optional, Union + from .charsetprober import CharSetProber from .codingstatemachine import CodingStateMachine from .enums import LanguageFilter, MachineState, ProbingState @@ -43,7 +45,7 @@ class EscCharSetProber(CharSetProber): identify these encodings. 
""" - def __init__(self, lang_filter=None): + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: super().__init__(lang_filter=lang_filter) self.coding_sm = [] if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED: @@ -53,17 +55,15 @@ def __init__(self, lang_filter=None): self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL)) if self.lang_filter & LanguageFilter.KOREAN: self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL)) - self.active_sm_count = None - self._detected_charset = None - self._detected_language = None - self._state = None + self.active_sm_count = 0 + self._detected_charset: Optional[str] = None + self._detected_language: Optional[str] = None + self._state = ProbingState.DETECTING self.reset() - def reset(self): + def reset(self) -> None: super().reset() for coding_sm in self.coding_sm: - if not coding_sm: - continue coding_sm.active = True coding_sm.reset() self.active_sm_count = len(self.coding_sm) @@ -71,20 +71,20 @@ def reset(self): self._detected_language = None @property - def charset_name(self): + def charset_name(self) -> Optional[str]: return self._detected_charset @property - def language(self): + def language(self) -> Optional[str]: return self._detected_language - def get_confidence(self): + def get_confidence(self) -> float: return 0.99 if self._detected_charset else 0.00 - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: for c in byte_str: for coding_sm in self.coding_sm: - if not coding_sm or not coding_sm.active: + if not coding_sm.active: continue coding_state = coding_sm.next_state(c) if coding_state == MachineState.ERROR: diff --git a/src/pip/_vendor/chardet/escsm.py b/src/pip/_vendor/chardet/escsm.py index 3aa0f4d962d..11d4adf771f 100644 --- a/src/pip/_vendor/chardet/escsm.py +++ b/src/pip/_vendor/chardet/escsm.py @@ -25,6 +25,7 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from .codingstatemachinedict import CodingStateMachineDict from .enums import MachineState # fmt: off @@ -75,7 +76,7 @@ HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0) -HZ_SM_MODEL = { +HZ_SM_MODEL: CodingStateMachineDict = { "class_table": HZ_CLS, "class_factor": 6, "state_table": HZ_ST, @@ -134,7 +135,7 @@ ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0) -ISO2022CN_SM_MODEL = { +ISO2022CN_SM_MODEL: CodingStateMachineDict = { "class_table": ISO2022CN_CLS, "class_factor": 9, "state_table": ISO2022CN_ST, @@ -194,7 +195,7 @@ ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) -ISO2022JP_SM_MODEL = { +ISO2022JP_SM_MODEL: CodingStateMachineDict = { "class_table": ISO2022JP_CLS, "class_factor": 10, "state_table": ISO2022JP_ST, @@ -250,7 +251,7 @@ ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0) -ISO2022KR_SM_MODEL = { +ISO2022KR_SM_MODEL: CodingStateMachineDict = { "class_table": ISO2022KR_CLS, "class_factor": 6, "state_table": ISO2022KR_ST, diff --git a/src/pip/_vendor/chardet/eucjpprober.py b/src/pip/_vendor/chardet/eucjpprober.py index abf2e66e283..39487f4098d 100644 --- a/src/pip/_vendor/chardet/eucjpprober.py +++ b/src/pip/_vendor/chardet/eucjpprober.py @@ -25,6 +25,8 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Union + from .chardistribution import EUCJPDistributionAnalysis from .codingstatemachine import CodingStateMachine from .enums import MachineState, ProbingState @@ -34,26 +36,29 @@ class EUCJPProber(MultiByteCharSetProber): - def __init__(self): + def __init__(self) -> None: 
super().__init__() self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL) self.distribution_analyzer = EUCJPDistributionAnalysis() self.context_analyzer = EUCJPContextAnalysis() self.reset() - def reset(self): + def reset(self) -> None: super().reset() self.context_analyzer.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "EUC-JP" @property - def language(self): + def language(self) -> str: return "Japanese" - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + assert self.coding_sm is not None + assert self.distribution_analyzer is not None + for i, byte in enumerate(byte_str): # PY3K: byte_str is a byte array, so byte is an int, not a byte coding_state = self.coding_sm.next_state(byte) @@ -89,7 +94,9 @@ def feed(self, byte_str): return self.state - def get_confidence(self): + def get_confidence(self) -> float: + assert self.distribution_analyzer is not None + context_conf = self.context_analyzer.get_confidence() distrib_conf = self.distribution_analyzer.get_confidence() return max(context_conf, distrib_conf) diff --git a/src/pip/_vendor/chardet/euckrprober.py b/src/pip/_vendor/chardet/euckrprober.py index 154a6d2162b..1fc5de0462c 100644 --- a/src/pip/_vendor/chardet/euckrprober.py +++ b/src/pip/_vendor/chardet/euckrprober.py @@ -32,16 +32,16 @@ class EUCKRProber(MultiByteCharSetProber): - def __init__(self): + def __init__(self) -> None: super().__init__() self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL) self.distribution_analyzer = EUCKRDistributionAnalysis() self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "EUC-KR" @property - def language(self): + def language(self) -> str: return "Korean" diff --git a/src/pip/_vendor/chardet/euctwprober.py b/src/pip/_vendor/chardet/euctwprober.py index ca10a23ca43..a37ab189958 100644 --- a/src/pip/_vendor/chardet/euctwprober.py +++ b/src/pip/_vendor/chardet/euctwprober.py @@ -32,16 +32,16 @@ class EUCTWProber(MultiByteCharSetProber): - def __init__(self): + def __init__(self) -> None: super().__init__() self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL) self.distribution_analyzer = EUCTWDistributionAnalysis() self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "EUC-TW" @property - def language(self): + def language(self) -> str: return "Taiwan" diff --git a/src/pip/_vendor/chardet/gb2312prober.py b/src/pip/_vendor/chardet/gb2312prober.py index 251c042955e..d423e7311e2 100644 --- a/src/pip/_vendor/chardet/gb2312prober.py +++ b/src/pip/_vendor/chardet/gb2312prober.py @@ -32,16 +32,16 @@ class GB2312Prober(MultiByteCharSetProber): - def __init__(self): + def __init__(self) -> None: super().__init__() self.coding_sm = CodingStateMachine(GB2312_SM_MODEL) self.distribution_analyzer = GB2312DistributionAnalysis() self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "GB2312" @property - def language(self): + def language(self) -> str: return "Chinese" diff --git a/src/pip/_vendor/chardet/hebrewprober.py b/src/pip/_vendor/chardet/hebrewprober.py index 3ca634bf373..785d0057bcc 100644 --- a/src/pip/_vendor/chardet/hebrewprober.py +++ b/src/pip/_vendor/chardet/hebrewprober.py @@ -25,8 +25,11 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Optional, Union + from .charsetprober import CharSetProber from .enums import ProbingState +from .sbcharsetprober import SingleByteCharSetProber # This prober doesn't actually 
recognize a language or a charset. # It is a helper prober for the use of the Hebrew model probers @@ -127,6 +130,7 @@ class HebrewProber(CharSetProber): + SPACE = 0x20 # windows-1255 / ISO-8859-8 code points of interest FINAL_KAF = 0xEA NORMAL_KAF = 0xEB @@ -152,31 +156,35 @@ class HebrewProber(CharSetProber): VISUAL_HEBREW_NAME = "ISO-8859-8" LOGICAL_HEBREW_NAME = "windows-1255" - def __init__(self): + def __init__(self) -> None: super().__init__() - self._final_char_logical_score = None - self._final_char_visual_score = None - self._prev = None - self._before_prev = None - self._logical_prober = None - self._visual_prober = None + self._final_char_logical_score = 0 + self._final_char_visual_score = 0 + self._prev = self.SPACE + self._before_prev = self.SPACE + self._logical_prober: Optional[SingleByteCharSetProber] = None + self._visual_prober: Optional[SingleByteCharSetProber] = None self.reset() - def reset(self): + def reset(self) -> None: self._final_char_logical_score = 0 self._final_char_visual_score = 0 # The two last characters seen in the previous buffer, # mPrev and mBeforePrev are initialized to space in order to simulate # a word delimiter at the beginning of the data - self._prev = " " - self._before_prev = " " + self._prev = self.SPACE + self._before_prev = self.SPACE # These probers are owned by the group prober. - def set_model_probers(self, logical_prober, visual_prober): + def set_model_probers( + self, + logical_prober: SingleByteCharSetProber, + visual_prober: SingleByteCharSetProber, + ) -> None: self._logical_prober = logical_prober self._visual_prober = visual_prober - def is_final(self, c): + def is_final(self, c: int) -> bool: return c in [ self.FINAL_KAF, self.FINAL_MEM, @@ -185,7 +193,7 @@ def is_final(self, c): self.FINAL_TSADI, ] - def is_non_final(self, c): + def is_non_final(self, c: int) -> bool: # The normal Tsadi is not a good Non-Final letter due to words like # 'lechotet' (to chat) containing an apostrophe after the tsadi. This # apostrophe is converted to a space in FilterWithoutEnglishLetters @@ -198,7 +206,7 @@ def is_non_final(self, c): # since these words are quite rare. return c in [self.NORMAL_KAF, self.NORMAL_MEM, self.NORMAL_NUN, self.NORMAL_PE] - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: # Final letter analysis for logical-visual decision. # Look for evidence that the received buffer is either logical Hebrew # or visual Hebrew. @@ -232,9 +240,9 @@ def feed(self, byte_str): byte_str = self.filter_high_byte_only(byte_str) for cur in byte_str: - if cur == " ": + if cur == self.SPACE: # We stand on a space - a word just ended - if self._before_prev != " ": + if self._before_prev != self.SPACE: # next-to-last char was not a space so self._prev is not a # 1 letter word if self.is_final(self._prev): @@ -247,9 +255,9 @@ def feed(self, byte_str): else: # Not standing on a space if ( - (self._before_prev == " ") + (self._before_prev == self.SPACE) and (self.is_final(self._prev)) - and (cur != " ") + and (cur != self.SPACE) ): # case (3) [-2:space][-1:final letter][cur:not space] self._final_char_visual_score += 1 @@ -261,7 +269,10 @@ def feed(self, byte_str): return ProbingState.DETECTING @property - def charset_name(self): + def charset_name(self) -> str: + assert self._logical_prober is not None + assert self._visual_prober is not None + # Make the decision: is it Logical or Visual? # If the final letter score distance is dominant enough, rely on it. 
finalsub = self._final_char_logical_score - self._final_char_visual_score @@ -289,11 +300,14 @@ def charset_name(self): return self.LOGICAL_HEBREW_NAME @property - def language(self): + def language(self) -> str: return "Hebrew" @property - def state(self): + def state(self) -> ProbingState: + assert self._logical_prober is not None + assert self._visual_prober is not None + # Remain active as long as any of the model probers are active. if (self._logical_prober.state == ProbingState.NOT_ME) and ( self._visual_prober.state == ProbingState.NOT_ME diff --git a/src/pip/_vendor/chardet/johabprober.py b/src/pip/_vendor/chardet/johabprober.py index 6f359d193f7..d7364ba61ec 100644 --- a/src/pip/_vendor/chardet/johabprober.py +++ b/src/pip/_vendor/chardet/johabprober.py @@ -32,16 +32,16 @@ class JOHABProber(MultiByteCharSetProber): - def __init__(self): + def __init__(self) -> None: super().__init__() self.coding_sm = CodingStateMachine(JOHAB_SM_MODEL) self.distribution_analyzer = JOHABDistributionAnalysis() self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "Johab" @property - def language(self): + def language(self) -> str: return "Korean" diff --git a/src/pip/_vendor/chardet/jpcntx.py b/src/pip/_vendor/chardet/jpcntx.py index 7a8e5be0623..2f53bdda09e 100644 --- a/src/pip/_vendor/chardet/jpcntx.py +++ b/src/pip/_vendor/chardet/jpcntx.py @@ -25,6 +25,7 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import List, Tuple, Union # This is hiragana 2-char sequence table, the number in each cell represents its frequency category # fmt: off @@ -123,15 +124,15 @@ class JapaneseContextAnalysis: MAX_REL_THRESHOLD = 1000 MINIMUM_DATA_THRESHOLD = 4 - def __init__(self): - self._total_rel = None - self._rel_sample = None - self._need_to_skip_char_num = None - self._last_char_order = None - self._done = None + def __init__(self) -> None: + self._total_rel = 0 + self._rel_sample: List[int] = [] + self._need_to_skip_char_num = 0 + self._last_char_order = -1 + self._done = False self.reset() - def reset(self): + def reset(self) -> None: self._total_rel = 0 # total sequence received # category counters, each integer counts sequence in its category self._rel_sample = [0] * self.NUM_OF_CATEGORY @@ -143,7 +144,7 @@ def reset(self): # been made self._done = False - def feed(self, byte_str, num_bytes): + def feed(self, byte_str: Union[bytes, bytearray], num_bytes: int) -> None: if self._done: return @@ -172,29 +173,29 @@ def feed(self, byte_str, num_bytes): ] += 1 self._last_char_order = order - def got_enough_data(self): + def got_enough_data(self) -> bool: return self._total_rel > self.ENOUGH_REL_THRESHOLD - def get_confidence(self): + def get_confidence(self) -> float: # This is just one way to calculate confidence. It works well for me. 
if self._total_rel > self.MINIMUM_DATA_THRESHOLD: return (self._total_rel - self._rel_sample[0]) / self._total_rel return self.DONT_KNOW - def get_order(self, _): + def get_order(self, _: Union[bytes, bytearray]) -> Tuple[int, int]: return -1, 1 class SJISContextAnalysis(JapaneseContextAnalysis): - def __init__(self): + def __init__(self) -> None: super().__init__() self._charset_name = "SHIFT_JIS" @property - def charset_name(self): + def charset_name(self) -> str: return self._charset_name - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]: if not byte_str: return -1, 1 # find out current char's byte length @@ -216,7 +217,7 @@ def get_order(self, byte_str): class EUCJPContextAnalysis(JapaneseContextAnalysis): - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]: if not byte_str: return -1, 1 # find out current char's byte length diff --git a/src/pip/_vendor/chardet/latin1prober.py b/src/pip/_vendor/chardet/latin1prober.py index 241f14ab914..59a01d91b87 100644 --- a/src/pip/_vendor/chardet/latin1prober.py +++ b/src/pip/_vendor/chardet/latin1prober.py @@ -26,6 +26,8 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import List, Union + from .charsetprober import CharSetProber from .enums import ProbingState @@ -96,26 +98,26 @@ class Latin1Prober(CharSetProber): - def __init__(self): + def __init__(self) -> None: super().__init__() - self._last_char_class = None - self._freq_counter = None + self._last_char_class = OTH + self._freq_counter: List[int] = [] self.reset() - def reset(self): + def reset(self) -> None: self._last_char_class = OTH self._freq_counter = [0] * FREQ_CAT_NUM super().reset() @property - def charset_name(self): + def charset_name(self) -> str: return "ISO-8859-1" @property - def language(self): + def language(self) -> str: return "" - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: byte_str = self.remove_xml_tags(byte_str) for c in byte_str: char_class = Latin1_CharToClass[c] @@ -128,7 +130,7 @@ def feed(self, byte_str): return self.state - def get_confidence(self): + def get_confidence(self) -> float: if self.state == ProbingState.NOT_ME: return 0.01 diff --git a/src/pip/_vendor/chardet/macromanprober.py b/src/pip/_vendor/chardet/macromanprober.py new file mode 100644 index 00000000000..1425d10ecaa --- /dev/null +++ b/src/pip/_vendor/chardet/macromanprober.py @@ -0,0 +1,162 @@ +######################## BEGIN LICENSE BLOCK ######################## +# This code was modified from latin1prober.py by Rob Speer . +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Rob Speer - adapt to MacRoman encoding +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from typing import List, Union + +from .charsetprober import CharSetProber +from .enums import ProbingState + +FREQ_CAT_NUM = 4 + +UDF = 0 # undefined +OTH = 1 # other +ASC = 2 # ascii capital letter +ASS = 3 # ascii small letter +ACV = 4 # accent capital vowel +ACO = 5 # accent capital other +ASV = 6 # accent small vowel +ASO = 7 # accent small other +ODD = 8 # character that is unlikely to appear +CLASS_NUM = 9 # total classes + +# The change from Latin1 is that we explicitly look for extended characters +# that are infrequently-occurring symbols, and consider them to always be +# improbable. This should let MacRoman get out of the way of more likely +# encodings in most situations. + +# fmt: off +MacRoman_CharToClass = ( + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F + OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 + ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F + OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 + ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F + ACV, ACV, ACO, ACV, ACO, ACV, ACV, ASV, # 80 - 87 + ASV, ASV, ASV, ASV, ASV, ASO, ASV, ASV, # 88 - 8F + ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASV, # 90 - 97 + ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # 98 - 9F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, ASO, # A0 - A7 + OTH, OTH, ODD, ODD, OTH, OTH, ACV, ACV, # A8 - AF + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 + OTH, OTH, OTH, OTH, OTH, OTH, ASV, ASV, # B8 - BF + OTH, OTH, ODD, OTH, ODD, OTH, OTH, OTH, # C0 - C7 + OTH, OTH, OTH, ACV, ACV, ACV, ACV, ASV, # C8 - CF + OTH, OTH, OTH, OTH, OTH, OTH, OTH, ODD, # D0 - D7 + ASV, ACV, ODD, OTH, OTH, OTH, OTH, OTH, # D8 - DF + OTH, OTH, OTH, OTH, OTH, ACV, ACV, ACV, # E0 - E7 + ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # E8 - EF + ODD, ACV, ACV, ACV, ACV, ASV, ODD, ODD, # F0 - F7 + ODD, ODD, ODD, ODD, ODD, ODD, ODD, ODD, # F8 - FF +) + +# 0 : illegal +# 1 : very unlikely +# 2 : normal +# 3 : very likely +MacRomanClassModel = ( +# UDF OTH ASC ASS ACV ACO ASV ASO ODD + 0, 0, 0, 0, 0, 0, 0, 0, 0, # UDF + 0, 3, 3, 3, 3, 3, 3, 3, 1, # OTH + 0, 3, 3, 3, 3, 3, 3, 3, 1, # ASC + 0, 3, 3, 3, 1, 1, 3, 3, 1, # ASS + 0, 3, 3, 3, 1, 2, 1, 2, 1, # ACV + 0, 3, 3, 3, 3, 3, 3, 3, 1, # ACO + 0, 3, 1, 3, 1, 1, 1, 3, 1, # ASV + 0, 3, 1, 3, 1, 1, 3, 3, 1, # ASO + 0, 1, 1, 1, 1, 1, 1, 1, 1, # ODD +) +# fmt: on + + +class MacRomanProber(CharSetProber): + def __init__(self) -> None: + super().__init__() + self._last_char_class = OTH + self._freq_counter: List[int] = [] + self.reset() + + def reset(self) -> None: + self._last_char_class = OTH + self._freq_counter = [0] * FREQ_CAT_NUM + + # express the prior that MacRoman is a somewhat rare 
encoding; + # this can be done by starting out in a slightly improbable state + # that must be overcome + self._freq_counter[2] = 10 + + super().reset() + + @property + def charset_name(self) -> str: + return "MacRoman" + + @property + def language(self) -> str: + return "" + + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + byte_str = self.remove_xml_tags(byte_str) + for c in byte_str: + char_class = MacRoman_CharToClass[c] + freq = MacRomanClassModel[(self._last_char_class * CLASS_NUM) + char_class] + if freq == 0: + self._state = ProbingState.NOT_ME + break + self._freq_counter[freq] += 1 + self._last_char_class = char_class + + return self.state + + def get_confidence(self) -> float: + if self.state == ProbingState.NOT_ME: + return 0.01 + + total = sum(self._freq_counter) + confidence = ( + 0.0 + if total < 0.01 + else (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total + ) + confidence = max(confidence, 0.0) + # lower the confidence of MacRoman so that other more accurate + # detector can take priority. + confidence *= 0.73 + return confidence diff --git a/src/pip/_vendor/chardet/mbcharsetprober.py b/src/pip/_vendor/chardet/mbcharsetprober.py index bf96ad5d490..666307e8fe0 100644 --- a/src/pip/_vendor/chardet/mbcharsetprober.py +++ b/src/pip/_vendor/chardet/mbcharsetprober.py @@ -27,8 +27,12 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Optional, Union + +from .chardistribution import CharDistributionAnalysis from .charsetprober import CharSetProber -from .enums import MachineState, ProbingState +from .codingstatemachine import CodingStateMachine +from .enums import LanguageFilter, MachineState, ProbingState class MultiByteCharSetProber(CharSetProber): @@ -36,29 +40,24 @@ class MultiByteCharSetProber(CharSetProber): MultiByteCharSetProber """ - def __init__(self, lang_filter=None): + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: super().__init__(lang_filter=lang_filter) - self.distribution_analyzer = None - self.coding_sm = None - self._last_char = [0, 0] + self.distribution_analyzer: Optional[CharDistributionAnalysis] = None + self.coding_sm: Optional[CodingStateMachine] = None + self._last_char = bytearray(b"\0\0") - def reset(self): + def reset(self) -> None: super().reset() if self.coding_sm: self.coding_sm.reset() if self.distribution_analyzer: self.distribution_analyzer.reset() - self._last_char = [0, 0] - - @property - def charset_name(self): - raise NotImplementedError + self._last_char = bytearray(b"\0\0") - @property - def language(self): - raise NotImplementedError + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + assert self.coding_sm is not None + assert self.distribution_analyzer is not None - def feed(self, byte_str): for i, byte in enumerate(byte_str): coding_state = self.coding_sm.next_state(byte) if coding_state == MachineState.ERROR: @@ -91,5 +90,6 @@ def feed(self, byte_str): return self.state - def get_confidence(self): + def get_confidence(self) -> float: + assert self.distribution_analyzer is not None return self.distribution_analyzer.get_confidence() diff --git a/src/pip/_vendor/chardet/mbcsgroupprober.py b/src/pip/_vendor/chardet/mbcsgroupprober.py index 94488360c4b..6cb9cc7b3bc 100644 --- a/src/pip/_vendor/chardet/mbcsgroupprober.py +++ b/src/pip/_vendor/chardet/mbcsgroupprober.py @@ -30,6 +30,7 @@ from .big5prober import Big5Prober from .charsetgroupprober import CharSetGroupProber from .cp949prober import 
CP949Prober +from .enums import LanguageFilter from .eucjpprober import EUCJPProber from .euckrprober import EUCKRProber from .euctwprober import EUCTWProber @@ -40,7 +41,7 @@ class MBCSGroupProber(CharSetGroupProber): - def __init__(self, lang_filter=None): + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: super().__init__(lang_filter=lang_filter) self.probers = [ UTF8Prober(), diff --git a/src/pip/_vendor/chardet/mbcssm.py b/src/pip/_vendor/chardet/mbcssm.py index d3b9c4b75a2..7bbe97e6665 100644 --- a/src/pip/_vendor/chardet/mbcssm.py +++ b/src/pip/_vendor/chardet/mbcssm.py @@ -25,6 +25,7 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from .codingstatemachinedict import CodingStateMachineDict from .enums import MachineState # BIG5 @@ -74,7 +75,7 @@ BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0) -BIG5_SM_MODEL = { +BIG5_SM_MODEL: CodingStateMachineDict = { "class_table": BIG5_CLS, "class_factor": 5, "state_table": BIG5_ST, @@ -117,7 +118,7 @@ CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) -CP949_SM_MODEL = { +CP949_SM_MODEL: CodingStateMachineDict = { "class_table": CP949_CLS, "class_factor": 10, "state_table": CP949_ST, @@ -173,7 +174,7 @@ EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0) -EUCJP_SM_MODEL = { +EUCJP_SM_MODEL: CodingStateMachineDict = { "class_table": EUCJP_CLS, "class_factor": 6, "state_table": EUCJP_ST, @@ -226,7 +227,7 @@ EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0) -EUCKR_SM_MODEL = { +EUCKR_SM_MODEL: CodingStateMachineDict = { "class_table": EUCKR_CLS, "class_factor": 4, "state_table": EUCKR_ST, @@ -283,7 +284,7 @@ JOHAB_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 0, 0, 2, 2, 2) -JOHAB_SM_MODEL = { +JOHAB_SM_MODEL: CodingStateMachineDict = { "class_table": JOHAB_CLS, "class_factor": 10, "state_table": JOHAB_ST, @@ -340,7 +341,7 @@ EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3) -EUCTW_SM_MODEL = { +EUCTW_SM_MODEL: CodingStateMachineDict = { "class_table": EUCTW_CLS, "class_factor": 7, "state_table": EUCTW_ST, @@ -402,7 +403,7 @@ # 2 here. 
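# For instance (an illustrative data point, not from the patch): GB2312
# encodes U+4E2D 中 as the two-byte sequence 0xD6 0xD0, so assuming a
# maximum length of 2 is safe for the frequency analysis this table feeds.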
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2) -GB2312_SM_MODEL = { +GB2312_SM_MODEL: CodingStateMachineDict = { "class_table": GB2312_CLS, "class_factor": 7, "state_table": GB2312_ST, @@ -458,7 +459,7 @@ SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0) -SJIS_SM_MODEL = { +SJIS_SM_MODEL: CodingStateMachineDict = { "class_table": SJIS_CLS, "class_factor": 6, "state_table": SJIS_ST, @@ -516,7 +517,7 @@ UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2) -UCS2BE_SM_MODEL = { +UCS2BE_SM_MODEL: CodingStateMachineDict = { "class_table": UCS2BE_CLS, "class_factor": 6, "state_table": UCS2BE_ST, @@ -574,7 +575,7 @@ UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2) -UCS2LE_SM_MODEL = { +UCS2LE_SM_MODEL: CodingStateMachineDict = { "class_table": UCS2LE_CLS, "class_factor": 6, "state_table": UCS2LE_ST, @@ -651,7 +652,7 @@ UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) -UTF8_SM_MODEL = { +UTF8_SM_MODEL: CodingStateMachineDict = { "class_table": UTF8_CLS, "class_factor": 16, "state_table": UTF8_ST, diff --git a/src/pip/_vendor/chardet/metadata/languages.py b/src/pip/_vendor/chardet/metadata/languages.py index 1d37884c31e..eb40c5f0c85 100644 --- a/src/pip/_vendor/chardet/metadata/languages.py +++ b/src/pip/_vendor/chardet/metadata/languages.py @@ -6,6 +6,7 @@ """ from string import ascii_letters +from typing import List, Optional # TODO: Add Ukrainian (KOI8-U) @@ -33,13 +34,13 @@ class Language: def __init__( self, - name=None, - iso_code=None, - use_ascii=True, - charsets=None, - alphabet=None, - wiki_start_pages=None, - ): + name: Optional[str] = None, + iso_code: Optional[str] = None, + use_ascii: bool = True, + charsets: Optional[List[str]] = None, + alphabet: Optional[str] = None, + wiki_start_pages: Optional[List[str]] = None, + ) -> None: super().__init__() self.name = name self.iso_code = iso_code @@ -55,7 +56,7 @@ def __init__( self.alphabet = "".join(sorted(set(alphabet))) if alphabet else None self.wiki_start_pages = wiki_start_pages - def __repr__(self): + def __repr__(self) -> str: param_str = ", ".join( f"{k}={v!r}" for k, v in self.__dict__.items() if not k.startswith("_") ) @@ -103,7 +104,7 @@ def __repr__(self): name="Danish", iso_code="da", use_ascii=True, - charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"], + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], alphabet="æøåÆØÅ", wiki_start_pages=["Forside"], ), @@ -111,8 +112,8 @@ def __repr__(self): name="German", iso_code="de", use_ascii=True, - charsets=["ISO-8859-1", "WINDOWS-1252"], - alphabet="äöüßÄÖÜ", + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="äöüßẞÄÖÜ", wiki_start_pages=["Wikipedia:Hauptseite"], ), "Greek": Language( @@ -127,7 +128,7 @@ def __repr__(self): name="English", iso_code="en", use_ascii=True, - charsets=["ISO-8859-1", "WINDOWS-1252"], + charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"], wiki_start_pages=["Main_Page"], ), "Esperanto": Language( @@ -143,7 +144,7 @@ def __repr__(self): name="Spanish", iso_code="es", use_ascii=True, - charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"], + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], alphabet="ñáéíóúüÑÁÉÍÓÚÜ", wiki_start_pages=["Wikipedia:Portada"], ), @@ -161,7 +162,7 @@ def __repr__(self): name="Finnish", iso_code="fi", use_ascii=True, - charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"], + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], alphabet="ÅÄÖŠŽåäöšž", wiki_start_pages=["Wikipedia:Etusivu"], ), @@ -169,7 +170,7 @@ def __repr__(self): name="French", 
iso_code="fr", use_ascii=True, - charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"], + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], alphabet="œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ", wiki_start_pages=["Wikipédia:Accueil_principal", "Bœuf (animal)"], ), @@ -203,7 +204,7 @@ def __repr__(self): name="Italian", iso_code="it", use_ascii=True, - charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"], + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], alphabet="ÀÈÉÌÒÓÙàèéìòóù", wiki_start_pages=["Pagina_principale"], ), @@ -237,7 +238,7 @@ def __repr__(self): name="Dutch", iso_code="nl", use_ascii=True, - charsets=["ISO-8859-1", "WINDOWS-1252"], + charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"], wiki_start_pages=["Hoofdpagina"], ), "Polish": Language( @@ -253,7 +254,7 @@ def __repr__(self): name="Portuguese", iso_code="pt", use_ascii=True, - charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"], + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], alphabet="ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú", wiki_start_pages=["Wikipédia:Página_principal"], ), diff --git a/src/pip/_vendor/chardet/py.typed b/src/pip/_vendor/chardet/py.typed new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/pip/_vendor/chardet/resultdict.py b/src/pip/_vendor/chardet/resultdict.py new file mode 100644 index 00000000000..7d36e64c467 --- /dev/null +++ b/src/pip/_vendor/chardet/resultdict.py @@ -0,0 +1,16 @@ +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + # TypedDict was introduced in Python 3.8. + # + # TODO: Remove the else block and TYPE_CHECKING check when dropping support + # for Python 3.7. + from typing import TypedDict + + class ResultDict(TypedDict): + encoding: Optional[str] + confidence: float + language: Optional[str] + +else: + ResultDict = dict diff --git a/src/pip/_vendor/chardet/sbcharsetprober.py b/src/pip/_vendor/chardet/sbcharsetprober.py index 31d70e154a9..0ffbcdd2c3e 100644 --- a/src/pip/_vendor/chardet/sbcharsetprober.py +++ b/src/pip/_vendor/chardet/sbcharsetprober.py @@ -26,23 +26,20 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from collections import namedtuple +from typing import Dict, List, NamedTuple, Optional, Union from .charsetprober import CharSetProber from .enums import CharacterCategory, ProbingState, SequenceLikelihood -SingleByteCharSetModel = namedtuple( - "SingleByteCharSetModel", - [ - "charset_name", - "language", - "char_to_order_map", - "language_model", - "typical_positive_ratio", - "keep_ascii_letters", - "alphabet", - ], -) + +class SingleByteCharSetModel(NamedTuple): + charset_name: str + language: str + char_to_order_map: Dict[int, int] + language_model: Dict[int, Dict[int, int]] + typical_positive_ratio: float + keep_ascii_letters: bool + alphabet: str class SingleByteCharSetProber(CharSetProber): @@ -51,22 +48,27 @@ class SingleByteCharSetProber(CharSetProber): POSITIVE_SHORTCUT_THRESHOLD = 0.95 NEGATIVE_SHORTCUT_THRESHOLD = 0.05 - def __init__(self, model, is_reversed=False, name_prober=None): + def __init__( + self, + model: SingleByteCharSetModel, + is_reversed: bool = False, + name_prober: Optional[CharSetProber] = None, + ) -> None: super().__init__() self._model = model # TRUE if we need to reverse every pair in the model lookup self._reversed = is_reversed # Optional auxiliary prober for name decision self._name_prober = name_prober - self._last_order = None - self._seq_counters = None - self._total_seqs = None - self._total_char = None - 
self._control_char = None - self._freq_char = None + self._last_order = 255 + self._seq_counters: List[int] = [] + self._total_seqs = 0 + self._total_char = 0 + self._control_char = 0 + self._freq_char = 0 self.reset() - def reset(self): + def reset(self) -> None: super().reset() # char order of last character self._last_order = 255 @@ -78,18 +80,18 @@ def reset(self): self._freq_char = 0 @property - def charset_name(self): + def charset_name(self) -> Optional[str]: if self._name_prober: return self._name_prober.charset_name return self._model.charset_name @property - def language(self): + def language(self) -> Optional[str]: if self._name_prober: return self._name_prober.language return self._model.language - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: # TODO: Make filter_international_words keep things in self.alphabet if not self._model.keep_ascii_letters: byte_str = self.filter_international_words(byte_str) @@ -139,7 +141,7 @@ def feed(self, byte_str): return self.state - def get_confidence(self): + def get_confidence(self) -> float: r = 0.01 if self._total_seqs > 0: r = ( diff --git a/src/pip/_vendor/chardet/sbcsgroupprober.py b/src/pip/_vendor/chardet/sbcsgroupprober.py index cad001cb10e..890ae8465c5 100644 --- a/src/pip/_vendor/chardet/sbcsgroupprober.py +++ b/src/pip/_vendor/chardet/sbcsgroupprober.py @@ -48,7 +48,7 @@ class SBCSGroupProber(CharSetGroupProber): - def __init__(self): + def __init__(self) -> None: super().__init__() hebrew_prober = HebrewProber() logical_hebrew_prober = SingleByteCharSetProber( diff --git a/src/pip/_vendor/chardet/sjisprober.py b/src/pip/_vendor/chardet/sjisprober.py index 3bcbdb71d16..91df077961b 100644 --- a/src/pip/_vendor/chardet/sjisprober.py +++ b/src/pip/_vendor/chardet/sjisprober.py @@ -25,6 +25,8 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Union + from .chardistribution import SJISDistributionAnalysis from .codingstatemachine import CodingStateMachine from .enums import MachineState, ProbingState @@ -34,26 +36,29 @@ class SJISProber(MultiByteCharSetProber): - def __init__(self): + def __init__(self) -> None: super().__init__() self.coding_sm = CodingStateMachine(SJIS_SM_MODEL) self.distribution_analyzer = SJISDistributionAnalysis() self.context_analyzer = SJISContextAnalysis() self.reset() - def reset(self): + def reset(self) -> None: super().reset() self.context_analyzer.reset() @property - def charset_name(self): + def charset_name(self) -> str: return self.context_analyzer.charset_name @property - def language(self): + def language(self) -> str: return "Japanese" - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + assert self.coding_sm is not None + assert self.distribution_analyzer is not None + for i, byte in enumerate(byte_str): coding_state = self.coding_sm.next_state(byte) if coding_state == MachineState.ERROR: @@ -92,7 +97,9 @@ def feed(self, byte_str): return self.state - def get_confidence(self): + def get_confidence(self) -> float: + assert self.distribution_analyzer is not None + context_conf = self.context_analyzer.get_confidence() distrib_conf = self.distribution_analyzer.get_confidence() return max(context_conf, distrib_conf) diff --git a/src/pip/_vendor/chardet/universaldetector.py b/src/pip/_vendor/chardet/universaldetector.py index 22fcf8290c1..30c441dc28e 100644 --- a/src/pip/_vendor/chardet/universaldetector.py +++ 
b/src/pip/_vendor/chardet/universaldetector.py @@ -39,12 +39,16 @@ class a user of ``chardet`` should use. import codecs import logging import re +from typing import List, Optional, Union from .charsetgroupprober import CharSetGroupProber +from .charsetprober import CharSetProber from .enums import InputState, LanguageFilter, ProbingState from .escprober import EscCharSetProber from .latin1prober import Latin1Prober +from .macromanprober import MacRomanProber from .mbcsgroupprober import MBCSGroupProber +from .resultdict import ResultDict from .sbcsgroupprober import SBCSGroupProber from .utf1632prober import UTF1632Prober @@ -80,34 +84,55 @@ class UniversalDetector: "iso-8859-9": "Windows-1254", "iso-8859-13": "Windows-1257", } + # Based on https://encoding.spec.whatwg.org/#names-and-labels + # but altered to match Python names for encodings and remove mappings + # that break tests. + LEGACY_MAP = { + "ascii": "Windows-1252", + "iso-8859-1": "Windows-1252", + "tis-620": "ISO-8859-11", + "iso-8859-9": "Windows-1254", + "gb2312": "GB18030", + "euc-kr": "CP949", + "utf-16le": "UTF-16", + } - def __init__(self, lang_filter=LanguageFilter.ALL): - self._esc_charset_prober = None - self._utf1632_prober = None - self._charset_probers = [] - self.result = None - self.done = None - self._got_data = None - self._input_state = None - self._last_char = None + def __init__( + self, + lang_filter: LanguageFilter = LanguageFilter.ALL, + should_rename_legacy: bool = False, + ) -> None: + self._esc_charset_prober: Optional[EscCharSetProber] = None + self._utf1632_prober: Optional[UTF1632Prober] = None + self._charset_probers: List[CharSetProber] = [] + self.result: ResultDict = { + "encoding": None, + "confidence": 0.0, + "language": None, + } + self.done = False + self._got_data = False + self._input_state = InputState.PURE_ASCII + self._last_char = b"" self.lang_filter = lang_filter self.logger = logging.getLogger(__name__) - self._has_win_bytes = None + self._has_win_bytes = False + self.should_rename_legacy = should_rename_legacy self.reset() @property - def input_state(self): + def input_state(self) -> int: return self._input_state @property - def has_win_bytes(self): + def has_win_bytes(self) -> bool: return self._has_win_bytes @property - def charset_probers(self): + def charset_probers(self) -> List[CharSetProber]: return self._charset_probers - def reset(self): + def reset(self) -> None: """ Reset the UniversalDetector and all of its probers back to their initial states. This is called by ``__init__``, so you only need to @@ -126,7 +151,7 @@ def reset(self): for prober in self._charset_probers: prober.reset() - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> None: """ Takes a chunk of a document and feeds it through all of the relevant charset probers. @@ -166,6 +191,7 @@ def feed(self, byte_str): elif byte_str.startswith(b"\xFE\xFF\x00\x00"): # FE FF 00 00 UCS-4, unusual octet order BOM (3412) self.result = { + # TODO: This encoding is not supported by Python. Should remove? "encoding": "X-ISO-10646-UCS-4-3412", "confidence": 1.0, "language": "", @@ -173,6 +199,7 @@ def feed(self, byte_str): elif byte_str.startswith(b"\x00\x00\xFF\xFE"): # 00 00 FF FE UCS-4, unusual octet order BOM (2143) self.result = { + # TODO: This encoding is not supported by Python. Should remove? 
"encoding": "X-ISO-10646-UCS-4-2143", "confidence": 1.0, "language": "", @@ -242,6 +269,7 @@ def feed(self, byte_str): if self.lang_filter & LanguageFilter.NON_CJK: self._charset_probers.append(SBCSGroupProber()) self._charset_probers.append(Latin1Prober()) + self._charset_probers.append(MacRomanProber()) for prober in self._charset_probers: if prober.feed(byte_str) == ProbingState.FOUND_IT: self.result = { @@ -254,7 +282,7 @@ def feed(self, byte_str): if self.WIN_BYTE_DETECTOR.search(byte_str): self._has_win_bytes = True - def close(self): + def close(self) -> ResultDict: """ Stop analyzing the current document and come up with a final prediction. @@ -288,7 +316,8 @@ def close(self): max_prober = prober if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD): charset_name = max_prober.charset_name - lower_charset_name = max_prober.charset_name.lower() + assert charset_name is not None + lower_charset_name = charset_name.lower() confidence = max_prober.get_confidence() # Use Windows encoding name instead of ISO-8859 if we saw any # extra Windows-specific bytes @@ -297,6 +326,11 @@ def close(self): charset_name = self.ISO_WIN_MAP.get( lower_charset_name, charset_name ) + # Rename legacy encodings with superset encodings if asked + if self.should_rename_legacy: + charset_name = self.LEGACY_MAP.get( + (charset_name or "").lower(), charset_name + ) self.result = { "encoding": charset_name, "confidence": confidence, diff --git a/src/pip/_vendor/chardet/utf1632prober.py b/src/pip/_vendor/chardet/utf1632prober.py index 9fd1580b837..6bdec63d686 100644 --- a/src/pip/_vendor/chardet/utf1632prober.py +++ b/src/pip/_vendor/chardet/utf1632prober.py @@ -18,6 +18,8 @@ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import List, Union + from .charsetprober import CharSetProber from .enums import ProbingState @@ -36,7 +38,7 @@ class UTF1632Prober(CharSetProber): # a fixed constant ratio of expected zeros or non-zeros in modulo-position. 
EXPECTED_RATIO = 0.94 - def __init__(self): + def __init__(self) -> None: super().__init__() self.position = 0 self.zeros_at_mod = [0] * 4 @@ -51,7 +53,7 @@ def __init__(self): self.first_half_surrogate_pair_detected_16le = False self.reset() - def reset(self): + def reset(self) -> None: super().reset() self.position = 0 self.zeros_at_mod = [0] * 4 @@ -66,7 +68,7 @@ def reset(self): self.quad = [0, 0, 0, 0] @property - def charset_name(self): + def charset_name(self) -> str: if self.is_likely_utf32be(): return "utf-32be" if self.is_likely_utf32le(): @@ -79,16 +81,16 @@ def charset_name(self): return "utf-16" @property - def language(self): + def language(self) -> str: return "" - def approx_32bit_chars(self): + def approx_32bit_chars(self) -> float: return max(1.0, self.position / 4.0) - def approx_16bit_chars(self): + def approx_16bit_chars(self) -> float: return max(1.0, self.position / 2.0) - def is_likely_utf32be(self): + def is_likely_utf32be(self) -> bool: approx_chars = self.approx_32bit_chars() return approx_chars >= self.MIN_CHARS_FOR_DETECTION and ( self.zeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO @@ -98,7 +100,7 @@ def is_likely_utf32be(self): and not self.invalid_utf32be ) - def is_likely_utf32le(self): + def is_likely_utf32le(self) -> bool: approx_chars = self.approx_32bit_chars() return approx_chars >= self.MIN_CHARS_FOR_DETECTION and ( self.nonzeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO @@ -108,7 +110,7 @@ def is_likely_utf32le(self): and not self.invalid_utf32le ) - def is_likely_utf16be(self): + def is_likely_utf16be(self) -> bool: approx_chars = self.approx_16bit_chars() return approx_chars >= self.MIN_CHARS_FOR_DETECTION and ( (self.nonzeros_at_mod[1] + self.nonzeros_at_mod[3]) / approx_chars @@ -118,7 +120,7 @@ def is_likely_utf16be(self): and not self.invalid_utf16be ) - def is_likely_utf16le(self): + def is_likely_utf16le(self) -> bool: approx_chars = self.approx_16bit_chars() return approx_chars >= self.MIN_CHARS_FOR_DETECTION and ( (self.nonzeros_at_mod[0] + self.nonzeros_at_mod[2]) / approx_chars @@ -128,7 +130,7 @@ def is_likely_utf16le(self): and not self.invalid_utf16le ) - def validate_utf32_characters(self, quad): + def validate_utf32_characters(self, quad: List[int]) -> None: """ Validate if the quad of bytes is valid UTF-32. @@ -150,7 +152,7 @@ def validate_utf32_characters(self, quad): ): self.invalid_utf32le = True - def validate_utf16_characters(self, pair): + def validate_utf16_characters(self, pair: List[int]) -> None: """ Validate if the pair of bytes is valid UTF-16. 
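A minimal usage sketch of the detection path these probers feed (illustrative
only, and assuming a standalone chardet install rather than the vendored
pip._vendor.chardet):

    import chardet

    # Encoding with "utf-16" prepends a BOM (FF FE), which the detector
    # treats as decisive; BOM-less input must be long enough for the
    # statistical checks above to clear MIN_CHARS_FOR_DETECTION.
    result = chardet.detect("hello, world".encode("utf-16"))
    print(result["encoding"], result["confidence"])  # UTF-16, 1.0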
@@ -182,7 +184,7 @@ def validate_utf16_characters(self, pair): else: self.invalid_utf16le = True - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: for c in byte_str: mod4 = self.position % 4 self.quad[mod4] = c @@ -198,7 +200,7 @@ def feed(self, byte_str): return self.state @property - def state(self): + def state(self) -> ProbingState: if self._state in {ProbingState.NOT_ME, ProbingState.FOUND_IT}: # terminal, decided states return self._state @@ -210,7 +212,7 @@ def state(self): self._state = ProbingState.NOT_ME return self._state - def get_confidence(self): + def get_confidence(self) -> float: return ( 0.85 if ( diff --git a/src/pip/_vendor/chardet/utf8prober.py b/src/pip/_vendor/chardet/utf8prober.py index 3aae09e8630..d96354d97c2 100644 --- a/src/pip/_vendor/chardet/utf8prober.py +++ b/src/pip/_vendor/chardet/utf8prober.py @@ -25,6 +25,8 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Union + from .charsetprober import CharSetProber from .codingstatemachine import CodingStateMachine from .enums import MachineState, ProbingState @@ -34,26 +36,26 @@ class UTF8Prober(CharSetProber): ONE_CHAR_PROB = 0.5 - def __init__(self): + def __init__(self) -> None: super().__init__() self.coding_sm = CodingStateMachine(UTF8_SM_MODEL) - self._num_mb_chars = None + self._num_mb_chars = 0 self.reset() - def reset(self): + def reset(self) -> None: super().reset() self.coding_sm.reset() self._num_mb_chars = 0 @property - def charset_name(self): + def charset_name(self) -> str: return "utf-8" @property - def language(self): + def language(self) -> str: return "" - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: for c in byte_str: coding_state = self.coding_sm.next_state(c) if coding_state == MachineState.ERROR: @@ -72,7 +74,7 @@ def feed(self, byte_str): return self.state - def get_confidence(self): + def get_confidence(self) -> float: unlike = 0.99 if self._num_mb_chars < 6: unlike *= self.ONE_CHAR_PROB**self._num_mb_chars diff --git a/src/pip/_vendor/chardet/version.py b/src/pip/_vendor/chardet/version.py index a08a06b9a87..c5e9d85cd75 100644 --- a/src/pip/_vendor/chardet/version.py +++ b/src/pip/_vendor/chardet/version.py @@ -1,9 +1,9 @@ """ This module exists only to simplify retrieving the version number of chardet -from within setup.py and from chardet subpackages. +from within setuptools and from chardet subpackages. 
:author: Dan Blanchard (dan.blanchard@gmail.com) """ -__version__ = "5.0.0" +__version__ = "5.1.0" VERSION = __version__.split(".") diff --git a/src/pip/_vendor/vendor.txt b/src/pip/_vendor/vendor.txt index cd42578f4d3..31cda4a8095 100644 --- a/src/pip/_vendor/vendor.txt +++ b/src/pip/_vendor/vendor.txt @@ -9,7 +9,7 @@ pyparsing==3.0.9 pyproject-hooks==1.0.0 requests==2.28.2 certifi==2022.12.7 - chardet==5.0.0 + chardet==5.1.0 idna==3.4 urllib3==1.26.12 rich==12.6.0 From 17b73457a18bb990b4955b1d5d55fc054acbea38 Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sat, 28 Jan 2023 20:41:54 +0000 Subject: [PATCH 5/6] Upgrade urllib3 to 1.26.14 --- news/urllib3.vendor.rst | 1 + src/pip/_vendor/urllib3/_version.py | 2 +- src/pip/_vendor/urllib3/connectionpool.py | 2 +- src/pip/_vendor/urllib3/contrib/appengine.py | 2 +- src/pip/_vendor/urllib3/contrib/ntlmpool.py | 4 ++-- src/pip/_vendor/urllib3/contrib/pyopenssl.py | 7 +++---- src/pip/_vendor/urllib3/response.py | 13 +++++++++++++ src/pip/_vendor/urllib3/util/retry.py | 2 +- src/pip/_vendor/urllib3/util/url.py | 2 +- src/pip/_vendor/vendor.txt | 2 +- 10 files changed, 25 insertions(+), 12 deletions(-) create mode 100644 news/urllib3.vendor.rst diff --git a/news/urllib3.vendor.rst b/news/urllib3.vendor.rst new file mode 100644 index 00000000000..c9d10554e0e --- /dev/null +++ b/news/urllib3.vendor.rst @@ -0,0 +1 @@ +Upgrade urllib3 to 1.26.14 diff --git a/src/pip/_vendor/urllib3/_version.py b/src/pip/_vendor/urllib3/_version.py index 6fbc84b30f2..7c031661ba8 100644 --- a/src/pip/_vendor/urllib3/_version.py +++ b/src/pip/_vendor/urllib3/_version.py @@ -1,2 +1,2 @@ # This file is protected via CODEOWNERS -__version__ = "1.26.12" +__version__ = "1.26.14" diff --git a/src/pip/_vendor/urllib3/connectionpool.py b/src/pip/_vendor/urllib3/connectionpool.py index 96339e90af1..70873927924 100644 --- a/src/pip/_vendor/urllib3/connectionpool.py +++ b/src/pip/_vendor/urllib3/connectionpool.py @@ -862,7 +862,7 @@ def _is_ssl_error_message_from_http_proxy(ssl_error): ) # Check if we should retry the HTTP response. - has_retry_after = bool(response.getheader("Retry-After")) + has_retry_after = bool(response.headers.get("Retry-After")) if retries.is_retry(method, response.status, has_retry_after): try: retries = retries.increment(method, url, response=response, _pool=self) diff --git a/src/pip/_vendor/urllib3/contrib/appengine.py b/src/pip/_vendor/urllib3/contrib/appengine.py index 668538695f9..1717ee22cdf 100644 --- a/src/pip/_vendor/urllib3/contrib/appengine.py +++ b/src/pip/_vendor/urllib3/contrib/appengine.py @@ -224,7 +224,7 @@ def urlopen( ) # Check if we should retry the HTTP response. 
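            # An illustrative note on the change below (assumed example, not
            # part of the patch): urllib3's HTTPHeaderDict lookups are
            # case-insensitive, so http_response.headers.get("retry-after")
            # finds a "Retry-After" header exactly as getheader() did, minus
            # the now-deprecated http.client compatibility shim.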
- has_retry_after = bool(http_response.getheader("Retry-After")) + has_retry_after = bool(http_response.headers.get("Retry-After")) if retries.is_retry(method, http_response.status, has_retry_after): retries = retries.increment(method, url, response=http_response, _pool=self) log.debug("Retry: %s", url) diff --git a/src/pip/_vendor/urllib3/contrib/ntlmpool.py b/src/pip/_vendor/urllib3/contrib/ntlmpool.py index 41a8fd174cb..471665754e9 100644 --- a/src/pip/_vendor/urllib3/contrib/ntlmpool.py +++ b/src/pip/_vendor/urllib3/contrib/ntlmpool.py @@ -69,7 +69,7 @@ def _new_conn(self): log.debug("Request headers: %s", headers) conn.request("GET", self.authurl, None, headers) res = conn.getresponse() - reshdr = dict(res.getheaders()) + reshdr = dict(res.headers) log.debug("Response status: %s %s", res.status, res.reason) log.debug("Response headers: %s", reshdr) log.debug("Response data: %s [...]", res.read(100)) @@ -101,7 +101,7 @@ def _new_conn(self): conn.request("GET", self.authurl, None, headers) res = conn.getresponse() log.debug("Response status: %s %s", res.status, res.reason) - log.debug("Response headers: %s", dict(res.getheaders())) + log.debug("Response headers: %s", dict(res.headers)) log.debug("Response data: %s [...]", res.read()[:100]) if res.status != 200: if res.status == 401: diff --git a/src/pip/_vendor/urllib3/contrib/pyopenssl.py b/src/pip/_vendor/urllib3/contrib/pyopenssl.py index 528764a0334..19e4aa97cc1 100644 --- a/src/pip/_vendor/urllib3/contrib/pyopenssl.py +++ b/src/pip/_vendor/urllib3/contrib/pyopenssl.py @@ -47,10 +47,10 @@ """ from __future__ import absolute_import +import OpenSSL.crypto import OpenSSL.SSL from cryptography import x509 from cryptography.hazmat.backends.openssl import backend as openssl_backend -from cryptography.hazmat.backends.openssl.x509 import _Certificate try: from cryptography.x509 import UnsupportedExtension @@ -228,9 +228,8 @@ def get_subj_alt_name(peer_cert): if hasattr(peer_cert, "to_cryptography"): cert = peer_cert.to_cryptography() else: - # This is technically using private APIs, but should work across all - # relevant versions before PyOpenSSL got a proper API for this. - cert = _Certificate(openssl_backend, peer_cert._x509) + der = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, peer_cert) + cert = x509.load_der_x509_certificate(der, openssl_backend) # We want to find the SAN extension. Ask Cryptography to locate it (it's # faster than looping in Python) diff --git a/src/pip/_vendor/urllib3/response.py b/src/pip/_vendor/urllib3/response.py index 4969b70e3ef..8909f8454e9 100644 --- a/src/pip/_vendor/urllib3/response.py +++ b/src/pip/_vendor/urllib3/response.py @@ -3,6 +3,7 @@ import io import logging import sys +import warnings import zlib from contextlib import contextmanager from socket import error as SocketError @@ -657,9 +658,21 @@ def from_httplib(ResponseCls, r, **response_kw): # Backwards-compatibility methods for http.client.HTTPResponse def getheaders(self): + warnings.warn( + "HTTPResponse.getheaders() is deprecated and will be removed " + "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.", + category=DeprecationWarning, + stacklevel=2, + ) return self.headers def getheader(self, name, default=None): + warnings.warn( + "HTTPResponse.getheader() is deprecated and will be removed " + "in urllib3 v2.1.0. 
Instead use HTTPResponse.headers.get(name, default).", + category=DeprecationWarning, + stacklevel=2, + ) return self.headers.get(name, default) # Backwards compatibility for http.cookiejar diff --git a/src/pip/_vendor/urllib3/util/retry.py b/src/pip/_vendor/urllib3/util/retry.py index 3398323fd7c..2490d5e5b63 100644 --- a/src/pip/_vendor/urllib3/util/retry.py +++ b/src/pip/_vendor/urllib3/util/retry.py @@ -394,7 +394,7 @@ def parse_retry_after(self, retry_after): def get_retry_after(self, response): """Get the value of Retry-After in seconds.""" - retry_after = response.getheader("Retry-After") + retry_after = response.headers.get("Retry-After") if retry_after is None: return None diff --git a/src/pip/_vendor/urllib3/util/url.py b/src/pip/_vendor/urllib3/util/url.py index 86bd8b48ab0..d6d0bbcea66 100644 --- a/src/pip/_vendor/urllib3/util/url.py +++ b/src/pip/_vendor/urllib3/util/url.py @@ -63,7 +63,7 @@ BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$") ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$") -_HOST_PORT_PAT = ("^(%s|%s|%s)(?::([0-9]{0,5}))?$") % ( +_HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % ( REG_NAME_PAT, IPV4_PAT, IPV6_ADDRZ_PAT, diff --git a/src/pip/_vendor/vendor.txt b/src/pip/_vendor/vendor.txt index 31cda4a8095..67452d89fcf 100644 --- a/src/pip/_vendor/vendor.txt +++ b/src/pip/_vendor/vendor.txt @@ -11,7 +11,7 @@ requests==2.28.2 certifi==2022.12.7 chardet==5.1.0 idna==3.4 - urllib3==1.26.12 + urllib3==1.26.14 rich==12.6.0 pygments==2.13.0 typing_extensions==4.4.0 From acd7ef1f9aa74efbec786e3929faca1201b4b422 Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sat, 28 Jan 2023 22:35:25 +0000 Subject: [PATCH 6/6] Perform relaxed version matching in `pip debug` test This ensures that we're not trying to compare versions as equal strings. --- tests/functional/test_debug.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/functional/test_debug.py b/tests/functional/test_debug.py index 41374f8cb88..77cd732f9f1 100644 --- a/tests/functional/test_debug.py +++ b/tests/functional/test_debug.py @@ -1,6 +1,8 @@ +import re from typing import List import pytest +from pip._vendor.packaging.version import Version from pip._internal.commands.debug import create_vendor_txt_map from pip._internal.utils import compatibility_tags @@ -45,7 +47,9 @@ def test_debug__library_versions(script: PipTestEnvironment) -> None: vendored_versions = create_vendor_txt_map() for name, value in vendored_versions.items(): - assert f"{name}=={value}" in result.stdout + match = re.search(rf"{name}==(\S+)", result.stdout) + assert match is not None, f"Could not find {name} in output" + assert Version(match.group(1)) == Version(value) @pytest.mark.parametrize(
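The relaxed matching above succeeds because packaging's Version normalizes
cosmetic differences that defeat plain string equality. A minimal sketch of
that behaviour (using the standalone packaging distribution rather than
pip's vendored copy):

    from packaging.version import Version

    # Equal as versions, unequal as strings: trailing zero segments are
    # normalized away during comparison.
    assert "2.6.2.0" != "2.6.2"
    assert Version("2.6.2.0") == Version("2.6.2")
    assert Version("5.1") == Version("5.1.0")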