From 4230bbbfa8b191650add38ebfee9682f8e647efe Mon Sep 17 00:00:00 2001 From: dosisod <39638017+dosisod@users.noreply.github.com> Date: Tue, 4 Oct 2022 14:59:33 -0700 Subject: [PATCH] Use `utf8` locale when reading files (#43) --- .github/workflows/ci.yml | 5 +++++ refurb/main.py | 2 +- test/e2e/gbk.py | 2 ++ test/test_checks.py | 2 +- test/test_main.py | 26 ++++++++++++++++++++++++++ 5 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 test/e2e/gbk.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d2e522f..e27083e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,6 +13,11 @@ jobs: steps: - uses: actions/checkout@v3 + - name: Install locales + run: | + sudo locale-gen zh_CN.GBK + sudo update-locale + - uses: actions/setup-python@v4 with: python-version: '3.10' diff --git a/refurb/main.py b/refurb/main.py index 5ee438a..4ee437d 100644 --- a/refurb/main.py +++ b/refurb/main.py @@ -55,7 +55,7 @@ def version() -> str: # pragma: no cover @cache def get_source_lines(filepath: str) -> list[str]: - return Path(filepath).read_text().splitlines() + return Path(filepath).read_text("utf8").splitlines() def ignored_via_comment(error: Error | str) -> bool: diff --git a/test/e2e/gbk.py b/test/e2e/gbk.py new file mode 100644 index 0000000..c23e128 --- /dev/null +++ b/test/e2e/gbk.py @@ -0,0 +1,2 @@ +print("") +print("一些中文") diff --git a/test/test_checks.py b/test/test_checks.py index 0f95bd5..e46394d 100644 --- a/test/test_checks.py +++ b/test/test_checks.py @@ -8,7 +8,7 @@ def test_checks() -> None: - errors = run_refurb(Settings(files=["test/"])) + errors = run_refurb(Settings(files=["test/data"])) got = "\n".join([str(error) for error in errors]) files = sorted(TEST_DATA_PATH.glob("*.txt"), key=lambda p: p.name) diff --git a/test/test_main.py b/test/test_main.py index 618c4de..d7e0ed0 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -1,6 +1,10 @@ +import os from dataclasses import dataclass +from locale 
import LC_ALL, setlocale from unittest.mock import patch +import pytest + from refurb.error import Error from refurb.main import main, run_refurb, sort_errors from refurb.settings import Settings @@ -128,3 +132,25 @@ def test_no_blank_line_printed_if_there_are_no_errors(): main(["test/e2e/dummy.py"]) assert p.call_count == 0 + + +@pytest.mark.skipif(not os.getenv("CI"), reason="Locale installation required") +def test_utf8_is_used_to_load_files_when_error_occurs(): # type: ignore + """ + See issue https://github.com/dosisod/refurb/issues/37. This check will + set the zh_CN.GBK locale, run a particular file, and if all goes well, + no exception will be thrown. This test is only run when the CI environment + variable is set, which is set by GitHub Actions. + """ + + setlocale(LC_ALL, "zh_CN.GBK") + + try: + main(["test/e2e/gbk.py"]) + + except UnicodeDecodeError: + setlocale(LC_ALL, "") + + raise + + setlocale(LC_ALL, "")