MNT Add codespell config and workflow (#1628)
Co-authored-by: Loïc Estève <[email protected]>
yarikoptic and lesteve authored Jan 16, 2025
1 parent e8447e5 commit 85bca66
Showing 20 changed files with 62 additions and 30 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/codespell.yml
@@ -0,0 +1,25 @@
# Codespell configuration is within pyproject.toml
---
name: Codespell

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

permissions:
  contents: read

jobs:
  codespell:
    name: Check for spelling errors
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Annotate locations with typos
        uses: codespell-project/codespell-problem-matcher@v1
      - name: Codespell
        uses: codespell-project/actions-codespell@v2
4 changes: 2 additions & 2 deletions CHANGES.rst
@@ -731,7 +731,7 @@ Olivier Grisel

Expose :class:`joblib.parallel.ParallelBackendBase` and
:class:`joblib.parallel.AutoBatchingMixin` in the public API to
-make them officially re-usable by backend implementers.
+make them officially reusable by backend implementers.


Release 0.10.0
@@ -866,7 +866,7 @@ Olivier Grisel

Olivier Grisel

-New context manager based API (``with`` block) to re-use
+New context manager based API (``with`` block) to reuse
the same pool of workers across consecutive parallel calls.

Vlad Niculae and Olivier Grisel
2 changes: 1 addition & 1 deletion doc/memory.rst
@@ -385,7 +385,7 @@ Gotchas

* **Cache-miss with objects that have non-reproducible pickle representations**.
The identifier of the cache entry is based on the pickle's representation of
-the input arguments. Therefor, for objects that don't have a deterministic
+the input arguments. Therefore, for objects that don't have a deterministic
pickle representation, or objects whose representation depends on the way they
are constructed, the cache will not work. In particular, ``pytorch.Tensor``
are known to have non-deterministic pickle representation (see this
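
For context, a minimal sketch of the caching behaviour this gotcha refers to (the cache directory and function below are hypothetical): arguments with a deterministic pickle representation hit the cache on the second call, whereas an object whose pickle bytes change between runs would be recomputed.

from joblib import Memory

# Hypothetical cache directory, for illustration only.
memory = Memory("./joblib_cache", verbose=0)

@memory.cache
def total(values):
    return sum(values)

# Plain Python objects pickle deterministically, so the second call is a
# cache hit; an object with a non-deterministic pickle representation
# (e.g. a pytorch.Tensor) would be recomputed instead.
total([1, 2, 3])
total([1, 2, 3])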
2 changes: 1 addition & 1 deletion doc/parallel.rst
@@ -173,7 +173,7 @@ create and destroy a pool of workers (threads or processes) several times which
can cause a significant overhead.

For this case it is more efficient to use the context manager API of the
-:class:`joblib.Parallel` class to re-use the same pool of workers for several
+:class:`joblib.Parallel` class to reuse the same pool of workers for several
calls to the :class:`joblib.Parallel` object::

>>> with Parallel(n_jobs=2) as parallel:
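
As a reminder of the API this paragraph documents, here is a minimal sketch of the context manager form; the worker pool created by ``with Parallel(...)`` is shared by both calls.

from math import sqrt
from joblib import Parallel, delayed

# The same pool of two workers is reused for both parallel calls instead
# of being created and destroyed twice.
with Parallel(n_jobs=2) as parallel:
    first = parallel(delayed(sqrt)(i) for i in range(10))
    second = parallel(delayed(sqrt)(i) for i in range(10, 20))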
4 changes: 2 additions & 2 deletions examples/parallel_generator.py
@@ -146,7 +146,7 @@ def accumulator_sum(generator):


##############################################################################
-# We can then report the memory usage accross time of the two runs using the
+# We can then report the memory usage across time of the two runs using the
# MemoryMonitor.
#
# In the first case, as the results accumulate in ``res``, the memory grows
@@ -211,7 +211,7 @@ def accumulator_sum(generator):

##############################################################################
# To better highlight improvements in memory usage when using the parameter
-# ``return_as="generator_unordered"``, let's explcitly add delay in some of
+# ``return_as="generator_unordered"``, let's explicitly add delay in some of
# the submitted tasks.


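
A minimal sketch of the pattern the example describes, assuming a joblib version that supports ``return_as="generator_unordered"``; the artificial delays are hypothetical and only serve to make tasks finish out of submission order.

import time
from joblib import Parallel, delayed

def task(i):
    # Hypothetical delay on odd tasks so that completion order differs
    # from submission order.
    time.sleep(0.5 if i % 2 else 0)
    return i

# Results are yielded as soon as each task completes, which keeps memory
# usage low when the consumer processes them on the fly.
for result in Parallel(n_jobs=2, return_as="generator_unordered")(
    delayed(task)(i) for i in range(6)
):
    print(result)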
4 changes: 2 additions & 2 deletions joblib/externals/loky/backend/context.py
@@ -177,7 +177,7 @@ def _cpu_count_cgroup(os_cpu_count):
return math.ceil(cpu_quota_us / cpu_period_us)
else: # pragma: no cover
# Setting a negative cpu_quota_us value is a valid way to disable
-# cgroup CPU bandwith limits
+# cgroup CPU bandwidth limits
return os_cpu_count


@@ -209,7 +209,7 @@ def _cpu_count_affinity(os_cpu_count):
# havoc, typically on CI workers.
warnings.warn(
"Failed to inspect CPU affinity constraints on this system. "
"Please install psutil or explictly set LOKY_MAX_CPU_COUNT."
"Please install psutil or explicitly set LOKY_MAX_CPU_COUNT."
)

# This can happen for platforms that do not implement any kind of CPU
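
For reference, a small sketch of the cgroup quota arithmetic that the corrected comment refers to (the helper name is hypothetical): a negative quota disables the bandwidth limit, otherwise the effective CPU count is the ceiling of quota over period.

import math

def cpu_count_from_cgroup_quota(cpu_quota_us, cpu_period_us):
    # A negative quota is the conventional way to disable cgroup CPU
    # bandwidth limits, so no cap is derived in that case.
    if cpu_quota_us < 0:
        return None
    return math.ceil(cpu_quota_us / cpu_period_us)

# A 150 ms quota per 100 ms period amounts to 2 CPUs' worth of bandwidth.
assert cpu_count_from_cgroup_quota(150_000, 100_000) == 2
assert cpu_count_from_cgroup_quota(-1, 100_000) is None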
2 changes: 1 addition & 1 deletion joblib/externals/loky/backend/queues.py
@@ -197,7 +197,7 @@ class SimpleQueue(mp_SimpleQueue):
def __init__(self, reducers=None, ctx=None):
super().__init__(ctx=ctx)

-# Add possiblity to use custom reducers
+# Add possibility to use custom reducers
self._reducers = reducers

def close(self):
12 changes: 6 additions & 6 deletions joblib/externals/loky/backend/resource_tracker.py
@@ -40,7 +40,7 @@
# Note that this behavior differs from CPython's resource_tracker, which only
# implements list of shared resources, and not a proper refcounting scheme.
# Also, CPython's resource tracker will only attempt to cleanup those shared
-# resources once all procsses connected to the resouce tracker have exited.
+# resources once all procsses connected to the resource tracker have exited.


import os
@@ -112,7 +112,7 @@ def ensure_running(self):

warnings.warn(
"resource_tracker: process died unexpectedly, "
"relaunching. Some folders/sempahores might "
"relaunching. Some folders/semaphores might "
"leak."
)

@@ -238,13 +238,13 @@ def main(fd, verbose=0):
if line == b"": # EOF
break
try:
-splitted = line.strip().decode("ascii").split(":")
+split = line.strip().decode("ascii").split(":")
# name can potentially contain separator symbols (for
# instance folders on Windows)
cmd, name, rtype = (
-splitted[0],
-":".join(splitted[1:-1]),
-splitted[-1],
+split[0],
+":".join(split[1:-1]),
+split[-1],
)

if cmd == "PROBE":
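
The renamed variable above parses messages of the form ``cmd:name:rtype`` where the name itself may contain ``:`` (e.g. Windows paths). A standalone sketch of that split-and-rejoin logic (the helper name is hypothetical):

def parse_tracker_line(line):
    # Only the first and last ':'-separated fields are the command and the
    # resource type; everything in between is rejoined as the resource name,
    # which may itself contain ':' (for instance Windows folder paths).
    split = line.strip().decode("ascii").split(":")
    return split[0], ":".join(split[1:-1]), split[-1]

assert parse_tracker_line(b"REGISTER:C:\\temp\\folder:folder\n") == (
    "REGISTER", "C:\\temp\\folder", "folder"
)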
2 changes: 1 addition & 1 deletion joblib/externals/loky/backend/spawn.py
@@ -102,7 +102,7 @@ def get_preparation_data(name, init_main_module=True):
)

# multiprocessing's resource_tracker must be running before loky
-# process is created (othewise the child won't be able to use it if it
+# process is created (otherwise the child won't be able to use it if it
# is created later on)
mp_resource_tracker.ensure_running()
d["mp_tracker_args"] = {
2 changes: 1 addition & 1 deletion joblib/externals/loky/cloudpickle_wrapper.py
@@ -19,7 +19,7 @@ def __reduce__(self):
return _reconstruct_wrapper, (_pickled_object, self._keep_wrapper)

def __getattr__(self, attr):
-# Ensure that the wrapped object can be used seemlessly as the
+# Ensure that the wrapped object can be used seamlessly as the
# previous object.
if attr not in ["_obj", "_keep_wrapper"]:
return getattr(self._obj, attr)
2 changes: 1 addition & 1 deletion joblib/externals/loky/process_executor.py
@@ -983,7 +983,7 @@ def _check_max_depth(context):
if 0 < MAX_DEPTH and _CURRENT_DEPTH + 1 > MAX_DEPTH:
raise LokyRecursionError(
"Could not spawn extra nested processes at depth superior to "
f"MAX_DEPTH={MAX_DEPTH}. If this is intendend, you can change "
f"MAX_DEPTH={MAX_DEPTH}. If this is intended, you can change "
"this limit with the LOKY_MAX_DEPTH environment variable."
)

4 changes: 2 additions & 2 deletions joblib/func_inspect.py
@@ -140,8 +140,8 @@ def get_func_name(func, resolv_alias=True, win_characters=True):
# The XYZ hash should avoid collisions between functions with
# the same name, both within the same notebook but also across
# notebooks
-splitted = parts[-1].split('-')
-parts[-1] = '-'.join(splitted[:2] + splitted[3:])
+split = parts[-1].split('-')
+parts[-1] = '-'.join(split[:2] + split[3:])
elif len(parts) > 2 and parts[-2].startswith('ipykernel_'):
# In a notebook session (ipykernel). Filename seems to be 'xyz'
# of above. parts[-2] has the structure ipykernel_XXXXXX where
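
The renamed variable above drops the third dash-separated component of a notebook-generated filename while keeping the rest. A small sketch of that manipulation on a hypothetical filename (the exact format depends on the IPython version):

# Hypothetical notebook cell filename, for illustration only.
filename = "<ipython-input-42-abcdef123456>"

split = filename.split('-')
# Drop the component at index 2 and keep the remaining parts.
stable_name = '-'.join(split[:2] + split[3:])

assert stable_name == "<ipython-input-abcdef123456>"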
2 changes: 1 addition & 1 deletion joblib/numpy_pickle_compat.py
@@ -101,7 +101,7 @@ def read(self, unpickler):
if allow_mmap:
kwargs['mmap_mode'] = unpickler.mmap_mode
if "allow_pickle" in inspect.signature(unpickler.np.load).parameters:
-# Required in numpy 1.16.3 and later to aknowledge the security
+# Required in numpy 1.16.3 and later to acknowledge the security
# risk.
kwargs["allow_pickle"] = True
array = unpickler.np.load(filename, **kwargs)
2 changes: 1 addition & 1 deletion joblib/numpy_pickle_utils.py
@@ -90,7 +90,7 @@ def _detect_compressor(fileobj):
max_prefix_len = _get_prefixes_max_len()
if hasattr(fileobj, 'peek'):
# Peek allows to read those bytes without moving the cursor in the
-# file whic.
+# file which.
first_bytes = fileobj.peek(max_prefix_len)
else:
# Fallback to seek if the fileobject is not peekable.
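
The comment being fixed concerns reading a file's first bytes without moving its cursor. A minimal sketch of the ``peek`` technique on a buffered reader (the gzip magic number is used as an example prefix):

import io

data = b"\x1f\x8b" + b"\x00" * 16  # starts with the gzip magic number
fileobj = io.BufferedReader(io.BytesIO(data))

# peek() returns buffered bytes without advancing the file position.
prefix = fileobj.peek(2)[:2]
assert prefix == b"\x1f\x8b"
assert fileobj.tell() == 0  # the cursor has not moved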
2 changes: 1 addition & 1 deletion joblib/parallel.py
@@ -1042,7 +1042,7 @@ class Parallel(Logger):
The number of batches (of tasks) to be pre-dispatched.
Default is '2*n_jobs'. When batch_size="auto" this is reasonable
default and the workers should never starve. Note that only basic
-arithmetics are allowed here and no modules can be used in this
+arithmetic are allowed here and no modules can be used in this
expression.
batch_size: int or 'auto', default='auto'
The number of atomic tasks to dispatch at once to each
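
A minimal usage sketch of the ``pre_dispatch`` parameter this docstring describes; the expression is evaluated with only ``n_jobs`` available, so only basic arithmetic such as ``'2*n_jobs'`` is valid.

from math import sqrt
from joblib import Parallel, delayed

# Pre-dispatch two batches per worker; only a simple arithmetic expression
# of n_jobs is allowed here, no imports or function calls.
results = Parallel(n_jobs=2, pre_dispatch="2*n_jobs")(
    delayed(sqrt)(i) for i in range(100)
)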
2 changes: 1 addition & 1 deletion joblib/test/common.py
@@ -66,7 +66,7 @@ def force_gc_pypy():
# will eventually be collected.
if IS_PYPY:
# Run gc.collect() twice to make sure the weakref is collected, as
-# mentionned in the pypy doc:
+# mentioned in the pypy doc:
# https://doc.pypy.org/en/latest/config/objspace.usemodules._weakref.html
import gc
gc.collect()
4 changes: 2 additions & 2 deletions joblib/test/test_disk.py
@@ -62,8 +62,8 @@ def test_mkdirp(tmpdir):


def test_rm_subdirs(tmpdir):
-sub_path = os.path.join(tmpdir.strpath, "am", "stram")
-full_path = os.path.join(sub_path, "gram")
+sub_path = os.path.join(tmpdir.strpath, "subdir_one", "subdir_two")
+full_path = os.path.join(sub_path, "subdir_three")
mkdirp(os.path.join(full_path))

rm_subdirs(sub_path)
6 changes: 3 additions & 3 deletions joblib/test/test_parallel.py
@@ -1416,7 +1416,7 @@ def test_multiple_generator_call(backend, return_as, n_jobs):

# Make sure that the error is raised quickly
assert time.time() - t_start < 2, (
"The error should be raised immediatly when submitting a new task "
"The error should be raised immediately when submitting a new task "
"but it took more than 2s."
)

@@ -1443,8 +1443,8 @@ def test_multiple_generator_call_managed(backend, return_as, n_jobs):

# Make sure that the error is raised quickly
assert time.time() - t_start < 2, (
"The error should be raised immediatly when submitting a new task "
"but it took more than 2s."
"The error should be raised immediately when submitting a new task"
" but it took more than 2s."
)

# The gc in pypy can be delayed. Force it to make sure this test does not
7 changes: 7 additions & 0 deletions pyproject.toml
@@ -100,3 +100,10 @@ relative_files = true

[tool.coverage.report]
show_missing = true

[tool.codespell]
# Ref: https://github.com/codespell-project/codespell#using-a-config-file
skip = '.git*,*.svg,venv,*.css'
check-hidden = true
# ignore-regex = ''
ignore-words-list = 'fo'
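
With this ``[tool.codespell]`` table in place, a codespell release with TOML config support should pick the settings up automatically when invoked from the repository root, mirroring what the new CI job does. A small sketch of running it locally (assumes codespell is installed in the environment):

import subprocess

# Run codespell from the repository root; recent versions read the
# [tool.codespell] section of pyproject.toml automatically, so no extra
# flags are needed.
subprocess.run(["codespell"], check=True)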
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,4 +1,4 @@
-# The prefered config file is pyproject.toml. The use of setup.cfg is
+# The preferred config file is pyproject.toml. The use of setup.cfg is
# mostly for compatibility with flake8 so it should not be used if possible.

[flake8]
