diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
new file mode 100644
index 000000000..b23166743
--- /dev/null
+++ b/.github/workflows/codespell.yml
@@ -0,0 +1,25 @@
+# Codespell configuration is within pyproject.toml
+---
+name: Codespell
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Annotate locations with typos
+        uses: codespell-project/codespell-problem-matcher@v1
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
diff --git a/CHANGES.rst b/CHANGES.rst
index 996485615..6d4e92318 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -731,7 +731,7 @@ Olivier Grisel
 
    Expose :class:`joblib.parallel.ParallelBackendBase` and
    :class:`joblib.parallel.AutoBatchingMixin` in the public API to
-   make them officially re-usable by backend implementers.
+   make them officially reusable by backend implementers.
 
 
 Release 0.10.0
@@ -866,7 +866,7 @@ Olivier Grisel
 
 Olivier Grisel
 
-   New context manager based API (``with`` block) to re-use
+   New context manager based API (``with`` block) to reuse
    the same pool of workers across consecutive parallel calls.
 
 Vlad Niculae and Olivier Grisel
diff --git a/doc/memory.rst b/doc/memory.rst
index cdce39055..a692f2fa4 100644
--- a/doc/memory.rst
+++ b/doc/memory.rst
@@ -385,7 +385,7 @@ Gotchas
 
 * **Cache-miss with objects that have non-reproducible pickle representations**.
   The identifier of the cache entry is based on the pickle's representation of
-  the input arguments. Therefor, for objects that don't have a deterministic
+  the input arguments. Therefore, for objects that don't have a deterministic
   pickle representation, or objects whose representation depends on the way
   they are constructed, the cache will not work. In particular, ``pytorch.Tensor``
   are known to have non-deterministic pickle representation (see this
diff --git a/doc/parallel.rst b/doc/parallel.rst
index cd7298506..b44e78826 100644
--- a/doc/parallel.rst
+++ b/doc/parallel.rst
@@ -173,7 +173,7 @@ create and destroy a pool of workers (threads or processes) several times which
 can cause a significant overhead.
 
 For this case it is more efficient to use the context manager API of the
-:class:`joblib.Parallel` class to re-use the same pool of workers for several
+:class:`joblib.Parallel` class to reuse the same pool of workers for several
 calls to the :class:`joblib.Parallel` object::
 
    >>> with Parallel(n_jobs=2) as parallel:
diff --git a/examples/parallel_generator.py b/examples/parallel_generator.py
index e41ff1fe5..065c33953 100644
--- a/examples/parallel_generator.py
+++ b/examples/parallel_generator.py
@@ -146,7 +146,7 @@ def accumulator_sum(generator):
 
 
 ##############################################################################
-# We can then report the memory usage accross time of the two runs using the
+# We can then report the memory usage across time of the two runs using the
 # MemoryMonitor.
 #
 # In the first case, as the results accumulate in ``res``, the memory grows
@@ -211,7 +211,7 @@ def accumulator_sum(generator):
 
 
 ##############################################################################
 # To better highlight improvements in memory usage when using the parameter
-# ``return_as="generator_unordered"``, let's explcitly add delay in some of
+# ``return_as="generator_unordered"``, let's explicitly add delay in some of
 # the submitted tasks.
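
Side note on the doc/parallel.rst hunk above: the worker-pool reuse it describes looks like the following in practice. This is a minimal sketch built on the public joblib API; the toy sqrt task, the n_jobs value and the stopping condition are illustrative and not part of this patch.

    from math import sqrt
    from joblib import Parallel, delayed

    # Entering the context once starts the pool of workers; every call to
    # `parallel` below reuses that same pool instead of spawning a new one.
    with Parallel(n_jobs=2) as parallel:
        accumulator = 0.0
        n_iter = 0
        while accumulator < 1000:
            results = parallel(
                delayed(sqrt)(accumulator ** 2 + i) for i in range(5)
            )
            accumulator += sum(results)
            n_iter += 1
    print(f"Finished after {n_iter} calls that shared one worker pool.")
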
diff --git a/joblib/externals/loky/backend/context.py b/joblib/externals/loky/backend/context.py
index d0f590317..ea4b8bea2 100644
--- a/joblib/externals/loky/backend/context.py
+++ b/joblib/externals/loky/backend/context.py
@@ -177,7 +177,7 @@ def _cpu_count_cgroup(os_cpu_count):
         return math.ceil(cpu_quota_us / cpu_period_us)
     else:  # pragma: no cover
         # Setting a negative cpu_quota_us value is a valid way to disable
-        # cgroup CPU bandwith limits
+        # cgroup CPU bandwidth limits
         return os_cpu_count
 
 
@@ -209,7 +209,7 @@ def _cpu_count_affinity(os_cpu_count):
         # havoc, typically on CI workers.
         warnings.warn(
             "Failed to inspect CPU affinity constraints on this system. "
-            "Please install psutil or explictly set LOKY_MAX_CPU_COUNT."
+            "Please install psutil or explicitly set LOKY_MAX_CPU_COUNT."
         )
 
     # This can happen for platforms that do not implement any kind of CPU
diff --git a/joblib/externals/loky/backend/queues.py b/joblib/externals/loky/backend/queues.py
index 5afd99b42..e92ad8220 100644
--- a/joblib/externals/loky/backend/queues.py
+++ b/joblib/externals/loky/backend/queues.py
@@ -197,7 +197,7 @@ class SimpleQueue(mp_SimpleQueue):
     def __init__(self, reducers=None, ctx=None):
         super().__init__(ctx=ctx)
 
-        # Add possiblity to use custom reducers
+        # Add possibility to use custom reducers
         self._reducers = reducers
 
     def close(self):
diff --git a/joblib/externals/loky/backend/resource_tracker.py b/joblib/externals/loky/backend/resource_tracker.py
index 25204a7a7..36454cbed 100644
--- a/joblib/externals/loky/backend/resource_tracker.py
+++ b/joblib/externals/loky/backend/resource_tracker.py
@@ -40,7 +40,7 @@
 # Note that this behavior differs from CPython's resource_tracker, which only
 # implements list of shared resources, and not a proper refcounting scheme.
 # Also, CPython's resource tracker will only attempt to cleanup those shared
-# resources once all procsses connected to the resouce tracker have exited.
+# resources once all procsses connected to the resource tracker have exited.
 
 
 import os
@@ -112,7 +112,7 @@ def ensure_running(self):
 
                 warnings.warn(
                     "resource_tracker: process died unexpectedly, "
-                    "relaunching. Some folders/sempahores might "
+                    "relaunching. Some folders/semaphores might "
                     "leak."
                 )
 
@@ -238,13 +238,13 @@ def main(fd, verbose=0):
             if line == b"":  # EOF
                 break
             try:
-                splitted = line.strip().decode("ascii").split(":")
+                split = line.strip().decode("ascii").split(":")
                 # name can potentially contain separator symbols (for
                 # instance folders on Windows)
                 cmd, name, rtype = (
-                    splitted[0],
-                    ":".join(splitted[1:-1]),
-                    splitted[-1],
+                    split[0],
+                    ":".join(split[1:-1]),
+                    split[-1],
                 )
 
                 if cmd == "PROBE":
diff --git a/joblib/externals/loky/backend/spawn.py b/joblib/externals/loky/backend/spawn.py
index d011c3980..59fc89029 100644
--- a/joblib/externals/loky/backend/spawn.py
+++ b/joblib/externals/loky/backend/spawn.py
@@ -102,7 +102,7 @@ def get_preparation_data(name, init_main_module=True):
     )
 
     # multiprocessing's resource_tracker must be running before loky
-    # process is created (othewise the child won't be able to use it if it
+    # process is created (otherwise the child won't be able to use it if it
     # is created later on)
     mp_resource_tracker.ensure_running()
     d["mp_tracker_args"] = {
diff --git a/joblib/externals/loky/cloudpickle_wrapper.py b/joblib/externals/loky/cloudpickle_wrapper.py
index 099debcb7..387b07dd1 100644
--- a/joblib/externals/loky/cloudpickle_wrapper.py
+++ b/joblib/externals/loky/cloudpickle_wrapper.py
@@ -19,7 +19,7 @@ def __reduce__(self):
         return _reconstruct_wrapper, (_pickled_object, self._keep_wrapper)
 
     def __getattr__(self, attr):
-        # Ensure that the wrapped object can be used seemlessly as the
+        # Ensure that the wrapped object can be used seamlessly as the
         # previous object.
         if attr not in ["_obj", "_keep_wrapper"]:
             return getattr(self._obj, attr)
diff --git a/joblib/externals/loky/process_executor.py b/joblib/externals/loky/process_executor.py
index 304071957..2576bf4b2 100644
--- a/joblib/externals/loky/process_executor.py
+++ b/joblib/externals/loky/process_executor.py
@@ -983,7 +983,7 @@ def _check_max_depth(context):
     if 0 < MAX_DEPTH and _CURRENT_DEPTH + 1 > MAX_DEPTH:
         raise LokyRecursionError(
             "Could not spawn extra nested processes at depth superior to "
-            f"MAX_DEPTH={MAX_DEPTH}. If this is intendend, you can change "
+            f"MAX_DEPTH={MAX_DEPTH}. If this is intended, you can change "
             "this limit with the LOKY_MAX_DEPTH environment variable."
         )
 
diff --git a/joblib/func_inspect.py b/joblib/func_inspect.py
index 3f8094614..9b8113619 100644
--- a/joblib/func_inspect.py
+++ b/joblib/func_inspect.py
@@ -140,8 +140,8 @@ def get_func_name(func, resolv_alias=True, win_characters=True):
             # The XYZ hash should avoid collisions between functions with
             # the same name, both within the same notebook but also across
             # notebooks
-            splitted = parts[-1].split('-')
-            parts[-1] = '-'.join(splitted[:2] + splitted[3:])
+            split = parts[-1].split('-')
+            parts[-1] = '-'.join(split[:2] + split[3:])
         elif len(parts) > 2 and parts[-2].startswith('ipykernel_'):
             # In a notebook session (ipykernel). Filename seems to be 'xyz'
             # of above. parts[-2] has the structure ipykernel_XXXXXX where
diff --git a/joblib/numpy_pickle_compat.py b/joblib/numpy_pickle_compat.py
index 32612849b..b819e26a6 100644
--- a/joblib/numpy_pickle_compat.py
+++ b/joblib/numpy_pickle_compat.py
@@ -101,7 +101,7 @@ def read(self, unpickler):
         if allow_mmap:
             kwargs['mmap_mode'] = unpickler.mmap_mode
         if "allow_pickle" in inspect.signature(unpickler.np.load).parameters:
-            # Required in numpy 1.16.3 and later to aknowledge the security
+            # Required in numpy 1.16.3 and later to acknowledge the security
             # risk.
             kwargs["allow_pickle"] = True
         array = unpickler.np.load(filename, **kwargs)
diff --git a/joblib/numpy_pickle_utils.py b/joblib/numpy_pickle_utils.py
index 23cfb34ec..c09616b7e 100644
--- a/joblib/numpy_pickle_utils.py
+++ b/joblib/numpy_pickle_utils.py
@@ -90,7 +90,7 @@ def _detect_compressor(fileobj):
     max_prefix_len = _get_prefixes_max_len()
     if hasattr(fileobj, 'peek'):
         # Peek allows to read those bytes without moving the cursor in the
-        # file whic.
+        # file which.
         first_bytes = fileobj.peek(max_prefix_len)
     else:
         # Fallback to seek if the fileobject is not peekable.
diff --git a/joblib/parallel.py b/joblib/parallel.py
index 7a6323088..9a56550d1 100644
--- a/joblib/parallel.py
+++ b/joblib/parallel.py
@@ -1042,7 +1042,7 @@ class Parallel(Logger):
         The number of batches (of tasks) to be pre-dispatched.
         Default is '2*n_jobs'. When batch_size="auto" this is reasonable
         default and the workers should never starve. Note that only basic
-        arithmetics are allowed here and no modules can be used in this
+        arithmetic are allowed here and no modules can be used in this
         expression.
     batch_size: int or 'auto', default='auto'
         The number of atomic tasks to dispatch at once to each
diff --git a/joblib/test/common.py b/joblib/test/common.py
index c0133f59b..612cbee02 100644
--- a/joblib/test/common.py
+++ b/joblib/test/common.py
@@ -66,7 +66,7 @@ def force_gc_pypy():
     # will eventually be collected.
     if IS_PYPY:
         # Run gc.collect() twice to make sure the weakref is collected, as
-        # mentionned in the pypy doc:
+        # mentioned in the pypy doc:
         # https://doc.pypy.org/en/latest/config/objspace.usemodules._weakref.html
         import gc
         gc.collect()
diff --git a/joblib/test/test_disk.py b/joblib/test/test_disk.py
index b825a8b3a..0681f4b1a 100644
--- a/joblib/test/test_disk.py
+++ b/joblib/test/test_disk.py
@@ -62,8 +62,8 @@ def test_mkdirp(tmpdir):
 
 
 def test_rm_subdirs(tmpdir):
-    sub_path = os.path.join(tmpdir.strpath, "am", "stram")
-    full_path = os.path.join(sub_path, "gram")
+    sub_path = os.path.join(tmpdir.strpath, "subdir_one", "subdir_two")
+    full_path = os.path.join(sub_path, "subdir_three")
     mkdirp(os.path.join(full_path))
     rm_subdirs(sub_path)
 
diff --git a/joblib/test/test_parallel.py b/joblib/test/test_parallel.py
index 2242c076d..9017f061c 100644
--- a/joblib/test/test_parallel.py
+++ b/joblib/test/test_parallel.py
@@ -1416,7 +1416,7 @@ def test_multiple_generator_call(backend, return_as, n_jobs):
 
     # Make sure that the error is raised quickly
     assert time.time() - t_start < 2, (
-        "The error should be raised immediatly when submitting a new task "
+        "The error should be raised immediately when submitting a new task "
         "but it took more than 2s."
     )
 
@@ -1443,8 +1443,8 @@ def test_multiple_generator_call_managed(backend, return_as, n_jobs):
 
     # Make sure that the error is raised quickly
    assert time.time() - t_start < 2, (
-        "The error should be raised immediatly when submitting a new task "
-        "but it took more than 2s."
+        "The error should be raised immediately when submitting a new task"
+        " but it took more than 2s."
     )
 
     # The gc in pypy can be delayed. Force it to make sure this test does not
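
Side note on the joblib/parallel.py docstring hunk above: pre_dispatch accepts either an integer or a simple arithmetic expression involving n_jobs, as the corrected sentence says. A minimal sketch of how the parameter is passed; the toy task, the n_jobs value and the '3*n_jobs' expression are illustrative, not part of this patch.

    from joblib import Parallel, delayed

    # '3*n_jobs' is evaluated as basic arithmetic on n_jobs: with n_jobs=2,
    # roughly six batches of tasks are kept queued ahead of the workers while
    # the task generator is consumed lazily.
    out = Parallel(n_jobs=2, pre_dispatch="3*n_jobs", batch_size="auto")(
        delayed(abs)(i - 50) for i in range(100)
    )
    assert out[0] == 50 and len(out) == 100
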
diff --git a/pyproject.toml b/pyproject.toml
index 207f64f3d..2a577a397 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -100,3 +100,10 @@ relative_files = true
 
 [tool.coverage.report]
 show_missing = true
+
+[tool.codespell]
+# Ref: https://github.com/codespell-project/codespell#using-a-config-file
+skip = '.git*,*.svg,venv,*.css'
+check-hidden = true
+# ignore-regex = ''
+ignore-words-list = 'fo'
diff --git a/setup.cfg b/setup.cfg
index 56e8f46a6..6e180509c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,4 +1,4 @@
-# The prefered config file is pyproject.toml. The use of setup.cfg is
+# The preferred config file is pyproject.toml. The use of setup.cfg is
 # mostly for compatibility with flake8 so it should not be used if possible.
 
 [flake8]
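
Closing note on the new codespell setup: with the [tool.codespell] table added to pyproject.toml above, the CI check can be reproduced locally before pushing. This is a sketch under the assumption that a codespell release recent enough to read pyproject.toml is installed and that it is run from the repository root; the subprocess invocation is illustrative, not part of the patch.

    import subprocess

    # A recent codespell picks up skip/check-hidden/ignore-words-list from the
    # [tool.codespell] table, so no extra flags are needed here; a non-zero
    # return code means typos were found, mirroring the CI workflow above.
    result = subprocess.run(["codespell"], check=False)
    if result.returncode != 0:
        print("codespell reported typos; fix them before pushing.")
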