apply code formatting

rapidfuzz · Apr 11, 2023 · 4fc08a5 · 4fc08a5
1 parent 5e3edde
commit 4fc08a5
Show file tree

Hide file tree

Showing 43 changed files with 212 additions and 179 deletions.
diff --git a/.codespell-ignore-lines b/.codespell-ignore-lines
@@ -1,2 +1,3 @@
-            "C'est la vie",
-            "c est la vie",
+        "C'est la vie",
+        "c est la vie",
+    >>> s2 = "cetain"
diff --git a/.github/workflows/releasebuild.yml b/.github/workflows/releasebuild.yml
@@ -69,7 +69,7 @@ jobs:
             python_tag: "pp38-*"
           - arch: auto32
             python_tag: "pp39-*"
-          
+
           # ARM64 is only supported on cpython >= 3.9
           - arch: ARM64
             python_tag: "pp37-*"

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -15,7 +15,7 @@
 repos:
 # Standard hooks
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: "v4.3.0"
+  rev: "v4.4.0"
   hooks:
   - id: check-added-large-files
   - id: check-case-conflict
@@ -32,53 +32,53 @@ repos:
 
 # Upgrade old Python syntax
 - repo: https://github.com/asottile/pyupgrade
-  rev: "v2.38.2"
+  rev: "v3.3.1"
   hooks:
   - id: pyupgrade
     args: [--py37-plus]
 
 # Nicely sort includes
 - repo: https://github.com/PyCQA/isort
-  rev: "5.10.1"
+  rev: "5.12.0"
   hooks:
   - id: isort
 
 # Black, the code formatter, natively supports pre-commit
 - repo: https://github.com/psf/black
-  rev: "22.8.0" # Keep in sync with blacken-docs
+  rev: "23.3.0" # Keep in sync with blacken-docs
   hooks:
   - id: black
 
 # Also code format the docs
 - repo: https://github.com/asottile/blacken-docs
-  rev: "v1.12.1"
+  rev: "1.13.0"
   hooks:
   - id: blacken-docs
     additional_dependencies:
     - black==22.8.0 # keep in sync with black hook
 
 # Changes tabs to spaces
 - repo: https://github.com/Lucas-C/pre-commit-hooks
-  rev: "v1.3.1"
+  rev: "v1.5.1"
   hooks:
   - id: remove-tabs
 
 - repo: https://github.com/sirosen/texthooks
-  rev: "0.4.0"
+  rev: "0.5.0"
   hooks:
   - id: fix-ligatures
   - id: fix-smartquotes
 
 # Autoremoves unused imports
 - repo: https://github.com/hadialqattan/pycln
-  rev: "v2.1.1"
+  rev: "v2.1.3"
   hooks:
   - id: pycln
     stages: [manual]
 
 # Checking for common mistakes
 - repo: https://github.com/pre-commit/pygrep-hooks
-  rev: "v1.9.0"
+  rev: "v1.10.0"
   hooks:
   - id: python-check-blanket-noqa
   - id: python-check-blanket-type-ignore
@@ -99,15 +99,15 @@ repos:
 
 # Flake8 also supports pre-commit natively (same author)
 - repo: https://github.com/PyCQA/flake8
-  rev: "5.0.4"
+  rev: "6.0.0"
   hooks:
   - id: flake8
     exclude: ^(docs/.*|tools/.*)$
     additional_dependencies: *flake8_dependencies
 
 # PyLint has native support - not always usable, but works for us
 - repo: https://github.com/PyCQA/pylint
-  rev: "v2.15.3"
+  rev: "v3.0.0a6"
   hooks:
   - id: pylint
     files: ^pybind11
@@ -132,7 +132,7 @@ repos:
 
 # Checks the manifest for missing files (native support)
 - repo: https://github.com/mgedmin/check-manifest
-  rev: "0.48"
+  rev: "0.49"
   hooks:
   - id: check-manifest
     # This is a slow hook, so only run this if --hook-stage manual is passed
@@ -143,15 +143,15 @@ repos:
 # Use tools/codespell_ignore_lines_from_errors.py
 # to rebuild .codespell-ignore-lines
 - repo: https://github.com/codespell-project/codespell
-  rev: "v2.2.1"
+  rev: "v2.2.4"
   hooks:
   - id: codespell
     exclude: ".supp$"
     args: ["-x", ".codespell-ignore-lines"]
 
 # Check for common shell mistakes
 - repo: https://github.com/shellcheck-py/shellcheck-py
-  rev: "v0.8.0.4"
+  rev: "v0.9.0.2"
   hooks:
   - id: shellcheck
 
@@ -166,7 +166,7 @@ repos:
 
 # Clang format the codebase automatically
 - repo: https://github.com/pre-commit/mirrors-clang-format
-  rev: "v14.0.6"
+  rev: "v16.0.0"
   hooks:
   - id: clang-format
     types_or: [c++, c]
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -11,18 +11,19 @@ project(rapidfuzz LANGUAGES C CXX)
 
 include(CMakePrintHelpers)
 cmake_print_variables(CMAKE_AR CMAKE_C_COMPILER_AR CMAKE_CXX_COMPILER_AR)
-cmake_print_variables(CMAKE_RANLIB CMAKE_C_COMPILER_RANLIB CMAKE_CXX_COMPILER_RANLIB)
+cmake_print_variables(CMAKE_RANLIB CMAKE_C_COMPILER_RANLIB
+                      CMAKE_CXX_COMPILER_RANLIB)
 
-if ("${CMAKE_C_COMPILER_AR}" STREQUAL "")
+if("${CMAKE_C_COMPILER_AR}" STREQUAL "")
   set(CMAKE_C_COMPILER_AR "${CMAKE_AR}")
 endif()
-if ("${CMAKE_CXX_COMPILER_AR}" STREQUAL "")
+if("${CMAKE_CXX_COMPILER_AR}" STREQUAL "")
   set(CMAKE_CXX_COMPILER_AR "${CMAKE_AR}")
 endif()
-if ("${CMAKE_C_COMPILER_RANLIB}" STREQUAL "")
+if("${CMAKE_C_COMPILER_RANLIB}" STREQUAL "")
   set(CMAKE_C_COMPILER_RANLIB "${CMAKE_RANLIB}")
 endif()
-if ("${CMAKE_CXX_COMPILER_RANLIB}" STREQUAL "")
+if("${CMAKE_CXX_COMPILER_RANLIB}" STREQUAL "")
   set(CMAKE_CXX_COMPILER_RANLIB "${CMAKE_RANLIB}")
 endif()
 

diff --git a/bench/benchmark.py b/bench/benchmark.py
@@ -22,9 +22,7 @@
 
 import click
 import matplotlib.pyplot as plt
-import numpy as np
 import pandas as pd
-from common import benchmark, find_versions
 from packaging.version import Version
 from tqdm import tqdm
 
@@ -52,7 +50,7 @@ def benchmark(result, setup, func, queries, choices):
 # - a custom range
 # - only the installed version
 def get_rapidfuzz_versions(version):
-    url = f"https://pypi.org/pypi/rapidfuzz/json"
+    url = "https://pypi.org/pypi/rapidfuzz/json"
     data = json.load(urlopen(url))
     versions = list(data["releases"].keys())
     versions.sort(key=Version, reverse=True)
@@ -115,8 +113,8 @@ def run_benchmarks_rapidfuzz(rapidfuzz_version, func_name, dataset, result_df):
     func = "[[scorer(a, b) for a in queries] for b in choices]"
 
     if rapidfuzz_version == "current":
-        print(f"Benchmarking rapidfuzz")
-        result_df[f"rapidfuzz"] = run_benchmark(dataset, setup, func)
+        print("Benchmarking rapidfuzz")
+        result_df["rapidfuzz"] = run_benchmark(dataset, setup, func)
     else:
         rapidfuzz_versions = get_rapidfuzz_versions(rapidfuzz_version)
         os.environ["RAPIDFUZZ_BUILD_EXTENSION"] = "1"
@@ -153,53 +151,55 @@ def run_benchmarks_jellyfish(func_name, result_df, dataset):
     if func_name not in JELLYFISH_SCORERS:
         return
 
-    print(f"Benchmarking jellyfish")
+    print("Benchmarking jellyfish")
     func = f"[[jellyfish.{JELLYFISH_SCORERS[func_name]}(a, b) for a in queries] for b in choices]"
-    result_df[f"jellyfish"] = run_benchmark(dataset, setup, func)
+    result_df["jellyfish"] = run_benchmark(dataset, setup, func)
 
 
 def run_benchmarks_polyleven(func_name, result_df, dataset):
     if func_name != "Levenshtein":
         return
 
-    print(f"Benchmarking polyleven")
+    print("Benchmarking polyleven")
     setup = "import polyleven"
-    func = f"[[polyleven.levenshtein(a, b) for a in queries] for b in choices]"
-    result_df[f"polyleven"] = run_benchmark(dataset, setup, func)
+    func = "[[polyleven.levenshtein(a, b) for a in queries] for b in choices]"
+    result_df["polyleven"] = run_benchmark(dataset, setup, func)
 
 
 def run_benchmarks_edlib(func_name, result_df, dataset):
     if func_name != "Levenshtein":
         return
 
-    print(f"Benchmarking edlib")
+    print("Benchmarking edlib")
     setup = "import edlib"
-    func = f"[[edlib.align(a, b) for a in queries] for b in choices]"
-    result_df[f"edlib"] = run_benchmark(dataset, setup, func)
+    func = "[[edlib.align(a, b) for a in queries] for b in choices]"
+    result_df["edlib"] = run_benchmark(dataset, setup, func)
 
-    print(f"Benchmarking edlib(k=max)")
-    func = f"[[edlib.align(a, b, k=max(len(a), len(b))) for a in queries] for b in choices]"
-    result_df[f"edlib(k=max)"] = run_benchmark(dataset, setup, func)
+    print("Benchmarking edlib(k=max)")
+    func = (
+        "[[edlib.align(a, b, k=max(len(a), len(b))) for a in queries] for b in choices]"
+    )
+    result_df["edlib(k=max)"] = run_benchmark(dataset, setup, func)
 
 
 def run_benchmarks_editdistance(func_name, result_df, dataset):
     if func_name != "Levenshtein":
         return
 
-    print(f"Benchmarking editdistance")
+    print("Benchmarking editdistance")
     setup = "import editdistance"
-    func = f"[[editdistance.eval(a, b) for a in queries] for b in choices]"
-    result_df[f"editdistance"] = run_benchmark(dataset, setup, func)
+    func = "[[editdistance.eval(a, b) for a in queries] for b in choices]"
+    result_df["editdistance"] = run_benchmark(dataset, setup, func)
 
 
 def run_benchmarks_pyxdameraulevenshtein(func_name, result_df, dataset):
     if func_name != "OSA":
         return
 
-    print(f"Benchmarking pyxdameraulevenshtein")
+    print("Benchmarking pyxdameraulevenshtein")
     setup = "import pyxdameraulevenshtein"
-    func = f"[[pyxdameraulevenshtein.damerau_levenshtein_distance(a, b) for a in queries] for b in choices]"
-    result_df[f"pyxdameraulevenshtein"] = run_benchmark(dataset, setup, func)
+    func = "[[pyxdameraulevenshtein.damerau_levenshtein_distance(a, b) for a in queries] for b in choices]"
+    result_df["pyxdameraulevenshtein"] = run_benchmark(dataset, setup, func)
 
 
 AVAILABLE_BENCHMARKS = [

diff --git a/bench/benchmark_visualize.py b/bench/benchmark_visualize.py
@@ -9,15 +9,15 @@
 
 ax = df.plot(x="x_axis")
 
-#plt.xticks(list(range(0, 64*20+1, 64)))
+# plt.xticks(list(range(0, 64*20+1, 64)))
 
 plt.title(
     "Performance comparison of the \nDamerauLevenshtein similarity in different libraries"
 )
 plt.xlabel("string length [in characters]")
 plt.ylabel("runtime [μs]")
 ax.set_xlim(xmin=0)
-#ax.set_ylim(bottom=0)
+# ax.set_ylim(bottom=0)
 # ax.set_yscale('log')
 plt.grid()
 plt.show()
diff --git a/bench/common.py b/bench/common.py
@@ -1,16 +1,18 @@
-import timeit
-
 import json
+import timeit
 from urllib.request import urlopen
+
 from packaging.version import Version
 
+
 def find_versions(package_name):
     url = f"https://pypi.org/pypi/{package_name}/json"
     data = json.load(urlopen(url))
     versions = list(data["releases"].keys())
     versions.sort(key=Version, reverse=True)
     return versions
 
+
 def benchmark(name, func, setup, lengths, count):
     print(f"starting {name}")
     start = timeit.default_timer()

diff --git a/bench/requirements.txt b/bench/requirements.txt
@@ -1,11 +1,10 @@
-tqdm
-numpy
+editdistance
+edlib
+jellyfish
 matplotlib
+numpy
 pandas
-jellyfish
+polyleven
 pyxdameraulevenshtein
 thefuzz
-editdistance
-edlib
-polyleven
-
+tqdm
diff --git a/docs/index.rst b/docs/index.rst
@@ -44,4 +44,4 @@ Distributed under a :ref:`MIT License <my-license-label>`, RapidFuzz is develope
    References
    GitHub Repository <https://github.com/maxbachmann/RapidFuzz>
 
-.. |header-image| image:: img/RapidFuzz.svg
+.. |header-image| image:: img/RapidFuzz.svg
diff --git a/setup.cfg b/setup.cfg
@@ -2,5 +2,5 @@
 max-line-length = 120
 show_source = True
 exclude = .git, __pycache__, build, dist, docs, tools, venv
-extend-ignore = E203, E722, B903, B950, N801, N802, N806
+extend-ignore = E203, E722, B903, B950, N801, N802, N806, B905, B907
 extend-select = B9
diff --git a/src/rapidfuzz/CMakeLists.txt b/src/rapidfuzz/CMakeLists.txt
@@ -33,9 +33,11 @@ endfunction(rf_add_library)
 
 if(RAPIDFUZZ_ARCH_X64)
   create_cython_target(_feature_detector_cpp)
-  rf_add_library(_feature_detector_cpp ${_feature_detector_cpp} ${CMAKE_CURRENT_LIST_DIR}/FeatureDetector/CpuInfo.cpp)
+  rf_add_library(_feature_detector_cpp ${_feature_detector_cpp}
+                 ${CMAKE_CURRENT_LIST_DIR}/FeatureDetector/CpuInfo.cpp)
   target_compile_features(_feature_detector_cpp PUBLIC cxx_std_17)
-  target_include_directories(_feature_detector_cpp PRIVATE ${RF_BASE_DIR}/rapidfuzz)
+  target_include_directories(_feature_detector_cpp
+                             PRIVATE ${RF_BASE_DIR}/rapidfuzz)
   target_link_libraries(_feature_detector_cpp PRIVATE rapidfuzz::rapidfuzz)
   install(TARGETS _feature_detector_cpp LIBRARY DESTINATION src/rapidfuzz)
 endif()
@@ -60,12 +62,14 @@ if(RAPIDFUZZ_ARCH_X64)
   target_compile_features(fuzz_cpp_impl_avx2 PUBLIC cxx_std_17)
 
   if(MSVC)
-    set_target_properties(fuzz_cpp_impl_avx2 PROPERTIES COMPILE_FLAGS "/arch:AVX2")
+    set_target_properties(fuzz_cpp_impl_avx2 PROPERTIES COMPILE_FLAGS
+                                                        "/arch:AVX2")
   else()
     set_target_properties(fuzz_cpp_impl_avx2 PROPERTIES COMPILE_FLAGS "-mavx2")
   endif()
 
-  target_include_directories(fuzz_cpp_impl_avx2 PRIVATE ${RF_BASE_DIR}/rapidfuzz)
+  target_include_directories(fuzz_cpp_impl_avx2
+                             PRIVATE ${RF_BASE_DIR}/rapidfuzz)
   target_link_libraries(fuzz_cpp_impl_avx2 PRIVATE rapidfuzz::rapidfuzz)
   install(TARGETS fuzz_cpp_impl_avx2 LIBRARY DESTINATION src/rapidfuzz)
 endif()

diff --git a/src/rapidfuzz/_feature_detector.py b/src/rapidfuzz/_feature_detector.py
@@ -2,11 +2,13 @@
 # Copyright (C) 2022 Max Bachmann
 from __future__ import annotations
 
+__all__ = ["AVX2", "SSE2", "supports"]
+
 try:
     from rapidfuzz._feature_detector_cpp import AVX2, SSE2, supports
-except:
+except ImportError:
     SSE2 = 1
     AVX2 = 2
 
     def supports(features):
-        return False
+        return False
diff --git a/src/rapidfuzz/_feature_detector_cpp.pyx b/src/rapidfuzz/_feature_detector_cpp.pyx
@@ -3,6 +3,7 @@
 
 from libc.stdint cimport uint32_t
 
+
 cdef extern from "FeatureDetector/CpuInfo.hpp":
     cdef int CPU_FEATURE_SSE2
     cdef int CPU_FEATURE_AVX2