From 4fc08a5df5f5b82d06be960542b13c5fd4982b6e Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Tue, 11 Apr 2023 02:13:26 +0200 Subject: [PATCH] apply code formatting --- .codespell-ignore-lines | 5 +- .github/workflows/releasebuild.yml | 2 +- .pre-commit-config.yaml | 30 ++++---- CMakeLists.txt | 11 +-- bench/benchmark.py | 44 ++++++------ bench/benchmark_visualize.py | 4 +- bench/common.py | 6 +- bench/requirements.txt | 13 ++-- docs/index.rst | 2 +- setup.cfg | 2 +- src/rapidfuzz/CMakeLists.txt | 12 ++-- src/rapidfuzz/_feature_detector.py | 6 +- src/rapidfuzz/_feature_detector_cpp.pyx | 1 + src/rapidfuzz/_utils.py | 4 +- src/rapidfuzz/cpp_common.hpp | 6 +- src/rapidfuzz/distance/CMakeLists.txt | 6 +- .../distance/DamerauLevenshtein_cpp.py | 14 ++-- src/rapidfuzz/distance/Hamming_cpp.py | 10 +-- src/rapidfuzz/distance/Indel_cpp.py | 10 +-- src/rapidfuzz/distance/JaroWinkler_cpp.py | 6 +- src/rapidfuzz/distance/Jaro_cpp.py | 2 +- src/rapidfuzz/distance/LCSseq_cpp.py | 10 +-- src/rapidfuzz/distance/Levenshtein.py | 2 + src/rapidfuzz/distance/Levenshtein_cpp.py | 10 +-- src/rapidfuzz/distance/OSA_cpp.py | 2 +- src/rapidfuzz/distance/Postfix_cpp.py | 2 +- src/rapidfuzz/distance/Prefix_cpp.py | 2 +- src/rapidfuzz/distance/_initialize.pyi | 1 - src/rapidfuzz/distance/metrics.hpp | 69 +++++++++---------- src/rapidfuzz/distance/metrics_cpp_avx2.pyx | 2 +- src/rapidfuzz/fuzz.pyi | 2 +- src/rapidfuzz/fuzz_cpp.hpp | 11 ++- src/rapidfuzz/fuzz_cpp.py | 46 ++++++++++--- src/rapidfuzz/fuzz_cpp_impl_avx2.pyx | 2 +- src/rapidfuzz/fuzz_py.py | 4 +- src/rapidfuzz/process_py.py | 3 +- tests/distance/test_Jaro.py | 1 + tests/distance/test_JaroWinkler.py | 1 + tests/distance/test_OSA.py | 2 +- tests/distance/test_distance.py | 1 + tests/test_fuzz.py | 3 +- tests/test_hypothesis.py | 9 +-- tests/test_process.py | 10 +-- 43 files changed, 212 insertions(+), 179 deletions(-) diff --git a/.codespell-ignore-lines b/.codespell-ignore-lines index cb5eeb7a..618378b1 100644 --- a/.codespell-ignore-lines +++ b/.codespell-ignore-lines @@ -1,2 +1,3 @@ - "C'est la vie", - "c est la vie", + "C'est la vie", + "c est la vie", + >>> s2 = "cetain" diff --git a/.github/workflows/releasebuild.yml b/.github/workflows/releasebuild.yml index d659a093..ff1c3969 100644 --- a/.github/workflows/releasebuild.yml +++ b/.github/workflows/releasebuild.yml @@ -69,7 +69,7 @@ jobs: python_tag: "pp38-*" - arch: auto32 python_tag: "pp39-*" - + # ARM64 only supported only supported on cpython >= 3.9 - arch: ARM64 python_tag: "pp37-*" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index afcd3a3b..1646ded9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: # Standard hooks - repo: https://github.com/pre-commit/pre-commit-hooks - rev: "v4.3.0" + rev: "v4.4.0" hooks: - id: check-added-large-files - id: check-case-conflict @@ -32,26 +32,26 @@ repos: # Upgrade old Python syntax - repo: https://github.com/asottile/pyupgrade - rev: "v2.38.2" + rev: "v3.3.1" hooks: - id: pyupgrade args: [--py37-plus] # Nicely sort includes - repo: https://github.com/PyCQA/isort - rev: "5.10.1" + rev: "5.12.0" hooks: - id: isort # Black, the code formatter, natively supports pre-commit - repo: https://github.com/psf/black - rev: "22.8.0" # Keep in sync with blacken-docs + rev: "23.3.0" # Keep in sync with blacken-docs hooks: - id: black # Also code format the docs - repo: https://github.com/asottile/blacken-docs - rev: "v1.12.1" + rev: "1.13.0" hooks: - id: blacken-docs additional_dependencies: @@ -59,26 +59,26 @@ repos: # Changes tabs to spaces - repo: https://github.com/Lucas-C/pre-commit-hooks - rev: "v1.3.1" + rev: "v1.5.1" hooks: - id: remove-tabs - repo: https://github.com/sirosen/texthooks - rev: "0.4.0" + rev: "0.5.0" hooks: - id: fix-ligatures - id: fix-smartquotes # Autoremoves unused imports - repo: https://github.com/hadialqattan/pycln - rev: "v2.1.1" + rev: "v2.1.3" hooks: - id: pycln stages: [manual] # Checking for common mistakes - repo: https://github.com/pre-commit/pygrep-hooks - rev: "v1.9.0" + rev: "v1.10.0" hooks: - id: python-check-blanket-noqa - id: python-check-blanket-type-ignore @@ -99,7 +99,7 @@ repos: # Flake8 also supports pre-commit natively (same author) - repo: https://github.com/PyCQA/flake8 - rev: "5.0.4" + rev: "6.0.0" hooks: - id: flake8 exclude: ^(docs/.*|tools/.*)$ @@ -107,7 +107,7 @@ repos: # PyLint has native support - not always usable, but works for us - repo: https://github.com/PyCQA/pylint - rev: "v2.15.3" + rev: "v3.0.0a6" hooks: - id: pylint files: ^pybind11 @@ -132,7 +132,7 @@ repos: # Checks the manifest for missing files (native support) - repo: https://github.com/mgedmin/check-manifest - rev: "0.48" + rev: "0.49" hooks: - id: check-manifest # This is a slow hook, so only run this if --hook-stage manual is passed @@ -143,7 +143,7 @@ repos: # Use tools/codespell_ignore_lines_from_errors.py # to rebuild .codespell-ignore-lines - repo: https://github.com/codespell-project/codespell - rev: "v2.2.1" + rev: "v2.2.4" hooks: - id: codespell exclude: ".supp$" @@ -151,7 +151,7 @@ repos: # Check for common shell mistakes - repo: https://github.com/shellcheck-py/shellcheck-py - rev: "v0.8.0.4" + rev: "v0.9.0.2" hooks: - id: shellcheck @@ -166,7 +166,7 @@ repos: # Clang format the codebase automatically - repo: https://github.com/pre-commit/mirrors-clang-format - rev: "v14.0.6" + rev: "v16.0.0" hooks: - id: clang-format types_or: [c++, c] diff --git a/CMakeLists.txt b/CMakeLists.txt index 24da57f1..bb1ecda2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,18 +11,19 @@ project(rapidfuzz LANGUAGES C CXX) include(CMakePrintHelpers) cmake_print_variables(CMAKE_AR CMAKE_C_COMPILER_AR CMAKE_CXX_COMPILER_AR) -cmake_print_variables(CMAKE_RANLIB CMAKE_C_COMPILER_RANLIB CMAKE_CXX_COMPILER_RANLIB) +cmake_print_variables(CMAKE_RANLIB CMAKE_C_COMPILER_RANLIB + CMAKE_CXX_COMPILER_RANLIB) -if ("${CMAKE_C_COMPILER_AR}" STREQUAL "") +if("${CMAKE_C_COMPILER_AR}" STREQUAL "") set(CMAKE_C_COMPILER_AR "${CMAKE_AR}") endif() -if ("${CMAKE_CXX_COMPILER_AR}" STREQUAL "") +if("${CMAKE_CXX_COMPILER_AR}" STREQUAL "") set(CMAKE_CXX_COMPILER_AR "${CMAKE_AR}") endif() -if ("${CMAKE_C_COMPILER_RANLIB}" STREQUAL "") +if("${CMAKE_C_COMPILER_RANLIB}" STREQUAL "") set(CMAKE_C_COMPILER_RANLIB "${CMAKE_RANLIB}") endif() -if ("${CMAKE_CXX_COMPILER_RANLIB}" STREQUAL "") +if("${CMAKE_CXX_COMPILER_RANLIB}" STREQUAL "") set(CMAKE_CXX_COMPILER_RANLIB "${CMAKE_RANLIB}") endif() diff --git a/bench/benchmark.py b/bench/benchmark.py index 86ccd53f..8bc0c035 100644 --- a/bench/benchmark.py +++ b/bench/benchmark.py @@ -22,9 +22,7 @@ import click import matplotlib.pyplot as plt -import numpy as np import pandas as pd -from common import benchmark, find_versions from packaging.version import Version from tqdm import tqdm @@ -52,7 +50,7 @@ def benchmark(result, setup, func, queries, choices): # - a custom range # - only the installed version def get_rapidfuzz_versions(version): - url = f"https://pypi.org/pypi/rapidfuzz/json" + url = "https://pypi.org/pypi/rapidfuzz/json" data = json.load(urlopen(url)) versions = list(data["releases"].keys()) versions.sort(key=Version, reverse=True) @@ -115,8 +113,8 @@ def run_benchmarks_rapidfuzz(rapidfuzz_version, func_name, dataset, result_df): func = "[[scorer(a, b) for a in queries] for b in choices]" if rapidfuzz_version == "current": - print(f"Benchmarking rapidfuzz") - result_df[f"rapidfuzz"] = run_benchmark(dataset, setup, func) + print("Benchmarking rapidfuzz") + result_df["rapidfuzz"] = run_benchmark(dataset, setup, func) else: rapidfuzz_versions = get_rapidfuzz_versions(rapidfuzz_version) os.environ["RAPIDFUZZ_BUILD_EXTENSION"] = "1" @@ -153,53 +151,55 @@ def run_benchmarks_jellyfish(func_name, result_df, dataset): if func_name not in JELLYFISH_SCORERS: return - print(f"Benchmarking jellyfish") + print("Benchmarking jellyfish") func = f"[[jellyfish.{JELLYFISH_SCORERS[func_name]}(a, b) for a in queries] for b in choices]" - result_df[f"jellyfish"] = run_benchmark(dataset, setup, func) + result_df["jellyfish"] = run_benchmark(dataset, setup, func) def run_benchmarks_polyleven(func_name, result_df, dataset): if func_name != "Levenshtein": return - print(f"Benchmarking polyleven") + print("Benchmarking polyleven") setup = "import polyleven" - func = f"[[polyleven.levenshtein(a, b) for a in queries] for b in choices]" - result_df[f"polyleven"] = run_benchmark(dataset, setup, func) + func = "[[polyleven.levenshtein(a, b) for a in queries] for b in choices]" + result_df["polyleven"] = run_benchmark(dataset, setup, func) def run_benchmarks_edlib(func_name, result_df, dataset): if func_name != "Levenshtein": return - print(f"Benchmarking edlib") + print("Benchmarking edlib") setup = "import edlib" - func = f"[[edlib.align(a, b) for a in queries] for b in choices]" - result_df[f"edlib"] = run_benchmark(dataset, setup, func) + func = "[[edlib.align(a, b) for a in queries] for b in choices]" + result_df["edlib"] = run_benchmark(dataset, setup, func) - print(f"Benchmarking edlib(k=max)") - func = f"[[edlib.align(a, b, k=max(len(a), len(b))) for a in queries] for b in choices]" - result_df[f"edlib(k=max)"] = run_benchmark(dataset, setup, func) + print("Benchmarking edlib(k=max)") + func = ( + "[[edlib.align(a, b, k=max(len(a), len(b))) for a in queries] for b in choices]" + ) + result_df["edlib(k=max)"] = run_benchmark(dataset, setup, func) def run_benchmarks_editdistance(func_name, result_df, dataset): if func_name != "Levenshtein": return - print(f"Benchmarking editdistance") + print("Benchmarking editdistance") setup = "import editdistance" - func = f"[[editdistance.eval(a, b) for a in queries] for b in choices]" - result_df[f"editdistance"] = run_benchmark(dataset, setup, func) + func = "[[editdistance.eval(a, b) for a in queries] for b in choices]" + result_df["editdistance"] = run_benchmark(dataset, setup, func) def run_benchmarks_pyxdameraulevenshtein(func_name, result_df, dataset): if func_name != "OSA": return - print(f"Benchmarking pyxdameraulevenshtein") + print("Benchmarking pyxdameraulevenshtein") setup = "import pyxdameraulevenshtein" - func = f"[[pyxdameraulevenshtein.damerau_levenshtein_distance(a, b) for a in queries] for b in choices]" - result_df[f"pyxdameraulevenshtein"] = run_benchmark(dataset, setup, func) + func = "[[pyxdameraulevenshtein.damerau_levenshtein_distance(a, b) for a in queries] for b in choices]" + result_df["pyxdameraulevenshtein"] = run_benchmark(dataset, setup, func) AVAILABLE_BENCHMARKS = [ diff --git a/bench/benchmark_visualize.py b/bench/benchmark_visualize.py index 2ba153bd..0c7af845 100644 --- a/bench/benchmark_visualize.py +++ b/bench/benchmark_visualize.py @@ -9,7 +9,7 @@ ax = df.plot(x="x_axis") -#plt.xticks(list(range(0, 64*20+1, 64))) +# plt.xticks(list(range(0, 64*20+1, 64))) plt.title( "Performance comparison of the \nDamerauLevenshtein similarity in different libraries" @@ -17,7 +17,7 @@ plt.xlabel("string length [in characters]") plt.ylabel("runtime [μs]") ax.set_xlim(xmin=0) -#ax.set_ylim(bottom=0) +# ax.set_ylim(bottom=0) # ax.set_yscale('log') plt.grid() plt.show() diff --git a/bench/common.py b/bench/common.py index becc43a5..b988ad31 100644 --- a/bench/common.py +++ b/bench/common.py @@ -1,9 +1,10 @@ -import timeit - import json +import timeit from urllib.request import urlopen + from packaging.version import Version + def find_versions(package_name): url = f"https://pypi.org/pypi/{package_name}/json" data = json.load(urlopen(url)) @@ -11,6 +12,7 @@ def find_versions(package_name): versions.sort(key=Version, reverse=True) return versions + def benchmark(name, func, setup, lengths, count): print(f"starting {name}") start = timeit.default_timer() diff --git a/bench/requirements.txt b/bench/requirements.txt index 195b7ad9..d19321df 100644 --- a/bench/requirements.txt +++ b/bench/requirements.txt @@ -1,11 +1,10 @@ -tqdm -numpy +editdistance +edlib +jellyfish matplotlib +numpy pandas -jellyfish +polyleven pyxdameraulevenshtein thefuzz -editdistance -edlib -polyleven - +tqdm diff --git a/docs/index.rst b/docs/index.rst index fd9855b1..b3654174 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -44,4 +44,4 @@ Distributed under a :ref:`MIT License `, RapidFuzz is develope References GitHub Repository -.. |header-image| image:: img/RapidFuzz.svg \ No newline at end of file +.. |header-image| image:: img/RapidFuzz.svg diff --git a/setup.cfg b/setup.cfg index 804a966b..3485f502 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,5 +2,5 @@ max-line-length = 120 show_source = True exclude = .git, __pycache__, build, dist, docs, tools, venv -extend-ignore = E203, E722, B903, B950, N801, N802, N806 +extend-ignore = E203, E722, B903, B950, N801, N802, N806, B905, B907 extend-select = B9 diff --git a/src/rapidfuzz/CMakeLists.txt b/src/rapidfuzz/CMakeLists.txt index 0fb10963..d3759de1 100644 --- a/src/rapidfuzz/CMakeLists.txt +++ b/src/rapidfuzz/CMakeLists.txt @@ -33,9 +33,11 @@ endfunction(rf_add_library) if(RAPIDFUZZ_ARCH_X64) create_cython_target(_feature_detector_cpp) - rf_add_library(_feature_detector_cpp ${_feature_detector_cpp} ${CMAKE_CURRENT_LIST_DIR}/FeatureDetector/CpuInfo.cpp) + rf_add_library(_feature_detector_cpp ${_feature_detector_cpp} + ${CMAKE_CURRENT_LIST_DIR}/FeatureDetector/CpuInfo.cpp) target_compile_features(_feature_detector_cpp PUBLIC cxx_std_17) - target_include_directories(_feature_detector_cpp PRIVATE ${RF_BASE_DIR}/rapidfuzz) + target_include_directories(_feature_detector_cpp + PRIVATE ${RF_BASE_DIR}/rapidfuzz) target_link_libraries(_feature_detector_cpp PRIVATE rapidfuzz::rapidfuzz) install(TARGETS _feature_detector_cpp LIBRARY DESTINATION src/rapidfuzz) endif() @@ -60,12 +62,14 @@ if(RAPIDFUZZ_ARCH_X64) target_compile_features(fuzz_cpp_impl_avx2 PUBLIC cxx_std_17) if(MSVC) - set_target_properties(fuzz_cpp_impl_avx2 PROPERTIES COMPILE_FLAGS "/arch:AVX2") + set_target_properties(fuzz_cpp_impl_avx2 PROPERTIES COMPILE_FLAGS + "/arch:AVX2") else() set_target_properties(fuzz_cpp_impl_avx2 PROPERTIES COMPILE_FLAGS "-mavx2") endif() - target_include_directories(fuzz_cpp_impl_avx2 PRIVATE ${RF_BASE_DIR}/rapidfuzz) + target_include_directories(fuzz_cpp_impl_avx2 + PRIVATE ${RF_BASE_DIR}/rapidfuzz) target_link_libraries(fuzz_cpp_impl_avx2 PRIVATE rapidfuzz::rapidfuzz) install(TARGETS fuzz_cpp_impl_avx2 LIBRARY DESTINATION src/rapidfuzz) endif() diff --git a/src/rapidfuzz/_feature_detector.py b/src/rapidfuzz/_feature_detector.py index 2aed3bd0..9a01d05e 100644 --- a/src/rapidfuzz/_feature_detector.py +++ b/src/rapidfuzz/_feature_detector.py @@ -2,11 +2,13 @@ # Copyright (C) 2022 Max Bachmann from __future__ import annotations +__all__ = ["AVX2", "SSE2", "supports"] + try: from rapidfuzz._feature_detector_cpp import AVX2, SSE2, supports -except: +except ImportError: SSE2 = 1 AVX2 = 2 def supports(features): - return False \ No newline at end of file + return False diff --git a/src/rapidfuzz/_feature_detector_cpp.pyx b/src/rapidfuzz/_feature_detector_cpp.pyx index cc8f24a2..330bcbff 100644 --- a/src/rapidfuzz/_feature_detector_cpp.pyx +++ b/src/rapidfuzz/_feature_detector_cpp.pyx @@ -3,6 +3,7 @@ from libc.stdint cimport uint32_t + cdef extern from "FeatureDetector/CpuInfo.hpp": cdef int CPU_FEATURE_SSE2 cdef int CPU_FEATURE_AVX2 diff --git a/src/rapidfuzz/_utils.py b/src/rapidfuzz/_utils.py index 5c960e9a..f48f1f44 100644 --- a/src/rapidfuzz/_utils.py +++ b/src/rapidfuzz/_utils.py @@ -84,7 +84,7 @@ def fallback_import( py_func = getattr(py_mod, name) if not py_func: raise ImportError( - f"cannot import name '{name}' from '{py_mod.__name}' ({py_mod.__file__})" + f"cannot import name {name!r} from {py_mod.__name!r} ({py_mod.__file__})" ) if cached_scorer_call: @@ -103,7 +103,7 @@ def fallback_import( cpp_func = getattr(cpp_mod, name) if not cpp_func: raise ImportError( - f"cannot import name '{name}' from '{cpp_mod.__name}' ({cpp_mod.__file__})" + f"cannot import name {name!r} from {cpp_mod.__name!r} ({cpp_mod.__file__})" ) # patch cpp function so help does not need to be duplicated diff --git a/src/rapidfuzz/cpp_common.hpp b/src/rapidfuzz/cpp_common.hpp index 2c85b71e..0ed70741 100644 --- a/src/rapidfuzz/cpp_common.hpp +++ b/src/rapidfuzz/cpp_common.hpp @@ -694,8 +694,7 @@ static inline bool multi_normalized_distance_init(RF_ScorerFunc* self, int64_t s }); } -static inline PyObject* opcodes_apply(const rf::Opcodes& ops, const RF_String& str1, - const RF_String& str2) +static inline PyObject* opcodes_apply(const rf::Opcodes& ops, const RF_String& str1, const RF_String& str2) { return visitor(str1, str2, [&](auto s1, auto s2) { auto proc_str = rf::opcodes_apply(ops, s1, s2); @@ -703,8 +702,7 @@ static inline PyObject* opcodes_apply(const rf::Opcodes& ops, const RF_String& s }); } -static inline PyObject* editops_apply(const rf::Editops& ops, const RF_String& str1, - const RF_String& str2) +static inline PyObject* editops_apply(const rf::Editops& ops, const RF_String& str1, const RF_String& str2) { return visitor(str1, str2, [&](auto s1, auto s2) { auto proc_str = rf::editops_apply(ops, s1, s2); diff --git a/src/rapidfuzz/distance/CMakeLists.txt b/src/rapidfuzz/distance/CMakeLists.txt index 7c716b02..424bdd03 100644 --- a/src/rapidfuzz/distance/CMakeLists.txt +++ b/src/rapidfuzz/distance/CMakeLists.txt @@ -56,15 +56,15 @@ if(RAPIDFUZZ_ARCH_X64) target_compile_features(metrics_cpp_avx2 PUBLIC cxx_std_17) if(MSVC) - set_target_properties(metrics_cpp_avx2 PROPERTIES COMPILE_FLAGS "/arch:AVX2") + set_target_properties(metrics_cpp_avx2 PROPERTIES COMPILE_FLAGS + "/arch:AVX2") else() set_target_properties(metrics_cpp_avx2 PROPERTIES COMPILE_FLAGS "-mavx2") endif() target_include_directories( metrics_cpp_avx2 PRIVATE ${RF_BASE_DIR}/rapidfuzz - ${RF_BASE_DIR}/rapidfuzz/distance) + ${RF_BASE_DIR}/rapidfuzz/distance) target_link_libraries(metrics_cpp_avx2 PRIVATE rapidfuzz::rapidfuzz) install(TARGETS metrics_cpp_avx2 LIBRARY DESTINATION src/rapidfuzz/distance) endif() - diff --git a/src/rapidfuzz/distance/DamerauLevenshtein_cpp.py b/src/rapidfuzz/distance/DamerauLevenshtein_cpp.py index 56994232..052949b8 100644 --- a/src/rapidfuzz/distance/DamerauLevenshtein_cpp.py +++ b/src/rapidfuzz/distance/DamerauLevenshtein_cpp.py @@ -1,16 +1,20 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2022 Max Bachmann -from rapidfuzz._feature_detector import AVX2, SSE2, supports +from rapidfuzz._feature_detector import AVX2, supports if supports(AVX2): - from rapidfuzz.distance.metrics_cpp_avx2 import damerau_levenshtein_distance as distance + from rapidfuzz.distance.metrics_cpp_avx2 import ( + damerau_levenshtein_distance as distance, + ) from rapidfuzz.distance.metrics_cpp_avx2 import ( damerau_levenshtein_normalized_distance as normalized_distance, ) from rapidfuzz.distance.metrics_cpp_avx2 import ( damerau_levenshtein_normalized_similarity as normalized_similarity, ) - from rapidfuzz.distance.metrics_cpp_avx2 import damerau_levenshtein_similarity as similarity + from rapidfuzz.distance.metrics_cpp_avx2 import ( + damerau_levenshtein_similarity as similarity, + ) else: from rapidfuzz.distance.metrics_cpp import damerau_levenshtein_distance as distance from rapidfuzz.distance.metrics_cpp import ( @@ -19,7 +23,9 @@ from rapidfuzz.distance.metrics_cpp import ( damerau_levenshtein_normalized_similarity as normalized_similarity, ) - from rapidfuzz.distance.metrics_cpp import damerau_levenshtein_similarity as similarity + from rapidfuzz.distance.metrics_cpp import ( + damerau_levenshtein_similarity as similarity, + ) __all__ = ["distance", "normalized_distance", "normalized_similarity", "similarity"] diff --git a/src/rapidfuzz/distance/Hamming_cpp.py b/src/rapidfuzz/distance/Hamming_cpp.py index 94766927..6b03338a 100644 --- a/src/rapidfuzz/distance/Hamming_cpp.py +++ b/src/rapidfuzz/distance/Hamming_cpp.py @@ -1,29 +1,29 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2022 Max Bachmann -from rapidfuzz._feature_detector import AVX2, SSE2, supports +from rapidfuzz._feature_detector import AVX2, supports if supports(AVX2): from rapidfuzz.distance.metrics_cpp_avx2 import hamming_distance as distance + from rapidfuzz.distance.metrics_cpp_avx2 import hamming_editops as editops from rapidfuzz.distance.metrics_cpp_avx2 import ( hamming_normalized_distance as normalized_distance, ) from rapidfuzz.distance.metrics_cpp_avx2 import ( hamming_normalized_similarity as normalized_similarity, ) - from rapidfuzz.distance.metrics_cpp_avx2 import hamming_similarity as similarity - from rapidfuzz.distance.metrics_cpp_avx2 import hamming_editops as editops from rapidfuzz.distance.metrics_cpp_avx2 import hamming_opcodes as opcodes + from rapidfuzz.distance.metrics_cpp_avx2 import hamming_similarity as similarity else: from rapidfuzz.distance.metrics_cpp import hamming_distance as distance + from rapidfuzz.distance.metrics_cpp import hamming_editops as editops from rapidfuzz.distance.metrics_cpp import ( hamming_normalized_distance as normalized_distance, ) from rapidfuzz.distance.metrics_cpp import ( hamming_normalized_similarity as normalized_similarity, ) - from rapidfuzz.distance.metrics_cpp import hamming_similarity as similarity - from rapidfuzz.distance.metrics_cpp import hamming_editops as editops from rapidfuzz.distance.metrics_cpp import hamming_opcodes as opcodes + from rapidfuzz.distance.metrics_cpp import hamming_similarity as similarity __all__ = [ diff --git a/src/rapidfuzz/distance/Indel_cpp.py b/src/rapidfuzz/distance/Indel_cpp.py index a7874a5e..79809337 100644 --- a/src/rapidfuzz/distance/Indel_cpp.py +++ b/src/rapidfuzz/distance/Indel_cpp.py @@ -1,29 +1,29 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2022 Max Bachmann -from rapidfuzz._feature_detector import AVX2, SSE2, supports +from rapidfuzz._feature_detector import AVX2, supports if supports(AVX2): from rapidfuzz.distance.metrics_cpp_avx2 import indel_distance as distance + from rapidfuzz.distance.metrics_cpp_avx2 import indel_editops as editops from rapidfuzz.distance.metrics_cpp_avx2 import ( indel_normalized_distance as normalized_distance, ) from rapidfuzz.distance.metrics_cpp_avx2 import ( indel_normalized_similarity as normalized_similarity, ) - from rapidfuzz.distance.metrics_cpp_avx2 import indel_similarity as similarity - from rapidfuzz.distance.metrics_cpp_avx2 import indel_editops as editops from rapidfuzz.distance.metrics_cpp_avx2 import indel_opcodes as opcodes + from rapidfuzz.distance.metrics_cpp_avx2 import indel_similarity as similarity else: from rapidfuzz.distance.metrics_cpp import indel_distance as distance + from rapidfuzz.distance.metrics_cpp import indel_editops as editops from rapidfuzz.distance.metrics_cpp import ( indel_normalized_distance as normalized_distance, ) from rapidfuzz.distance.metrics_cpp import ( indel_normalized_similarity as normalized_similarity, ) - from rapidfuzz.distance.metrics_cpp import indel_similarity as similarity - from rapidfuzz.distance.metrics_cpp import indel_editops as editops from rapidfuzz.distance.metrics_cpp import indel_opcodes as opcodes + from rapidfuzz.distance.metrics_cpp import indel_similarity as similarity __all__ = [ diff --git a/src/rapidfuzz/distance/JaroWinkler_cpp.py b/src/rapidfuzz/distance/JaroWinkler_cpp.py index 4031acda..e6340fb4 100644 --- a/src/rapidfuzz/distance/JaroWinkler_cpp.py +++ b/src/rapidfuzz/distance/JaroWinkler_cpp.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2022 Max Bachmann -from rapidfuzz._feature_detector import AVX2, SSE2, supports +from rapidfuzz._feature_detector import AVX2, supports if supports(AVX2): from rapidfuzz.distance.metrics_cpp_avx2 import jaro_winkler_distance as distance @@ -10,7 +10,9 @@ from rapidfuzz.distance.metrics_cpp_avx2 import ( jaro_winkler_normalized_similarity as normalized_similarity, ) - from rapidfuzz.distance.metrics_cpp_avx2 import jaro_winkler_similarity as similarity + from rapidfuzz.distance.metrics_cpp_avx2 import ( + jaro_winkler_similarity as similarity, + ) else: from rapidfuzz.distance.metrics_cpp import jaro_winkler_distance as distance from rapidfuzz.distance.metrics_cpp import ( diff --git a/src/rapidfuzz/distance/Jaro_cpp.py b/src/rapidfuzz/distance/Jaro_cpp.py index 821cd35e..b929e84d 100644 --- a/src/rapidfuzz/distance/Jaro_cpp.py +++ b/src/rapidfuzz/distance/Jaro_cpp.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2022 Max Bachmann -from rapidfuzz._feature_detector import AVX2, SSE2, supports +from rapidfuzz._feature_detector import AVX2, supports if supports(AVX2): from rapidfuzz.distance.metrics_cpp_avx2 import jaro_distance as distance diff --git a/src/rapidfuzz/distance/LCSseq_cpp.py b/src/rapidfuzz/distance/LCSseq_cpp.py index 23550f5e..d237ef01 100644 --- a/src/rapidfuzz/distance/LCSseq_cpp.py +++ b/src/rapidfuzz/distance/LCSseq_cpp.py @@ -1,29 +1,29 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2022 Max Bachmann -from rapidfuzz._feature_detector import AVX2, SSE2, supports +from rapidfuzz._feature_detector import AVX2, supports if supports(AVX2): from rapidfuzz.distance.metrics_cpp_avx2 import lcs_seq_distance as distance + from rapidfuzz.distance.metrics_cpp_avx2 import lcs_seq_editops as editops from rapidfuzz.distance.metrics_cpp_avx2 import ( lcs_seq_normalized_distance as normalized_distance, ) from rapidfuzz.distance.metrics_cpp_avx2 import ( lcs_seq_normalized_similarity as normalized_similarity, ) - from rapidfuzz.distance.metrics_cpp_avx2 import lcs_seq_similarity as similarity - from rapidfuzz.distance.metrics_cpp_avx2 import lcs_seq_editops as editops from rapidfuzz.distance.metrics_cpp_avx2 import lcs_seq_opcodes as opcodes + from rapidfuzz.distance.metrics_cpp_avx2 import lcs_seq_similarity as similarity else: from rapidfuzz.distance.metrics_cpp import lcs_seq_distance as distance + from rapidfuzz.distance.metrics_cpp import lcs_seq_editops as editops from rapidfuzz.distance.metrics_cpp import ( lcs_seq_normalized_distance as normalized_distance, ) from rapidfuzz.distance.metrics_cpp import ( lcs_seq_normalized_similarity as normalized_similarity, ) - from rapidfuzz.distance.metrics_cpp import lcs_seq_similarity as similarity - from rapidfuzz.distance.metrics_cpp import lcs_seq_editops as editops from rapidfuzz.distance.metrics_cpp import lcs_seq_opcodes as opcodes + from rapidfuzz.distance.metrics_cpp import lcs_seq_similarity as similarity __all__ = [ diff --git a/src/rapidfuzz/distance/Levenshtein.py b/src/rapidfuzz/distance/Levenshtein.py index 3d053982..307d5be8 100644 --- a/src/rapidfuzz/distance/Levenshtein.py +++ b/src/rapidfuzz/distance/Levenshtein.py @@ -9,6 +9,8 @@ from __future__ import annotations +from typing import Any, Callable + from rapidfuzz._utils import ScorerFlag as _ScorerFlag from rapidfuzz._utils import fallback_import as _fallback_import diff --git a/src/rapidfuzz/distance/Levenshtein_cpp.py b/src/rapidfuzz/distance/Levenshtein_cpp.py index 161f90c5..1a3e9a2f 100644 --- a/src/rapidfuzz/distance/Levenshtein_cpp.py +++ b/src/rapidfuzz/distance/Levenshtein_cpp.py @@ -1,29 +1,29 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2022 Max Bachmann -from rapidfuzz._feature_detector import AVX2, SSE2, supports +from rapidfuzz._feature_detector import AVX2, supports if supports(AVX2): from rapidfuzz.distance.metrics_cpp_avx2 import levenshtein_distance as distance + from rapidfuzz.distance.metrics_cpp_avx2 import levenshtein_editops as editops from rapidfuzz.distance.metrics_cpp_avx2 import ( levenshtein_normalized_distance as normalized_distance, ) from rapidfuzz.distance.metrics_cpp_avx2 import ( levenshtein_normalized_similarity as normalized_similarity, ) - from rapidfuzz.distance.metrics_cpp_avx2 import levenshtein_similarity as similarity - from rapidfuzz.distance.metrics_cpp_avx2 import levenshtein_editops as editops from rapidfuzz.distance.metrics_cpp_avx2 import levenshtein_opcodes as opcodes + from rapidfuzz.distance.metrics_cpp_avx2 import levenshtein_similarity as similarity else: from rapidfuzz.distance.metrics_cpp import levenshtein_distance as distance + from rapidfuzz.distance.metrics_cpp import levenshtein_editops as editops from rapidfuzz.distance.metrics_cpp import ( levenshtein_normalized_distance as normalized_distance, ) from rapidfuzz.distance.metrics_cpp import ( levenshtein_normalized_similarity as normalized_similarity, ) - from rapidfuzz.distance.metrics_cpp import levenshtein_similarity as similarity - from rapidfuzz.distance.metrics_cpp import levenshtein_editops as editops from rapidfuzz.distance.metrics_cpp import levenshtein_opcodes as opcodes + from rapidfuzz.distance.metrics_cpp import levenshtein_similarity as similarity __all__ = [ diff --git a/src/rapidfuzz/distance/OSA_cpp.py b/src/rapidfuzz/distance/OSA_cpp.py index 2f05dcd8..f85e8495 100644 --- a/src/rapidfuzz/distance/OSA_cpp.py +++ b/src/rapidfuzz/distance/OSA_cpp.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2022 Max Bachmann -from rapidfuzz._feature_detector import AVX2, SSE2, supports +from rapidfuzz._feature_detector import AVX2, supports if supports(AVX2): from rapidfuzz.distance.metrics_cpp_avx2 import osa_distance as distance diff --git a/src/rapidfuzz/distance/Postfix_cpp.py b/src/rapidfuzz/distance/Postfix_cpp.py index 6dff9e07..e4e73fae 100644 --- a/src/rapidfuzz/distance/Postfix_cpp.py +++ b/src/rapidfuzz/distance/Postfix_cpp.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2022 Max Bachmann -from rapidfuzz._feature_detector import AVX2, SSE2, supports +from rapidfuzz._feature_detector import AVX2, supports if supports(AVX2): from rapidfuzz.distance.metrics_cpp_avx2 import postfix_distance as distance diff --git a/src/rapidfuzz/distance/Prefix_cpp.py b/src/rapidfuzz/distance/Prefix_cpp.py index fe3256b9..658c6676 100644 --- a/src/rapidfuzz/distance/Prefix_cpp.py +++ b/src/rapidfuzz/distance/Prefix_cpp.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2022 Max Bachmann -from rapidfuzz._feature_detector import AVX2, SSE2, supports +from rapidfuzz._feature_detector import AVX2, supports if supports(AVX2): from rapidfuzz.distance.metrics_cpp_avx2 import prefix_distance as distance diff --git a/src/rapidfuzz/distance/_initialize.pyi b/src/rapidfuzz/distance/_initialize.pyi index e2fd13ba..5a21c26d 100644 --- a/src/rapidfuzz/distance/_initialize.pyi +++ b/src/rapidfuzz/distance/_initialize.pyi @@ -35,7 +35,6 @@ class Editop: def __repr__(self) -> str: ... class Editops: - _src_len: int _dest_len: int _editops: list[Editop] diff --git a/src/rapidfuzz/distance/metrics.hpp b/src/rapidfuzz/distance/metrics.hpp index c25b1eff..b2a4e4cf 100644 --- a/src/rapidfuzz/distance/metrics.hpp +++ b/src/rapidfuzz/distance/metrics.hpp @@ -8,15 +8,14 @@ static inline int64_t levenshtein_distance_func(const RF_String& str1, const RF_ { return visitor(str1, str2, [&](auto s1, auto s2) { return rf::levenshtein_distance(s1, s2, {insertion, deletion, substitution}, score_cutoff, - score_hint); + score_hint); }); } static inline bool LevenshteinDistanceInit(RF_ScorerFunc* self, const RF_Kwargs* kwargs, int64_t str_count, const RF_String* str) { - rf::LevenshteinWeightTable weights = - *static_cast(kwargs->context); + rf::LevenshteinWeightTable weights = *static_cast(kwargs->context); #ifdef RAPIDFUZZ_X64 if (weights.insert_cost == 1 && weights.delete_cost == 1 && weights.replace_cost == 1) { @@ -33,8 +32,7 @@ static inline bool LevenshteinMultiStringSupport(const RF_Kwargs* kwargs) *static_cast(kwargs->context); #ifdef RAPIDFUZZ_X64 - if (weights.insert_cost == 1 && weights.delete_cost == 1 && weights.replace_cost == 1) - return true; + if (weights.insert_cost == 1 && weights.delete_cost == 1 && weights.replace_cost == 1) return true; #endif return false; } @@ -45,20 +43,20 @@ static inline double levenshtein_normalized_distance_func(const RF_String& str1, double score_hint) { return visitor(str1, str2, [&](auto s1, auto s2) { - return rf::levenshtein_normalized_distance(s1, s2, {insertion, deletion, substitution}, - score_cutoff, score_hint); + return rf::levenshtein_normalized_distance(s1, s2, {insertion, deletion, substitution}, score_cutoff, + score_hint); }); } static inline bool LevenshteinNormalizedDistanceInit(RF_ScorerFunc* self, const RF_Kwargs* kwargs, int64_t str_count, const RF_String* str) { - rf::LevenshteinWeightTable weights = - *static_cast(kwargs->context); + rf::LevenshteinWeightTable weights = *static_cast(kwargs->context); #ifdef RAPIDFUZZ_X64 if (weights.insert_cost == 1 && weights.delete_cost == 1 && weights.replace_cost == 1) { if (str_count != 1) - return multi_normalized_distance_init(self, str_count, str); + return multi_normalized_distance_init(self, str_count, + str); } #endif @@ -71,15 +69,14 @@ static inline int64_t levenshtein_similarity_func(const RF_String& str1, const R { return visitor(str1, str2, [&](auto s1, auto s2) { return rf::levenshtein_similarity(s1, s2, {insertion, deletion, substitution}, score_cutoff, - score_hint); + score_hint); }); } static inline bool LevenshteinSimilarityInit(RF_ScorerFunc* self, const RF_Kwargs* kwargs, int64_t str_count, const RF_String* str) { - rf::LevenshteinWeightTable weights = - *static_cast(kwargs->context); + rf::LevenshteinWeightTable weights = *static_cast(kwargs->context); #ifdef RAPIDFUZZ_X64 if (weights.insert_cost == 1 && weights.delete_cost == 1 && weights.replace_cost == 1) { @@ -98,19 +95,19 @@ static inline double levenshtein_normalized_similarity_func(const RF_String& str { return visitor(str1, str2, [&](auto s1, auto s2) { return rf::levenshtein_normalized_similarity(s1, s2, {insertion, deletion, substitution}, - score_cutoff, score_hint); + score_cutoff, score_hint); }); } static inline bool LevenshteinNormalizedSimilarityInit(RF_ScorerFunc* self, const RF_Kwargs* kwargs, int64_t str_count, const RF_String* str) { - rf::LevenshteinWeightTable weights = - *static_cast(kwargs->context); + rf::LevenshteinWeightTable weights = *static_cast(kwargs->context); #ifdef RAPIDFUZZ_X64 if (weights.insert_cost == 1 && weights.delete_cost == 1 && weights.replace_cost == 1) { if (str_count != 1) - return multi_normalized_similarity_init(self, str_count, str); + return multi_normalized_similarity_init( + self, str_count, str); } #endif @@ -142,8 +139,7 @@ static inline double damerau_levenshtein_normalized_distance_func(const RF_Strin static inline bool DamerauLevenshteinNormalizedDistanceInit(RF_ScorerFunc* self, const RF_Kwargs*, int64_t str_count, const RF_String* str) { - return normalized_distance_init( - self, str_count, str); + return normalized_distance_init(self, str_count, str); } static inline int64_t damerau_levenshtein_similarity_func(const RF_String& str1, const RF_String& str2, @@ -171,8 +167,8 @@ static inline double damerau_levenshtein_normalized_similarity_func(const RF_Str static inline bool DamerauLevenshteinNormalizedSimilarityInit(RF_ScorerFunc* self, const RF_Kwargs*, int64_t str_count, const RF_String* str) { - return normalized_similarity_init( - self, str_count, str); + return normalized_similarity_init(self, str_count, + str); } /* Hamming */ @@ -261,8 +257,8 @@ static inline double indel_normalized_distance_func(const RF_String& str1, const return rf::indel_normalized_distance(s1, s2, score_cutoff); }); } -static inline bool IndelNormalizedDistanceInit(RF_ScorerFunc* self, const RF_Kwargs*, - int64_t str_count, const RF_String* str) +static inline bool IndelNormalizedDistanceInit(RF_ScorerFunc* self, const RF_Kwargs*, int64_t str_count, + const RF_String* str) { #ifdef RAPIDFUZZ_X64 if (str_count != 1) @@ -297,8 +293,8 @@ static inline double indel_normalized_similarity_func(const RF_String& str1, con return rf::indel_normalized_similarity(s1, s2, score_cutoff); }); } -static inline bool IndelNormalizedSimilarityInit(RF_ScorerFunc* self, const RF_Kwargs*, - int64_t str_count, const RF_String* str) +static inline bool IndelNormalizedSimilarityInit(RF_ScorerFunc* self, const RF_Kwargs*, int64_t str_count, + const RF_String* str) { #ifdef RAPIDFUZZ_X64 if (str_count != 1) @@ -342,8 +338,8 @@ static inline double lcs_seq_normalized_distance_func(const RF_String& str1, con return rf::lcs_seq_normalized_distance(s1, s2, score_cutoff); }); } -static inline bool LCSseqNormalizedDistanceInit(RF_ScorerFunc* self, const RF_Kwargs*, - int64_t str_count, const RF_String* str) +static inline bool LCSseqNormalizedDistanceInit(RF_ScorerFunc* self, const RF_Kwargs*, int64_t str_count, + const RF_String* str) { #ifdef RAPIDFUZZ_X64 if (str_count != 1) @@ -378,8 +374,8 @@ static inline double lcs_seq_normalized_similarity_func(const RF_String& str1, c return rf::lcs_seq_normalized_similarity(s1, s2, score_cutoff); }); } -static inline bool LCSseqNormalizedSimilarityInit(RF_ScorerFunc* self, const RF_Kwargs*, - int64_t str_count, const RF_String* str) +static inline bool LCSseqNormalizedSimilarityInit(RF_ScorerFunc* self, const RF_Kwargs*, int64_t str_count, + const RF_String* str) { #ifdef RAPIDFUZZ_X64 if (str_count != 1) @@ -397,7 +393,7 @@ static inline rf::Editops hamming_editops_func(const RF_String& str1, const RF_S } static inline rf::Editops levenshtein_editops_func(const RF_String& str1, const RF_String& str2, - int64_t score_hint) + int64_t score_hint) { return visitor(str1, str2, [&](auto s1, auto s2) { return rf::levenshtein_editops(s1, s2, score_hint); @@ -430,8 +426,7 @@ static inline bool OSADistanceInit(RF_ScorerFunc* self, const RF_Kwargs*, int64_ const RF_String* str) { #ifdef RAPIDFUZZ_X64 - if (str_count != 1) - return multi_distance_init(self, str_count, str); + if (str_count != 1) return multi_distance_init(self, str_count, str); #endif return distance_init(self, str_count, str); @@ -489,8 +484,8 @@ static inline double osa_normalized_similarity_func(const RF_String& str1, const return rf::osa_normalized_similarity(s1, s2, score_cutoff); }); } -static inline bool OSANormalizedSimilarityInit(RF_ScorerFunc* self, const RF_Kwargs*, - int64_t str_count, const RF_String* str) +static inline bool OSANormalizedSimilarityInit(RF_ScorerFunc* self, const RF_Kwargs*, int64_t str_count, + const RF_String* str) { #ifdef RAPIDFUZZ_X64 if (str_count != 1) @@ -577,8 +572,7 @@ static inline bool JaroWinklerNormalizedDistanceInit(RF_ScorerFunc* self, const int64_t str_count, const RF_String* str) { double prefix_weight = *static_cast(kwargs->context); - return normalized_distance_init(self, str_count, str, - prefix_weight); + return normalized_distance_init(self, str_count, str, prefix_weight); } static inline double jaro_winkler_similarity_func(const RF_String& str1, const RF_String& str2, @@ -606,8 +600,7 @@ static inline bool JaroWinklerNormalizedSimilarityInit(RF_ScorerFunc* self, cons int64_t str_count, const RF_String* str) { double prefix_weight = *static_cast(kwargs->context); - return normalized_similarity_init(self, str_count, str, - prefix_weight); + return normalized_similarity_init(self, str_count, str, prefix_weight); } /* Prefix */ diff --git a/src/rapidfuzz/distance/metrics_cpp_avx2.pyx b/src/rapidfuzz/distance/metrics_cpp_avx2.pyx index 3a4b5df8..f0d2667a 100644 --- a/src/rapidfuzz/distance/metrics_cpp_avx2.pyx +++ b/src/rapidfuzz/distance/metrics_cpp_avx2.pyx @@ -1,2 +1,2 @@ # this is a hack since I could not get this to build in cmake without it -include "metrics_cpp.pyx" \ No newline at end of file +include "metrics_cpp.pyx" diff --git a/src/rapidfuzz/fuzz.pyi b/src/rapidfuzz/fuzz.pyi index e0582aa1..9c2f8cd3 100644 --- a/src/rapidfuzz/fuzz.pyi +++ b/src/rapidfuzz/fuzz.pyi @@ -3,7 +3,7 @@ from __future__ import annotations -from typing import Any, Callable, Hashable, Sequence +from typing import Callable, Hashable, Sequence from rapidfuzz.distance import ScoreAlignment from rapidfuzz.utils import default_process diff --git a/src/rapidfuzz/fuzz_cpp.hpp b/src/rapidfuzz/fuzz_cpp.hpp index 5242c932..974e94c7 100644 --- a/src/rapidfuzz/fuzz_cpp.hpp +++ b/src/rapidfuzz/fuzz_cpp.hpp @@ -7,8 +7,7 @@ static inline double ratio_func(const RF_String& str1, const RF_String& str2, do return fuzz::ratio(s1, s2, score_cutoff); }); } -static inline bool RatioInit(RF_ScorerFunc* self, const RF_Kwargs*, int64_t str_count, - const RF_String* str) +static inline bool RatioInit(RF_ScorerFunc* self, const RF_Kwargs*, int64_t str_count, const RF_String* str) { #ifdef RAPIDFUZZ_X64 if (str_count != 1) @@ -40,7 +39,7 @@ static inline bool PartialRatioInit(RF_ScorerFunc* self, const RF_Kwargs*, int64 } rf::ScoreAlignment partial_ratio_alignment_func(const RF_String& str1, const RF_String& str2, - double score_cutoff) + double score_cutoff) { return visitor(str1, str2, [&](auto s1, auto s2) { return fuzz::partial_ratio_alignment(s1, s2, score_cutoff); @@ -58,7 +57,8 @@ static inline bool TokenSortRatioInit(RF_ScorerFunc* self, const RF_Kwargs*, int { #ifdef RAPIDFUZZ_X64 if (str_count != 1) - return multi_similarity_init(self, str_count, str); + return multi_similarity_init(self, str_count, + str); #endif return similarity_init(self, str_count, str); @@ -144,8 +144,7 @@ static inline double QRatio_func(const RF_String& str1, const RF_String& str2, d return fuzz::QRatio(s1, s2, score_cutoff); }); } -static inline bool QRatioInit(RF_ScorerFunc* self, const RF_Kwargs*, int64_t str_count, - const RF_String* str) +static inline bool QRatioInit(RF_ScorerFunc* self, const RF_Kwargs*, int64_t str_count, const RF_String* str) { #ifdef RAPIDFUZZ_X64 if (str_count != 1) diff --git a/src/rapidfuzz/fuzz_cpp.py b/src/rapidfuzz/fuzz_cpp.py index 99da293b..e00d3df0 100644 --- a/src/rapidfuzz/fuzz_cpp.py +++ b/src/rapidfuzz/fuzz_cpp.py @@ -2,19 +2,47 @@ # Copyright (C) 2022 Max Bachmann from __future__ import annotations -from rapidfuzz._feature_detector import AVX2, SSE2, supports +from rapidfuzz._feature_detector import AVX2, supports + +__all__ = [ + "QRatio", + "WRatio", + "partial_ratio", + "partial_ratio_alignment", + "partial_token_ratio", + "partial_token_set_ratio", + "partial_token_sort_ratio", + "ratio", + "token_ratio", + "token_set_ratio", + "token_sort_ratio", +] if supports(AVX2): from rapidfuzz.fuzz_cpp_impl_avx2 import ( - ratio, partial_ratio, - partial_ratio_alignment, token_sort_ratio, token_set_ratio, - token_ratio, partial_token_ratio, partial_token_sort_ratio, partial_token_set_ratio, - WRatio, QRatio + QRatio, + WRatio, + partial_ratio, + partial_ratio_alignment, + partial_token_ratio, + partial_token_set_ratio, + partial_token_sort_ratio, + ratio, + token_ratio, + token_set_ratio, + token_sort_ratio, ) else: from rapidfuzz.fuzz_cpp_impl import ( - ratio, partial_ratio, - partial_ratio_alignment, token_sort_ratio, token_set_ratio, - token_ratio, partial_token_ratio, partial_token_sort_ratio, partial_token_set_ratio, - WRatio, QRatio + QRatio, + WRatio, + partial_ratio, + partial_ratio_alignment, + partial_token_ratio, + partial_token_set_ratio, + partial_token_sort_ratio, + ratio, + token_ratio, + token_set_ratio, + token_sort_ratio, ) diff --git a/src/rapidfuzz/fuzz_cpp_impl_avx2.pyx b/src/rapidfuzz/fuzz_cpp_impl_avx2.pyx index 0c1c8309..8ba9f60e 100644 --- a/src/rapidfuzz/fuzz_cpp_impl_avx2.pyx +++ b/src/rapidfuzz/fuzz_cpp_impl_avx2.pyx @@ -1,2 +1,2 @@ # this is a hack since I could not get this to build in cmake without it -include "fuzz_cpp_impl.pyx" \ No newline at end of file +include "fuzz_cpp_impl.pyx" diff --git a/src/rapidfuzz/fuzz_py.py b/src/rapidfuzz/fuzz_py.py index e42dbd34..3f6152f8 100644 --- a/src/rapidfuzz/fuzz_py.py +++ b/src/rapidfuzz/fuzz_py.py @@ -2,8 +2,8 @@ # Copyright (C) 2022 Max Bachmann from __future__ import annotations -from math import ceil, isnan -from typing import Any, Callable, Hashable +from math import ceil +from typing import Callable, Hashable from rapidfuzz._utils import is_none from rapidfuzz.distance import ScoreAlignment diff --git a/src/rapidfuzz/process_py.py b/src/rapidfuzz/process_py.py index 99e03671..6c296357 100644 --- a/src/rapidfuzz/process_py.py +++ b/src/rapidfuzz/process_py.py @@ -6,7 +6,6 @@ import heapq from math import isnan from typing import ( - TYPE_CHECKING, Any, Callable, Collection, @@ -523,7 +522,7 @@ def extract( try: import numpy as np -except: +except BaseException: pass diff --git a/tests/distance/test_Jaro.py b/tests/distance/test_Jaro.py index 5c1d52be..50b8ebf4 100644 --- a/tests/distance/test_Jaro.py +++ b/tests/distance/test_Jaro.py @@ -1,4 +1,5 @@ import pytest + from tests.distance.common import Jaro diff --git a/tests/distance/test_JaroWinkler.py b/tests/distance/test_JaroWinkler.py index ee2e2a22..05b0f427 100644 --- a/tests/distance/test_JaroWinkler.py +++ b/tests/distance/test_JaroWinkler.py @@ -1,4 +1,5 @@ import pytest + from tests.distance.common import JaroWinkler diff --git a/tests/distance/test_OSA.py b/tests/distance/test_OSA.py index 4aae00ab..21bdd631 100644 --- a/tests/distance/test_OSA.py +++ b/tests/distance/test_OSA.py @@ -1,4 +1,4 @@ -from rapidfuzz.distance import OSA_cpp, OSA_py +from rapidfuzz.distance import OSA_cpp from tests.distance.common import OSA diff --git a/tests/distance/test_distance.py b/tests/distance/test_distance.py index 89debf3a..f8c8d2d8 100644 --- a/tests/distance/test_distance.py +++ b/tests/distance/test_distance.py @@ -1,4 +1,5 @@ import pytest + from tests.distance.common import all_scorer_modules diff --git a/tests/test_fuzz.py b/tests/test_fuzz.py index 1145c6fb..6ce09809 100644 --- a/tests/test_fuzz.py +++ b/tests/test_fuzz.py @@ -4,7 +4,8 @@ from rapidfuzz import fuzz_cpp, fuzz_py, utils from rapidfuzz.distance import ScoreAlignment -from .common import symmetric_scorer_tester, scorer_tester + +from .common import symmetric_scorer_tester class fuzz: diff --git a/tests/test_hypothesis.py b/tests/test_hypothesis.py index 0787ee80..6d1969a0 100644 --- a/tests/test_hypothesis.py +++ b/tests/test_hypothesis.py @@ -8,14 +8,7 @@ from hypothesis import assume, given, settings from rapidfuzz import fuzz, process, utils -from rapidfuzz.distance import ( - Indel_cpp, - Indel_py, - JaroWinkler_cpp, - JaroWinkler_py, - Levenshtein_cpp, - Levenshtein_py, -) +from rapidfuzz.distance import Indel_cpp, Levenshtein_cpp, Levenshtein_py from tests.distance.common import Indel, JaroWinkler, Levenshtein diff --git a/tests/test_process.py b/tests/test_process.py index a2acd9be..1568e505 100644 --- a/tests/test_process.py +++ b/tests/test_process.py @@ -1,12 +1,11 @@ import pytest from rapidfuzz import fuzz, process_cpp, process_py -from rapidfuzz.distance import Levenshtein -from rapidfuzz.distance import Levenshtein_py +from rapidfuzz.distance import Levenshtein, Levenshtein_py try: import numpy as np -except: +except BaseException: pass @@ -406,11 +405,12 @@ def test_generators(): We should be able to use a generators as choices in process.extract as long as they are finite. """ + def generate_choices(): - choices = ['a', 'Bb', 'CcC'] + choices = ["a", "Bb", "CcC"] yield from choices - search = 'aaa' + search = "aaa" res1 = process_cpp.extract(search, generate_choices()) res2 = process_py.extract(search, generate_choices()) assert res1 == res2