Skip to content

Commit

Permalink
apply code formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
maxbachmann committed Apr 11, 2023
1 parent 5e3edde commit 4fc08a5
Show file tree
Hide file tree
Showing 43 changed files with 212 additions and 179 deletions.
5 changes: 3 additions & 2 deletions .codespell-ignore-lines
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
"C'est la vie",
"c est la vie",
"C'est la vie",
"c est la vie",
>>> s2 = "cetain"
2 changes: 1 addition & 1 deletion .github/workflows/releasebuild.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
python_tag: "pp38-*"
- arch: auto32
python_tag: "pp39-*"

# ARM64 is only supported on cpython >= 3.9
- arch: ARM64
python_tag: "pp37-*"
Expand Down
30 changes: 15 additions & 15 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
repos:
# Standard hooks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: "v4.3.0"
rev: "v4.4.0"
hooks:
- id: check-added-large-files
- id: check-case-conflict
Expand All @@ -32,53 +32,53 @@ repos:

# Upgrade old Python syntax
- repo: https://github.com/asottile/pyupgrade
rev: "v2.38.2"
rev: "v3.3.1"
hooks:
- id: pyupgrade
args: [--py37-plus]

# Nicely sort includes
- repo: https://github.com/PyCQA/isort
rev: "5.10.1"
rev: "5.12.0"
hooks:
- id: isort

# Black, the code formatter, natively supports pre-commit
- repo: https://github.com/psf/black
rev: "22.8.0" # Keep in sync with blacken-docs
rev: "23.3.0" # Keep in sync with blacken-docs
hooks:
- id: black

# Also code format the docs
- repo: https://github.com/asottile/blacken-docs
rev: "v1.12.1"
rev: "1.13.0"
hooks:
- id: blacken-docs
additional_dependencies:
- black==22.8.0 # keep in sync with black hook

# Changes tabs to spaces
- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: "v1.3.1"
rev: "v1.5.1"
hooks:
- id: remove-tabs

- repo: https://github.com/sirosen/texthooks
rev: "0.4.0"
rev: "0.5.0"
hooks:
- id: fix-ligatures
- id: fix-smartquotes

# Autoremoves unused imports
- repo: https://github.com/hadialqattan/pycln
rev: "v2.1.1"
rev: "v2.1.3"
hooks:
- id: pycln
stages: [manual]

# Checking for common mistakes
- repo: https://github.com/pre-commit/pygrep-hooks
rev: "v1.9.0"
rev: "v1.10.0"
hooks:
- id: python-check-blanket-noqa
- id: python-check-blanket-type-ignore
Expand All @@ -99,15 +99,15 @@ repos:

# Flake8 also supports pre-commit natively (same author)
- repo: https://github.com/PyCQA/flake8
rev: "5.0.4"
rev: "6.0.0"
hooks:
- id: flake8
exclude: ^(docs/.*|tools/.*)$
additional_dependencies: *flake8_dependencies

# PyLint has native support - not always usable, but works for us
- repo: https://github.com/PyCQA/pylint
rev: "v2.15.3"
rev: "v3.0.0a6"
hooks:
- id: pylint
files: ^pybind11
Expand All @@ -132,7 +132,7 @@ repos:

# Checks the manifest for missing files (native support)
- repo: https://github.com/mgedmin/check-manifest
rev: "0.48"
rev: "0.49"
hooks:
- id: check-manifest
# This is a slow hook, so only run this if --hook-stage manual is passed
Expand All @@ -143,15 +143,15 @@ repos:
# Use tools/codespell_ignore_lines_from_errors.py
# to rebuild .codespell-ignore-lines
- repo: https://github.com/codespell-project/codespell
rev: "v2.2.1"
rev: "v2.2.4"
hooks:
- id: codespell
exclude: ".supp$"
args: ["-x", ".codespell-ignore-lines"]

# Check for common shell mistakes
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: "v0.8.0.4"
rev: "v0.9.0.2"
hooks:
- id: shellcheck

Expand All @@ -166,7 +166,7 @@ repos:

# Clang format the codebase automatically
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: "v14.0.6"
rev: "v16.0.0"
hooks:
- id: clang-format
types_or: [c++, c]
11 changes: 6 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,19 @@ project(rapidfuzz LANGUAGES C CXX)

include(CMakePrintHelpers)
cmake_print_variables(CMAKE_AR CMAKE_C_COMPILER_AR CMAKE_CXX_COMPILER_AR)
cmake_print_variables(CMAKE_RANLIB CMAKE_C_COMPILER_RANLIB CMAKE_CXX_COMPILER_RANLIB)
cmake_print_variables(CMAKE_RANLIB CMAKE_C_COMPILER_RANLIB
CMAKE_CXX_COMPILER_RANLIB)

if ("${CMAKE_C_COMPILER_AR}" STREQUAL "")
if("${CMAKE_C_COMPILER_AR}" STREQUAL "")
set(CMAKE_C_COMPILER_AR "${CMAKE_AR}")
endif()
if ("${CMAKE_CXX_COMPILER_AR}" STREQUAL "")
if("${CMAKE_CXX_COMPILER_AR}" STREQUAL "")
set(CMAKE_CXX_COMPILER_AR "${CMAKE_AR}")
endif()
if ("${CMAKE_C_COMPILER_RANLIB}" STREQUAL "")
if("${CMAKE_C_COMPILER_RANLIB}" STREQUAL "")
set(CMAKE_C_COMPILER_RANLIB "${CMAKE_RANLIB}")
endif()
if ("${CMAKE_CXX_COMPILER_RANLIB}" STREQUAL "")
if("${CMAKE_CXX_COMPILER_RANLIB}" STREQUAL "")
set(CMAKE_CXX_COMPILER_RANLIB "${CMAKE_RANLIB}")
endif()

Expand Down
44 changes: 22 additions & 22 deletions bench/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@

import click
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from common import benchmark, find_versions
from packaging.version import Version
from tqdm import tqdm

Expand Down Expand Up @@ -52,7 +50,7 @@ def benchmark(result, setup, func, queries, choices):
# - a custom range
# - only the installed version
def get_rapidfuzz_versions(version):
url = f"https://pypi.org/pypi/rapidfuzz/json"
url = "https://pypi.org/pypi/rapidfuzz/json"
data = json.load(urlopen(url))
versions = list(data["releases"].keys())
versions.sort(key=Version, reverse=True)
Expand Down Expand Up @@ -115,8 +113,8 @@ def run_benchmarks_rapidfuzz(rapidfuzz_version, func_name, dataset, result_df):
func = "[[scorer(a, b) for a in queries] for b in choices]"

if rapidfuzz_version == "current":
print(f"Benchmarking rapidfuzz")
result_df[f"rapidfuzz"] = run_benchmark(dataset, setup, func)
print("Benchmarking rapidfuzz")
result_df["rapidfuzz"] = run_benchmark(dataset, setup, func)
else:
rapidfuzz_versions = get_rapidfuzz_versions(rapidfuzz_version)
os.environ["RAPIDFUZZ_BUILD_EXTENSION"] = "1"
Expand Down Expand Up @@ -153,53 +151,55 @@ def run_benchmarks_jellyfish(func_name, result_df, dataset):
if func_name not in JELLYFISH_SCORERS:
return

print(f"Benchmarking jellyfish")
print("Benchmarking jellyfish")
func = f"[[jellyfish.{JELLYFISH_SCORERS[func_name]}(a, b) for a in queries] for b in choices]"
result_df[f"jellyfish"] = run_benchmark(dataset, setup, func)
result_df["jellyfish"] = run_benchmark(dataset, setup, func)


def run_benchmarks_polyleven(func_name, result_df, dataset):
if func_name != "Levenshtein":
return

print(f"Benchmarking polyleven")
print("Benchmarking polyleven")
setup = "import polyleven"
func = f"[[polyleven.levenshtein(a, b) for a in queries] for b in choices]"
result_df[f"polyleven"] = run_benchmark(dataset, setup, func)
func = "[[polyleven.levenshtein(a, b) for a in queries] for b in choices]"
result_df["polyleven"] = run_benchmark(dataset, setup, func)


def run_benchmarks_edlib(func_name, result_df, dataset):
if func_name != "Levenshtein":
return

print(f"Benchmarking edlib")
print("Benchmarking edlib")
setup = "import edlib"
func = f"[[edlib.align(a, b) for a in queries] for b in choices]"
result_df[f"edlib"] = run_benchmark(dataset, setup, func)
func = "[[edlib.align(a, b) for a in queries] for b in choices]"
result_df["edlib"] = run_benchmark(dataset, setup, func)

print(f"Benchmarking edlib(k=max)")
func = f"[[edlib.align(a, b, k=max(len(a), len(b))) for a in queries] for b in choices]"
result_df[f"edlib(k=max)"] = run_benchmark(dataset, setup, func)
print("Benchmarking edlib(k=max)")
func = (
"[[edlib.align(a, b, k=max(len(a), len(b))) for a in queries] for b in choices]"
)
result_df["edlib(k=max)"] = run_benchmark(dataset, setup, func)


def run_benchmarks_editdistance(func_name, result_df, dataset):
if func_name != "Levenshtein":
return

print(f"Benchmarking editdistance")
print("Benchmarking editdistance")
setup = "import editdistance"
func = f"[[editdistance.eval(a, b) for a in queries] for b in choices]"
result_df[f"editdistance"] = run_benchmark(dataset, setup, func)
func = "[[editdistance.eval(a, b) for a in queries] for b in choices]"
result_df["editdistance"] = run_benchmark(dataset, setup, func)


def run_benchmarks_pyxdameraulevenshtein(func_name, result_df, dataset):
if func_name != "OSA":
return

print(f"Benchmarking pyxdameraulevenshtein")
print("Benchmarking pyxdameraulevenshtein")
setup = "import pyxdameraulevenshtein"
func = f"[[pyxdameraulevenshtein.damerau_levenshtein_distance(a, b) for a in queries] for b in choices]"
result_df[f"pyxdameraulevenshtein"] = run_benchmark(dataset, setup, func)
func = "[[pyxdameraulevenshtein.damerau_levenshtein_distance(a, b) for a in queries] for b in choices]"
result_df["pyxdameraulevenshtein"] = run_benchmark(dataset, setup, func)


AVAILABLE_BENCHMARKS = [
Expand Down
4 changes: 2 additions & 2 deletions bench/benchmark_visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@

ax = df.plot(x="x_axis")

#plt.xticks(list(range(0, 64*20+1, 64)))
# plt.xticks(list(range(0, 64*20+1, 64)))

plt.title(
"Performance comparison of the \nDamerauLevenshtein similarity in different libraries"
)
plt.xlabel("string length [in characters]")
plt.ylabel("runtime [μs]")
ax.set_xlim(xmin=0)
#ax.set_ylim(bottom=0)
# ax.set_ylim(bottom=0)
# ax.set_yscale('log')
plt.grid()
plt.show()
6 changes: 4 additions & 2 deletions bench/common.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import timeit

import json
import timeit
from urllib.request import urlopen

from packaging.version import Version


def find_versions(package_name):
url = f"https://pypi.org/pypi/{package_name}/json"
data = json.load(urlopen(url))
versions = list(data["releases"].keys())
versions.sort(key=Version, reverse=True)
return versions


def benchmark(name, func, setup, lengths, count):
print(f"starting {name}")
start = timeit.default_timer()
Expand Down
13 changes: 6 additions & 7 deletions bench/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
tqdm
numpy
editdistance
edlib
jellyfish
matplotlib
numpy
pandas
jellyfish
polyleven
pyxdameraulevenshtein
thefuzz
editdistance
edlib
polyleven

tqdm
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ Distributed under a :ref:`MIT License <my-license-label>`, RapidFuzz is develope
References
GitHub Repository <https://github.com/maxbachmann/RapidFuzz>

.. |header-image| image:: img/RapidFuzz.svg
.. |header-image| image:: img/RapidFuzz.svg
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
max-line-length = 120
show_source = True
exclude = .git, __pycache__, build, dist, docs, tools, venv
extend-ignore = E203, E722, B903, B950, N801, N802, N806
extend-ignore = E203, E722, B903, B950, N801, N802, N806, B905, B907
extend-select = B9
12 changes: 8 additions & 4 deletions src/rapidfuzz/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,11 @@ endfunction(rf_add_library)

if(RAPIDFUZZ_ARCH_X64)
create_cython_target(_feature_detector_cpp)
rf_add_library(_feature_detector_cpp ${_feature_detector_cpp} ${CMAKE_CURRENT_LIST_DIR}/FeatureDetector/CpuInfo.cpp)
rf_add_library(_feature_detector_cpp ${_feature_detector_cpp}
${CMAKE_CURRENT_LIST_DIR}/FeatureDetector/CpuInfo.cpp)
target_compile_features(_feature_detector_cpp PUBLIC cxx_std_17)
target_include_directories(_feature_detector_cpp PRIVATE ${RF_BASE_DIR}/rapidfuzz)
target_include_directories(_feature_detector_cpp
PRIVATE ${RF_BASE_DIR}/rapidfuzz)
target_link_libraries(_feature_detector_cpp PRIVATE rapidfuzz::rapidfuzz)
install(TARGETS _feature_detector_cpp LIBRARY DESTINATION src/rapidfuzz)
endif()
Expand All @@ -60,12 +62,14 @@ if(RAPIDFUZZ_ARCH_X64)
target_compile_features(fuzz_cpp_impl_avx2 PUBLIC cxx_std_17)

if(MSVC)
set_target_properties(fuzz_cpp_impl_avx2 PROPERTIES COMPILE_FLAGS "/arch:AVX2")
set_target_properties(fuzz_cpp_impl_avx2 PROPERTIES COMPILE_FLAGS
"/arch:AVX2")
else()
set_target_properties(fuzz_cpp_impl_avx2 PROPERTIES COMPILE_FLAGS "-mavx2")
endif()

target_include_directories(fuzz_cpp_impl_avx2 PRIVATE ${RF_BASE_DIR}/rapidfuzz)
target_include_directories(fuzz_cpp_impl_avx2
PRIVATE ${RF_BASE_DIR}/rapidfuzz)
target_link_libraries(fuzz_cpp_impl_avx2 PRIVATE rapidfuzz::rapidfuzz)
install(TARGETS fuzz_cpp_impl_avx2 LIBRARY DESTINATION src/rapidfuzz)
endif()
Expand Down
6 changes: 4 additions & 2 deletions src/rapidfuzz/_feature_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
# Copyright (C) 2022 Max Bachmann
from __future__ import annotations

__all__ = ["AVX2", "SSE2", "supports"]

try:
from rapidfuzz._feature_detector_cpp import AVX2, SSE2, supports
except:
except ImportError:
SSE2 = 1
AVX2 = 2

def supports(features):
return False
return False
1 change: 1 addition & 0 deletions src/rapidfuzz/_feature_detector_cpp.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from libc.stdint cimport uint32_t


cdef extern from "FeatureDetector/CpuInfo.hpp":
cdef int CPU_FEATURE_SSE2
cdef int CPU_FEATURE_AVX2
Expand Down
Loading

0 comments on commit 4fc08a5

Please sign in to comment.