diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 49d46d0..180efed 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: # Standard hooks - repo: https://github.com/pre-commit/pre-commit-hooks - rev: "v4.4.0" + rev: "v4.5.0" hooks: - id: check-added-large-files - id: check-case-conflict @@ -38,7 +38,7 @@ repos: # Also code format the docs - repo: https://github.com/asottile/blacken-docs - rev: "1.13.0" + rev: "1.16.0" hooks: - id: blacken-docs additional_dependencies: @@ -46,12 +46,12 @@ repos: # Changes tabs to spaces - repo: https://github.com/Lucas-C/pre-commit-hooks - rev: "v1.5.1" + rev: "v1.5.5" hooks: - id: remove-tabs - repo: https://github.com/sirosen/texthooks - rev: "0.5.0" + rev: "0.6.4" hooks: - id: fix-ligatures - id: fix-smartquotes @@ -101,14 +101,14 @@ repos: additional_dependencies: [cmake, ninja] - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.261 + rev: v0.3.0 hooks: - id: ruff args: ["--fix", "--show-fixes"] # Check for spelling - repo: https://github.com/codespell-project/codespell - rev: "v2.2.4" + rev: "v2.2.6" hooks: - id: codespell exclude: ".*/test_.*.py" @@ -116,7 +116,7 @@ repos: # Check for common shell mistakes - repo: https://github.com/shellcheck-py/shellcheck-py - rev: "v0.9.0.2" + rev: "v0.9.0.6" hooks: - id: shellcheck @@ -131,7 +131,7 @@ repos: # Clang format the codebase automatically - repo: https://github.com/pre-commit/mirrors-clang-format - rev: "v16.0.0" + rev: "v17.0.6" hooks: - id: clang-format types_or: [c++, c] diff --git a/README.md b/README.md index 9421725..91db178 100644 --- a/README.md +++ b/README.md @@ -211,8 +211,8 @@ The first benchmark compares the performance of the scorers in FuzzyWuzzy and Ra from Python in the following way: ```python3 for sample in samples: - for word in words: - scorer(sample, word) + for word in words: + scorer(sample, word) ``` The following graph shows how many elements are processed per second with each of the scorers. There are big performance differences between the different scorers. However each of the scorers is faster in RapidFuzz diff --git a/bench/benchmark.py b/bench/benchmark.py index 146ca34..d2d2a46 100644 --- a/bench/benchmark.py +++ b/bench/benchmark.py @@ -121,6 +121,7 @@ def run_benchmarks_rapidfuzz(rapidfuzz_version, func_name, dataset, result_df): [sys.executable, "-m", "pip", "install", f"rapidfuzz=={version}"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, + check=False, ) # older versions are likely not installable if res.returncode: diff --git a/bench/benchmark_partial_ratio_long_needle.py b/bench/benchmark_partial_ratio_long_needle.py index fc24426..029e44a 100644 --- a/bench/benchmark_partial_ratio_long_needle.py +++ b/bench/benchmark_partial_ratio_long_needle.py @@ -43,7 +43,7 @@ time_fuzzywuzzy = ( list(range(0, 256, 2)), count, ) - + [np.NaN] * 128 + + [np.nan] * 128 ) results = pd.DataFrame( diff --git a/bench/benchmark_partial_ratio_short_needle.py b/bench/benchmark_partial_ratio_short_needle.py index d290727..db381a5 100644 --- a/bench/benchmark_partial_ratio_short_needle.py +++ b/bench/benchmark_partial_ratio_short_needle.py @@ -43,7 +43,7 @@ time_fuzzywuzzy = ( list(range(64, 256, 2)), count, ) - + [np.NaN] * 128 + + [np.nan] * 128 ) results = pd.DataFrame( diff --git a/pyproject.toml b/pyproject.toml index dd061b3..6810f59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,8 +45,12 @@ disable = [ "missing-module-docstring", ] - [tool.ruff] +target-version = "py37" +src = ["src"] +exclude = [] + +[tool.ruff.lint] select = [ "E", "F", "W", # flake8 "B", # flake8-bugbear @@ -78,17 +82,14 @@ extend-ignore = [ "PT004", # Use underscore for non-returning fixture (use usefixture instead) "PTH123", # use pathlib instead of builtin open ] -target-version = "py37" -src = ["src"] unfixable = [ "T20", # Removes print statements "F841", # Removes unused variables ] -exclude = [] flake8-unused-arguments.ignore-variadic-names = true isort.required-imports = ["from __future__ import annotations"] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "tests/**" = ["T20"] "bench/**" = ["T20"] "_custom_build/backend.py" = ["T20"] diff --git a/src/rapidfuzz/distance/_initialize_py.py b/src/rapidfuzz/distance/_initialize_py.py index 2a1631a..a4dd8c6 100644 --- a/src/rapidfuzz/distance/_initialize_py.py +++ b/src/rapidfuzz/distance/_initialize_py.py @@ -42,7 +42,7 @@ def _list_to_editops( blocks.append(Editop(edit_type, src_pos, dest_pos)) # validate order of editops - for i in range(0, len(blocks) - 1): + for i in range(len(blocks) - 1): if blocks[i + 1].src_pos < blocks[i].src_pos or blocks[i + 1].dest_pos < blocks[i].dest_pos: msg = "List of edit operations out of order" raise ValueError(msg) @@ -104,7 +104,7 @@ def _list_to_opcodes( if blocks[-1].src_end != src_len or blocks[-1].dest_end != dest_len: msg = "List of edit operations does not end at the string ends" raise ValueError(msg) - for i in range(0, len(blocks) - 1): + for i in range(len(blocks) - 1): if blocks[i + 1].src_start != blocks[i].src_end or blocks[i + 1].dest_start != blocks[i].dest_end: msg = "List of edit operations is not continuous" raise ValueError(msg)