reduce template usage to a minimum

2020-03-31 15:16:03 +02:00 · 2020-03-31 15:16:03 +02:00 · 028db547d1
parent d665e3b961
commit 028db547d1
6 changed files with 439 additions and 498 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,7 @@
 .vscode/
 __pycache__/
 .idea/
+build/
 rapidfuzz.egg-info/
 dist/
 *.data
--- a/cpp/src/levenshtein.cpp
+++ b/cpp/src/levenshtein.cpp
@ -0,0 +1,241 @@
+#include "levenshtein.hpp"
+
+/**
+ * Computes the complete Levenshtein cost matrix of two strings.
+ *
+ * The common prefix and suffix of both strings are stripped before the matrix
+ * is filled, so the matrix only covers the differing middle part. The length
+ * of the stripped prefix is returned together with the matrix.
+ *
+ * The matrix is stored flat: the cell for the first i characters of sentence1
+ * and the first j characters of sentence2 lives at index i * matrix_rows + j.
+ *
+ * @param sentence1 first string
+ * @param sentence2 second string
+ * @return Matrix holding the stripped prefix length, the flat matrix,
+ *         matrix_columns (length of the stripped sentence1 + 1) and
+ *         matrix_rows (length of the stripped sentence2 + 1)
+ */
+levenshtein::Matrix levenshtein::matrix(std::wstring_view sentence1, std::wstring_view sentence2) {
+  const Affix affix = utils::remove_common_affix(sentence1, sentence2);
+
+  const std::size_t matrix_columns = sentence1.length() + 1;
+  const std::size_t matrix_rows = sentence2.length() + 1;
+
+  std::vector<std::size_t> cache_matrix(matrix_rows * matrix_columns, 0);
+
+  // first line of the matrix: 0, 1, 2, ... along sentence2
+  for (std::size_t j = 0; j < matrix_rows; ++j) {
+    cache_matrix[j] = j;
+  }
+
+  // first cell of every line: 0, 1, 2, ... along sentence1
+  for (std::size_t i = 1; i < matrix_columns; ++i) {
+    cache_matrix[matrix_rows * i] = i;
+  }
+
+  for (std::size_t i = 0; i < sentence1.length(); ++i) {
+    const std::size_t prev_line = i * matrix_rows;
+    const std::size_t cur_line = prev_line + matrix_rows;
+
+    for (std::size_t j = 0; j < sentence2.length(); ++j) {
+      // cheapest of: step from the previous cell of this line, diagonal step
+      // (free when the characters match), step from the line above
+      cache_matrix[cur_line + j + 1] = std::min({
+        cache_matrix[cur_line + j] + 1,
+        cache_matrix[prev_line + j] + (sentence1[i] != sentence2[j] ? 1 : 0),
+        cache_matrix[prev_line + j + 1] + 1
+      });
+    }
+  }
+
+  // the matrix has len1 * len2 cells, so it is handed over instead of copied
+  return Matrix {
+      affix.prefix_len,
+      std::move(cache_matrix),
+      matrix_columns,
+      matrix_rows
+  };
+}
+
+
+/**
+ * Extracts the edit operations described by the Levenshtein matrix of two strings.
+ *
+ * The matrix is walked backwards from its last cell to its first cell. Every
+ * replace, insert or delete step on that path is recorded, steps over equal
+ * characters are skipped. The recorded positions are shifted by the length of
+ * the common prefix that matrix() stripped, and the operations are returned
+ * in front-to-back order.
+ *
+ * @param sentence1 first string
+ * @param sentence2 second string
+ * @return edit operations ordered from the start of the strings to their end
+ * @throws std::logic_error when no step fits the current matrix cell
+ */
+std::vector<levenshtein::EditOp> levenshtein::editops(std::wstring_view sentence1, std::wstring_view sentence2) {
+  const auto m = matrix(sentence1, sentence2);
+  const std::size_t matrix_columns = m.matrix_columns;
+  const std::size_t matrix_rows = m.matrix_rows;
+  const std::size_t prefix_len = m.prefix_len;
+  // refer to the matrix in place: it has matrix_columns * matrix_rows cells and
+  // copying it (once here and once per lambda) would dominate the runtime
+  const auto &lev_matrix = m.matrix;
+
+  std::vector<EditOp> ops;
+  // the last cell holds the distance, which is the number of stored operations
+  ops.reserve(lev_matrix[matrix_columns * matrix_rows - 1]);
+
+  std::size_t i = matrix_columns - 1;
+  std::size_t j = matrix_rows - 1;
+  std::size_t position = matrix_columns * matrix_rows - 1;
+
+  // the lambdas are only used inside this function, so capturing by reference is safe
+  auto is_replace = [&](std::size_t pos) {
+    return lev_matrix[pos - matrix_rows - 1] < lev_matrix[pos];
+  };
+  auto is_insert = [&](std::size_t pos) {
+    return lev_matrix[pos - 1] < lev_matrix[pos];
+  };
+  auto is_delete = [&](std::size_t pos) {
+    return lev_matrix[pos - matrix_rows] < lev_matrix[pos];
+  };
+  auto is_keep = [&](std::size_t pos) {
+    return lev_matrix[pos - matrix_rows - 1] == lev_matrix[pos];
+  };
+
+  while (i > 0 || j > 0) {
+    EditType op_type;
+
+    if (i && j && is_replace(position)) {
+      op_type = EditType::EditReplace;
+      --i;
+      --j;
+      position -= matrix_rows + 1;
+    } else if (j && is_insert(position)) {
+      op_type = EditType::EditInsert;
+      --j;
+      --position;
+    } else if (i && is_delete(position)) {
+      op_type = EditType::EditDelete;
+      --i;
+      position -= matrix_rows;
+    } else if (is_keep(position)) {
+      --i;
+      --j;
+      position -= matrix_rows + 1;
+      // EditKeep does not have to be stored
+      continue;
+    } else {
+      throw std::logic_error("something went wrong extracting the editops from the levenshtein matrix");
+    }
+
+    ops.emplace_back(op_type, i + prefix_len, j + prefix_len);
+  }
+
+  // the walk ran from the end to the start, callers get front-to-back order
+  std::reverse(ops.begin(), ops.end());
+  return ops;
+}
+
+
+/**
+ * Derives the matching blocks of two strings from their edit operations.
+ *
+ * Whenever an edit operation starts behind the current position in either
+ * string, the skipped stretch is emitted as a block. A terminating block
+ * (length of sentence1, length of sentence2, 0) is always appended.
+ *
+ * NOTE(review): the stretch between the last edit operation and the end of
+ * the strings is not emitted as a block - confirm that this is intended.
+ *
+ * @param sentence1 first string
+ * @param sentence2 second string
+ * @return blocks in front-to-back order, followed by the terminating block
+ */
+std::vector<levenshtein::MatchingBlock> levenshtein::matching_blocks(std::wstring_view sentence1, std::wstring_view sentence2) {
+  const auto edit_ops = editops(sentence1, sentence2);
+  std::vector<MatchingBlock> mblocks;
+
+  // current positions inside sentence1 / sentence2
+  std::size_t first_start = 0;
+  std::size_t second_start = 0;
+
+  for (const auto &op : edit_ops) {
+    const EditType type = op.op_type;
+    if (type == EditType::EditKeep) {
+      continue;
+    }
+
+    const bool gap_before_op = first_start < op.first_start || second_start < op.second_start;
+    if (gap_before_op) {
+      mblocks.emplace_back(first_start, second_start, op.first_start - first_start);
+      first_start = op.first_start;
+      second_start = op.second_start;
+    }
+
+    // replace and delete consume a character of sentence1
+    if (type == EditType::EditReplace || type == EditType::EditDelete) {
+      first_start += 1;
+    }
+    // replace and insert consume a character of sentence2
+    if (type == EditType::EditReplace || type == EditType::EditInsert) {
+      second_start += 1;
+    }
+  }
+
+  mblocks.emplace_back(sentence1.length(), sentence2.length(), 0);
+  return mblocks;
+}
+
+
+/**
+ * Calculates a normalized Levenshtein score for two strings.
+ *
+ * The score is 1 - distance / (length of the longer string). Scores below
+ * min_ratio are reported as 0.
+ *
+ * @param sentence1 first string
+ * @param sentence2 second string
+ * @param min_ratio lowest score that is still reported
+ * @return 1 when both strings are empty, 0 when exactly one of them is empty,
+ *         otherwise the score, or 0 when the score is below min_ratio
+ */
+float levenshtein::normalized_distance(std::wstring_view sentence1, std::wstring_view sentence2, float min_ratio) {
+  const bool first_empty = sentence1.empty();
+  const bool second_empty = sentence2.empty();
+  if (first_empty || second_empty) {
+    return (first_empty && second_empty) ? 1.0f : 0.0f;
+  }
+
+  const std::size_t sentence1_len = utils::joined_size(sentence1);
+  const std::size_t sentence2_len = utils::joined_size(sentence2);
+  const std::size_t max_len = std::max(sentence1_len, sentence2_len);
+
+  // the distance can never be smaller than the difference of the lengths, so
+  // the best reachable score is known before any levenshtein calculation runs
+  const std::size_t min_distance = max_len - std::min(sentence1_len, sentence2_len);
+
+  const float len_ratio = 1.0 - static_cast<float>(min_distance) / static_cast<float>(max_len);
+  if (len_ratio < min_ratio) {
+    return 0.0;
+  }
+
+  const std::size_t dist = distance(sentence1, sentence2);
+  const float ratio = 1.0 - static_cast<float>(dist) / static_cast<float>(max_len);
+
+  if (ratio >= min_ratio) {
+    return ratio;
+  }
+  return 0.0;
+}
+
+
+/**
+ * Calculates the Levenshtein distance of two strings, where every mismatch
+ * step costs 1.
+ *
+ * Only a single matrix line is kept in memory. The common prefix and suffix
+ * are stripped first and the line is sized by the shorter string.
+ *
+ * @param sentence1 first string
+ * @param sentence2 second string
+ * @return Levenshtein distance
+ */
+std::size_t levenshtein::distance(std::wstring_view sentence1, std::wstring_view sentence2) {
+  // only the strings are shortened here, the returned affix lengths are not needed
+  utils::remove_common_affix(sentence1, sentence2);
+
+  // make sentence2 the shorter string
+  if (sentence2.size() > sentence1.size()) {
+    std::swap(sentence1, sentence2);
+  }
+
+  if (sentence2.empty()) {
+    return sentence1.length();
+  }
+
+  const std::size_t width = sentence2.length();
+  std::vector<std::size_t> cache(width + 1);
+  for (std::size_t col = 0; col <= width; ++col) {
+    cache[col] = col;
+  }
+
+  for (const wchar_t char1 : sentence1) {
+    // value of the line above, one cell to the left of the cell being written
+    std::size_t diagonal = cache[0];
+    cache[0] += 1;
+
+    for (std::size_t col = 0; col < width; ++col) {
+      const std::size_t above = cache[col + 1];
+      const std::size_t via_diagonal = (char1 != sentence2[col]) ? diagonal + 1 : diagonal;
+
+      cache[col + 1] = std::min({
+        cache[col] + 1,
+        above + 1,
+        via_diagonal
+      });
+      diagonal = above;
+    }
+  }
+  return cache.back();
+}
+
+
+/**
+ * Calculates a Levenshtein distance with configurable costs per operation.
+ *
+ * Only a single matrix line is kept in memory. The common prefix and suffix
+ * are stripped first. The line is sized by the shorter string; when the
+ * strings are swapped for that, insert_cost and delete_cost are swapped too.
+ *
+ * @param sentence1 first string
+ * @param sentence2 second string
+ * @param insert_cost cost of an insertion
+ * @param delete_cost cost of a deletion
+ * @param replace_cost cost of a replacement
+ * @return weighted distance
+ */
+std::size_t levenshtein::generic_distance(std::wstring_view sentence1, std::wstring_view sentence2,
+                                      std::size_t insert_cost, std::size_t delete_cost, std::size_t replace_cost)
+{
+    utils::remove_common_affix(sentence1, sentence2);
+    if (sentence1.size() > sentence2.size()) {
+        std::swap(sentence1, sentence2);
+        std::swap(insert_cost, delete_cost);
+    }
+
+    const std::size_t width = sentence1.size();
+    std::vector<std::size_t> cache(width + 1);
+
+    // first line: i deletions for the first i characters of sentence1
+    for (std::size_t i = 0; i <= width; ++i) {
+        cache[i] = i * delete_cost;
+    }
+
+    for (const wchar_t char2 : sentence2) {
+        // value of the line above, one cell to the left of the cell being written
+        std::size_t diagonal = cache[0];
+        cache[0] += insert_cost;
+
+        for (std::size_t i = 0; i < width; ++i) {
+            // equal characters take over the diagonal value unchanged
+            std::size_t cell = diagonal;
+            if (sentence1[i] != char2) {
+                cell = std::min({
+                  cache[i] + delete_cost,
+                  cache[i + 1] + insert_cost,
+                  diagonal + replace_cost
+                });
+            }
+            diagonal = cache[i + 1];
+            cache[i + 1] = cell;
+        }
+    }
+
+    return cache.back();
+}
--- a/cpp/src/levenshtein.hpp
+++ b/cpp/src/levenshtein.hpp
@ -30,11 +30,9 @@ namespace levenshtein {
        std::size_t matrix_rows;
    };

-    template<typename CharT>
-    Matrix matrix(std::basic_string_view<CharT> sentence1, std::basic_string_view<CharT> sentence2);
+    Matrix matrix(std::wstring_view sentence1, std::wstring_view sentence2);

-    template<typename CharT>
-    std::vector<EditOp> editops(std::basic_string_view<CharT> sentence1, std::basic_string_view<CharT> sentence2);
+    std::vector<EditOp> editops(std::wstring_view sentence1, std::wstring_view sentence2);

    struct MatchingBlock {
    	std::size_t first_start;
@ -44,8 +42,7 @@ namespace levenshtein {
        : first_start(first_start), second_start(second_start), len(len) {}
    };

-    template<typename CharT>
-    std::vector<MatchingBlock> matching_blocks(std::basic_string_view<CharT> sentence1, std::basic_string_view<CharT> sentence2);
+    std::vector<MatchingBlock> matching_blocks(std::wstring_view sentence1, std::wstring_view sentence2);

    
    float normalized_distance(std::wstring_view sentence1, std::wstring_view sentence2, float min_ratio=0.0);
@ -53,8 +50,8 @@ namespace levenshtein {
    std::size_t distance(std::wstring_view sentence1, std::wstring_view sentence2);


-    template<typename MaxDistanceCalc=std::false_type, typename CharT>
-    auto levenshtein_word_cmp(const CharT &letter_cmp, const string_view_vec<CharT> &words,
+    template<typename MaxDistanceCalc=std::false_type>
+    auto levenshtein_word_cmp(const wchar_t &letter_cmp, const std::vector<std::wstring_view> &words,
                            std::vector<std::size_t> &cache, std::size_t current_cache);

    /**
@ -75,20 +72,14 @@ namespace levenshtein {
     *                     so when it can not exit early it should not be used
     * @return weighted levenshtein distance
     */
-    template<typename CharT, typename MaxDistance=std::nullopt_t>
-    std::size_t weighted_distance_impl(std::basic_string_view<CharT> sentence1, std::basic_string_view<CharT> sentence2, MaxDistance max_distance=std::nullopt);
-
    template<typename MaxDistance=std::nullopt_t>
    std::size_t weighted_distance(std::wstring_view sentence1, std::wstring_view sentence2, MaxDistance max_distance=std::nullopt);

    template<typename MaxDistance=std::nullopt_t>
-    std::size_t weighted_distance(std::string_view sentence1, std::string_view sentence2, MaxDistance max_distance=std::nullopt);
-
-    template<typename CharT, typename MaxDistance=std::nullopt_t>
-    std::size_t weighted_distance(string_view_vec<CharT> sentence1, string_view_vec<CharT> sentence2, MaxDistance max_distance=std::nullopt);
+    std::size_t weighted_distance(std::vector<std::wstring_view> sentence1, std::vector<std::wstring_view> sentence2, MaxDistance max_distance=std::nullopt);


-    size_t generic_distance(std::wstring_view source, std::wstring_view target, size_t insert_cost = 1, size_t delete_cost = 1, size_t replace_cost = 1);
+    std::size_t generic_distance(std::wstring_view source, std::wstring_view target, std::size_t insert_cost = 1, std::size_t delete_cost = 1, std::size_t replace_cost = 1);

    /**
    * Calculates a normalized score of the weighted Levenshtein algorithm between 0.0 and
@ -100,214 +91,8 @@ namespace levenshtein {



-template<typename CharT>
-inline levenshtein::Matrix levenshtein::matrix(std::basic_string_view<CharT> sentence1, std::basic_string_view<CharT> sentence2) {
-  Affix affix = remove_common_affix(sentence1, sentence2);
-
-  std::size_t matrix_columns = sentence1.length() + 1;
-  std::size_t matrix_rows = sentence2.length() + 1;
-
-  std::vector<std::size_t> cache_matrix(matrix_rows*matrix_columns, 0);
-
-  for (std::size_t i = 0; i < matrix_rows; ++i) {
-    cache_matrix[i] = i;
-  }
-
-  for (std::size_t i = 1; i < matrix_columns; ++i) {
-    cache_matrix[matrix_rows*i] = i;
-  }
-
-  std::size_t sentence1_pos = 0;
-  for (const auto &char1 : sentence1) {
-    auto prev_cache = cache_matrix.begin() + sentence1_pos * matrix_rows;
-    auto result_cache = cache_matrix.begin() + (sentence1_pos + 1) * matrix_rows + 1;
-    std::size_t result = sentence1_pos + 1;
-    for (const auto &char2 : sentence2) {
-      result = std::min({
-        result + 1,
-        *prev_cache + (char1 != char2),
-        *(++prev_cache) + 1
-      });
-      *result_cache = result;
-      ++result_cache;
-    }
-    ++sentence1_pos;
-  }
-
-  return Matrix {
-      affix.prefix_len,
-      cache_matrix,
-      matrix_columns,
-      matrix_rows
-  };
-}
-
-
-
-template<typename CharT>
-inline std::vector<levenshtein::EditOp>
-levenshtein::editops(std::basic_string_view<CharT> sentence1, std::basic_string_view<CharT> sentence2) {
-  auto m = matrix(sentence1, sentence2);
-  std::size_t matrix_columns = m.matrix_columns;
-  std::size_t matrix_rows = m.matrix_rows;
-  std::size_t prefix_len = m.prefix_len;
-  auto lev_matrix = m.matrix;
-
-  std::vector<EditOp> ops;
-  ops.reserve(lev_matrix[matrix_columns * matrix_rows - 1]);
-
-  std::size_t i = matrix_columns - 1;
-  std::size_t j = matrix_rows - 1;
-  std::size_t position = matrix_columns * matrix_rows - 1;
-
-  auto is_replace = [=](std::size_t pos) {
-    return lev_matrix[pos - matrix_rows - 1] < lev_matrix[pos];
-  };
-  auto is_insert = [=](std::size_t pos) {
-    return lev_matrix[pos - 1] < lev_matrix[pos];
-  };
-  auto is_delete = [=](std::size_t pos) {
-    return lev_matrix[pos - matrix_rows] < lev_matrix[pos];
-  };
-  auto is_keep = [=](std::size_t pos) {
-    return lev_matrix[pos - matrix_rows - 1] == lev_matrix[pos];
-  };
-
-  while (i > 0 || j > 0) {
-    EditType op_type;
-
-    if (i && j && is_replace(position)) {
-      op_type = EditType::EditReplace;
-      --i;
-      --j;
-      position -= matrix_rows + 1;
-    } else if (j && is_insert(position)) {
-      op_type = EditType::EditInsert;
-      --j;
-      --position;
-    }  else if (i && is_delete(position)) {
-      op_type = EditType::EditDelete;
-      --i;
-      position -= matrix_rows;
-    } else if (is_keep(position)) {
-      --i;
-      --j;
-      position -= matrix_rows + 1;
-      // EditKeep does not has to be stored
-      continue;
-    } else {
-      throw std::logic_error("something went wrong extracting the editops from the levenshtein matrix");
-    }
-
-    ops.emplace_back(op_type, i + prefix_len, j + prefix_len);
-  }
-
-  std::reverse(ops.begin(), ops.end());
-  return ops;
-}
-
-
-template<typename CharT>
-inline std::vector<levenshtein::MatchingBlock>
-levenshtein::matching_blocks(std::basic_string_view<CharT> sentence1, std::basic_string_view<CharT> sentence2) {
-  auto edit_ops = editops(sentence1, sentence2);
-  std::size_t first_start = 0;
-	std::size_t second_start = 0;
-  std::vector<MatchingBlock> mblocks;
-
-  for (const auto &op : edit_ops) {
-    if (op.op_type == EditType::EditKeep) {
-      continue;
-    }
-
-    if (first_start < op.first_start || second_start < op.second_start) {
-      mblocks.emplace_back(first_start, second_start, op.first_start - first_start);
-      first_start = op.first_start;
-      second_start = op.second_start;
-    }
-
-    switch (op.op_type) {
-    case EditType::EditReplace:
-      first_start += 1;
-      second_start += 1;
-      break;
-    case EditType::EditDelete:
-      first_start += 1;
-      break;
-    case EditType::EditInsert:
-      second_start += 1;
-      break;
-    case EditType::EditKeep:
-      break;
-    }
-  }
-
-  mblocks.emplace_back(sentence1.length(), sentence2.length(), 0);
-  return mblocks;
-}
-
-inline float levenshtein::normalized_distance(std::wstring_view sentence1, std::wstring_view sentence2, float min_ratio) {
-  if (sentence1.empty() || sentence2.empty()) {
-    return sentence1.empty() && sentence2.empty();
-  }
-
-  std::size_t sentence1_len = utils::joined_size(sentence1);
-  std::size_t sentence2_len = utils::joined_size(sentence2);
-  std::size_t max_len = std::max(sentence1_len, sentence2_len);
-
-  // constant time calculation to find a string ratio based on the string length
-  // so it can exit early without running any levenshtein calculations
-  std::size_t min_distance = (sentence1_len > sentence2_len)
-    ? sentence1_len - sentence2_len
-    : sentence2_len - sentence1_len;
-
-  float len_ratio = 1.0 - (float)min_distance / (float)max_len;
-  if (len_ratio < min_ratio) {
-    return 0.0;
-  }
-
-  std::size_t dist = distance(sentence1, sentence2);
-
-  float ratio = 1.0 - (float)dist / (float)max_len;
-  return (ratio >= min_ratio) ? ratio : 0.0;
-}
-
-inline std::size_t levenshtein::distance(std::wstring_view sentence1, std::wstring_view sentence2) {
-
-  remove_common_affix(sentence1, sentence2);
-
-  if (sentence2.size() > sentence1.size()) std::swap(sentence1, sentence2);
-
-  if (sentence2.empty()) {
-    return sentence1.length();
-  }
-
-  std::vector<std::size_t> cache(sentence2.length()+1);
-  std::iota(cache.begin(), cache.end(), 0);
-
-  for (const auto &char1 : sentence1) {
-    auto cache_iter = cache.begin();
-    size_t temp = *cache_iter;
-    *cache_iter += 1;
-
-    for (const auto& char2 : sentence2) {
-      if (char1 != char2) {
-        ++temp;
-      }
-
-			temp = std::min({
-          *cache_iter + 1,
-          *(++cache_iter) + 1,
-			    temp
-      });
-			std::swap(*cache_iter, temp);
-		}
-  }
-  return cache.back();
-}
-
-template<typename MaxDistanceCalc, typename CharT>
-inline auto levenshtein::levenshtein_word_cmp(const CharT &letter_cmp, const string_view_vec<CharT> &words,
+template<typename MaxDistanceCalc>
+inline auto levenshtein::levenshtein_word_cmp(const wchar_t &letter_cmp, const std::vector<std::wstring_view> &words,
                          std::vector<std::size_t> &cache, std::size_t current_cache)
 {
  std::size_t result = current_cache + 1;
@ -315,7 +100,7 @@ inline auto levenshtein::levenshtein_word_cmp(const CharT &letter_cmp, const str
  auto word_iter = words.begin();
  auto min_distance = std::numeric_limits<std::size_t>::max();

-  auto charCmp = [&] (const CharT &char2) {
+  auto charCmp = [&] (const wchar_t &char2) {
 	  if (letter_cmp == char2) { result = current_cache; }
 	  else { ++result; }

@ -356,9 +141,9 @@ inline auto levenshtein::levenshtein_word_cmp(const CharT &letter_cmp, const str
 }


-template<typename CharT, typename MaxDistance>
-inline std::size_t levenshtein::weighted_distance(string_view_vec<CharT> sentence1, string_view_vec<CharT> sentence2, MaxDistance max_distance) {
-  remove_common_affix(sentence1, sentence2);
+template<typename MaxDistance>
+inline std::size_t levenshtein::weighted_distance(std::vector<std::wstring_view> sentence1, std::vector<std::wstring_view> sentence2, MaxDistance max_distance) {
+  utils::remove_common_affix(sentence1, sentence2);
  std::size_t sentence1_len = utils::joined_size(sentence1);
  std::size_t sentence2_len = utils::joined_size(sentence2);

@ -380,7 +165,7 @@ inline std::size_t levenshtein::weighted_distance(string_view_vec<CharT> sentenc
  // no delimiter in front of first word
  for (const auto &letter : *word_iter) {
    if constexpr(!std::is_same_v<MaxDistance, std::nullopt_t>) {
-      size_t min_distance = levenshtein_word_cmp<std::true_type>(letter, sentence2, cache, range1_pos);
+      std::size_t min_distance = levenshtein_word_cmp<std::true_type>(letter, sentence2, cache, range1_pos);
      if (min_distance > max_distance) {
        return std::numeric_limits<std::size_t>::max();
      }
@ -395,19 +180,19 @@ inline std::size_t levenshtein::weighted_distance(string_view_vec<CharT> sentenc
  for (; word_iter != sentence1.end(); ++word_iter) {
    // whitespace between words
    if constexpr(!std::is_same_v<MaxDistance, std::nullopt_t>) {
-      size_t min_distance = levenshtein_word_cmp<std::true_type>((CharT)0x20, sentence2, cache, range1_pos);
+      std::size_t min_distance = levenshtein_word_cmp<std::true_type>((wchar_t)0x20, sentence2, cache, range1_pos);
      if (min_distance > max_distance) {
        return std::numeric_limits<std::size_t>::max();
      }
    } else {
-      levenshtein_word_cmp((CharT)0x20, sentence2, cache, range1_pos);
+      levenshtein_word_cmp((wchar_t)0x20, sentence2, cache, range1_pos);
    }

    ++range1_pos;

    for (const auto &letter : *word_iter) {
      if constexpr(!std::is_same_v<MaxDistance, std::nullopt_t>) {
-        size_t min_distance = levenshtein_word_cmp<std::true_type>(letter, sentence2, cache, range1_pos);
+        std::size_t min_distance = levenshtein_word_cmp<std::true_type>(letter, sentence2, cache, range1_pos);
        if (min_distance > max_distance) {
          return std::numeric_limits<std::size_t>::max();
        }
@ -425,20 +210,7 @@ inline std::size_t levenshtein::weighted_distance(string_view_vec<CharT> sentenc

 template<typename MaxDistance>
 inline std::size_t levenshtein::weighted_distance(std::wstring_view sentence1, std::wstring_view sentence2, MaxDistance max_distance) {
-  return weighted_distance_impl(sentence1, sentence2, max_distance);
-}
-
-
-template<typename MaxDistance>
-inline std::size_t levenshtein::weighted_distance(std::string_view sentence1, std::string_view sentence2, MaxDistance max_distance) {
-  return weighted_distance_impl(sentence1, sentence2, max_distance);
-}
-
-
-template<typename CharT, typename MaxDistance>
-inline std::size_t levenshtein::weighted_distance_impl(std::basic_string_view<CharT> sentence1, std::basic_string_view<CharT> sentence2, MaxDistance max_distance) {
-
-  remove_common_affix(sentence1, sentence2);
+  utils::remove_common_affix(sentence1, sentence2);

  if (sentence2.size() > sentence1.size()) std::swap(sentence1, sentence2);

@ -488,43 +260,7 @@ inline std::size_t levenshtein::weighted_distance_impl(std::basic_string_view<Ch
 }


-inline size_t levenshtein::generic_distance(std::wstring_view sentence1, std::wstring_view sentence2,
-                                      size_t insert_cost, size_t delete_cost, size_t replace_cost)
-{
-    remove_common_affix(sentence1, sentence2);
-    if (sentence1.size() > sentence2.size()) {
-        std::swap(sentence1, sentence2);
-        std::swap(insert_cost, delete_cost);
-    }

-    const size_t min_size = sentence1.size();
-    std::vector<size_t> cache(sentence1.size() + 1);
-
-    cache[0] = 0;
-    for (size_t i = 1; i < cache.size(); ++i) {
-        cache[i] = cache[i - 1] + delete_cost;
-    }
-
-    for (const auto &char2 : sentence2) {
-        auto cache_iter = cache.begin();
-        size_t temp = *cache_iter;
-        *cache_iter += insert_cost;
-
-        for (const auto &char1 : sentence1) {
-            if (char1 != char2) {
-                temp = std::min({
-                  *cache_iter + delete_cost,
-                  *(cache_iter+1) + insert_cost,
-                  temp + replace_cost
-                });
-            }
-            ++cache_iter;
-            std::swap(*cache_iter, temp);
-        }
-    }
-
-    return cache.back();
-}


 template<typename Sentence1, typename Sentence2>
@ -533,6 +269,7 @@ inline float levenshtein::normalized_weighted_distance(const Sentence1 &sentence
  if (sentence1.empty() || sentence2.empty()) {
    return sentence1.empty() && sentence2.empty();
  }
+  return 1;

  std::size_t sentence1_len = utils::joined_size(sentence1);
  std::size_t sentence2_len = utils::joined_size(sentence2);
--- a/cpp/src/utils.cpp
+++ b/cpp/src/utils.cpp
@ -0,0 +1,151 @@
+#include "utils.hpp"
+
+#include <cwctype>
+
+/**
+ * Counts how many leading elements two ranges have in common
+ */
+template <typename InputIterator1, typename InputIterator2>
+inline auto common_prefix_length(InputIterator1 first1, InputIterator1 last1,
+                                 InputIterator2 first2, InputIterator2 last2)
+{
+    // first position at which the ranges differ (or the end of the shorter range)
+    const auto first_difference = std::mismatch(first1, last1, first2, last2);
+    return std::distance(first1, first_difference.first);
+}
+
+/**
+ * Strips the prefix shared by two string views from both of them
+ * and returns its length
+ */
+std::size_t remove_common_prefix(std::wstring_view& a, std::wstring_view& b) {
+  const std::size_t shared = static_cast<std::size_t>(
+      common_prefix_length(a.begin(), a.end(), b.begin(), b.end()));
+  a.remove_prefix(shared);
+  b.remove_prefix(shared);
+  return shared;
+}
+
+/**
+ * Strips the suffix shared by two string views from both of them
+ * and returns its length
+ */
+std::size_t remove_common_suffix(std::wstring_view& a, std::wstring_view& b) {
+  // walking both views backwards turns the common suffix into a common prefix
+  const std::size_t shared = static_cast<std::size_t>(
+      common_prefix_length(a.rbegin(), a.rend(), b.rbegin(), b.rend()));
+  a.remove_suffix(shared);
+  b.remove_suffix(shared);
+  return shared;
+}
+
+/**
+ * Strips the common prefix and the common suffix of two string views
+ * and returns the lengths of both
+ */
+Affix utils::remove_common_affix(std::wstring_view& a, std::wstring_view& b) {
+  // the prefix goes first, the suffix is then searched in the shortened views
+  const std::size_t prefix_len = remove_common_prefix(a, b);
+  const std::size_t suffix_len = remove_common_suffix(a, b);
+  return Affix { prefix_len, suffix_len };
+}
+
+/**
+ * Erases the elements two containers have in common at their front
+ * and at their back
+ */
+template<typename T>
+void vec_remove_common_affix(T &a, T &b) {
+  const auto front_diff = std::mismatch(a.begin(), a.end(), b.begin(), b.end());
+  a.erase(a.begin(), front_diff.first);
+  b.erase(b.begin(), front_diff.second);
+
+  // the same search from the back; base() turns the reverse iterators into
+  // the forward positions at which the common tail starts
+  const auto back_diff = std::mismatch(a.rbegin(), a.rend(), b.rbegin(), b.rend());
+  a.erase(back_diff.first.base(), a.end());
+  b.erase(back_diff.second.base(), b.end());
+}
+
+/**
+ * Strips the common affix of two word lists: first whole words that are equal
+ * at the front and at the back, then the common prefix of the first remaining
+ * words and the common suffix of the last remaining words
+ */
+void utils::remove_common_affix(std::vector<std::wstring_view> &a, std::vector<std::wstring_view> &b)
+{
+  vec_remove_common_affix(a, b);
+
+  // nothing left to compare once one of the lists is used up
+  if (a.empty() || b.empty()) {
+    return;
+  }
+
+  remove_common_prefix(a.front(), b.front());
+  remove_common_suffix(a.back(), b.back());
+}
+
+/**
+ * Joins the words of a sentence into one string, separated by a single
+ * space (0x20)
+ *
+ * @param sentence words to join
+ * @return joined string, empty when sentence is empty
+ */
+std::wstring utils::join(const std::vector<std::wstring_view> &sentence) {
+  std::wstring result;
+  if (sentence.empty()) {
+    return result;
+  }
+
+  // one allocation up front: all words plus one separator between neighbours
+  std::size_t joined_len = sentence.size() - 1;
+  for (const auto &word : sentence) {
+    joined_len += word.size();
+  }
+  result.reserve(joined_len);
+
+  auto word_iter = sentence.begin();
+  result.append(*word_iter);
+  for (++word_iter; word_iter != sentence.end(); ++word_iter) {
+    // the views are appended directly, no temporary string per word
+    result.push_back(L' ');
+    result.append(*word_iter);
+  }
+  return result;
+}
+
+/**
+ * Returns result when it reaches score_cutoff, otherwise 0
+ */
+percent utils::result_cutoff(float result, percent score_cutoff) {
+  if (result >= score_cutoff) {
+    return result;
+  }
+  return 0;
+}
+
+// trim from start (in place)
+// the wide-character classification is used, because passing a wchar_t that
+// does not fit into an unsigned char to std::isspace is undefined behaviour
+void ltrim(std::wstring &s) {
+    s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](wchar_t ch) {
+        return !std::iswspace(static_cast<std::wint_t>(ch));
+    }));
+}
+
+
+// trim from end (in place)
+// the wide-character classification is used, because passing a wchar_t that
+// does not fit into an unsigned char to std::isspace is undefined behaviour
+void rtrim(std::wstring &s) {
+    // base() of the last non-space position (seen from the back) is the first
+    // character of the trailing whitespace
+    s.erase(std::find_if(s.rbegin(), s.rend(), [](wchar_t ch) {
+        return !std::iswspace(static_cast<std::wint_t>(ch));
+    }).base(), s.end());
+}
+
+
+// strips the whitespace at both ends of s (in place)
+void utils::trim(std::wstring &s) {
+    // both ends are independent of each other, so the order does not change the result
+    rtrim(s);
+    ltrim(s);
+}
+
+
+/**
+ * Converts every character of s to lower case (in place)
+ *
+ * The wide-character conversion is used, because passing a wchar_t that does
+ * not fit into an unsigned char to ::tolower is undefined behaviour.
+ */
+void utils::lower_case(std::wstring &s) {
+  for (wchar_t &c : s) {
+    c = static_cast<wchar_t>(std::towlower(static_cast<std::wint_t>(c)));
+  }
+}
+
+/**
+ * Default preprocessing of a string: strips the whitespace at both ends
+ * and converts the string to lower case
+ *
+ * @param s string to process (taken by value, the caller's string is not modified)
+ * @return processed copy
+ */
+std::wstring utils::default_process(std::wstring s) {
+  utils::trim(s);
+  utils::lower_case(s);
+  return s;
+}
+
+/**
+ * Splits two word lists into the words they share and the words that only
+ * occur in one of them
+ *
+ * NOTE(review): std::unique only removes adjacent duplicates - presumably the
+ * callers pass sorted word lists; verify against the call sites.
+ *
+ * @param a first word list
+ * @param b second word list
+ * @return intersection, words only in a (difference_ab) and words only in b
+ *         (difference_ba)
+ */
+DecomposedSet utils::set_decomposition(std::vector<std::wstring_view> a, std::vector<std::wstring_view> b) {
+  std::vector<std::wstring_view> intersection;
+  std::vector<std::wstring_view> difference_ab;
+  a.erase(std::unique(a.begin(), a.end()), a.end());
+  b.erase(std::unique(b.begin(), b.end()), b.end());
+
+  for (const auto &current_a : a) {
+    const auto element_b = std::find(b.begin(), b.end(), current_a);
+    if (element_b != b.end()) {
+      // shared word: take it out of b, so b ends up holding only its own words
+      b.erase(element_b);
+      intersection.emplace_back(current_a);
+    } else {
+      difference_ab.emplace_back(current_a);
+    }
+  }
+
+  // the constructor takes its arguments by value, so the vectors are moved
+  // into it instead of being copied
+  return DecomposedSet{std::move(intersection), std::move(difference_ab), std::move(b)};
+}
+
+/**
+ * Length of a single string, which is just its number of characters
+ */
+std::size_t utils::joined_size(const std::wstring_view &x){
+  return x.length();
+}
+
+
+/**
+ * Length the words of x have once they are joined with a single whitespace
+ * between neighbouring words, 0 for an empty list
+ */
+std::size_t utils::joined_size(const std::vector<std::wstring_view> &x){
+  if (x.empty()) {
+    return 0;
+  }
+
+  std::size_t characters = 0;
+  for (auto word = x.begin(); word != x.end(); ++word) {
+    characters += word->size();
+  }
+
+  // n words are separated by n - 1 whitespaces
+  return characters + (x.size() - 1);
+}
--- a/cpp/src/utils.hpp
+++ b/cpp/src/utils.hpp
@ -7,54 +7,34 @@

 using percent = float;

-template<typename CharT>
-using string_view_vec = std::vector<std::basic_string_view<CharT>>;
-
-
-namespace detail {
-    template<typename T>
-    auto char_type(T const*) -> T;
-
-    template<typename T, typename U = typename T::const_iterator>
-    auto char_type(T const&) -> typename std::iterator_traits<U>::value_type;
-}
-
-template<typename T>
-using char_type = decltype(detail::char_type(std::declval<T const&>()));
-
-
-template<typename CharT>
 struct DecomposedSet {
-  string_view_vec<CharT> intersection;
-  string_view_vec<CharT> difference_ab;
-  string_view_vec<CharT> difference_ba;
-  DecomposedSet(string_view_vec<CharT> intersection, string_view_vec<CharT> difference_ab, string_view_vec<CharT> difference_ba)
+  std::vector<std::wstring_view> intersection;
+  std::vector<std::wstring_view> difference_ab;
+  std::vector<std::wstring_view> difference_ba;
+  DecomposedSet(std::vector<std::wstring_view> intersection, std::vector<std::wstring_view> difference_ab, std::vector<std::wstring_view> difference_ba)
    : intersection(std::move(intersection)), difference_ab(std::move(difference_ab)), difference_ba(std::move(difference_ba)) {}
 };


+struct Affix {
+  std::size_t prefix_len;
+  std::size_t suffix_len;
+};
+
 namespace utils {

-  template<
-      typename T, typename CharT = char_type<T>,
-      typename = std::enable_if_t<std::is_convertible<T const&, std::basic_string_view<CharT>>{}>
-  >
-  string_view_vec<CharT> splitSV(const T &str);
-
-
-  template<typename CharT>
-  DecomposedSet<CharT> set_decomposition(string_view_vec<CharT> a, string_view_vec<CharT> b);
-
-
  template<typename T>
-  std::size_t joined_size(const T &x);
+  std::vector<std::wstring_view> splitSV(const T &str);

-  template<typename T>
-  std::size_t joined_size(const std::vector<T> &x);
+  DecomposedSet set_decomposition(std::vector<std::wstring_view> a, std::vector<std::wstring_view> b);


-  template<typename CharT>
-  std::basic_string<CharT> join(const string_view_vec<CharT> &sentence);
+  std::size_t joined_size(const std::wstring_view &x);
+
+  std::size_t joined_size(const std::vector<std::wstring_view> &x);
+
+
+  std::wstring join(const std::vector<std::wstring_view> &sentence);

  percent result_cutoff(float result, percent score_cutoff);

@ -62,12 +42,16 @@ namespace utils {
  void lower_case(std::wstring &s);

  std::wstring default_process(std::wstring s);
+
+  Affix remove_common_affix(std::wstring_view& a, std::wstring_view& b);
+
+  void remove_common_affix(std::vector<std::wstring_view> &a, std::vector<std::wstring_view> &b);
 }


-template<typename T, typename CharT, typename>
-string_view_vec<CharT> utils::splitSV(const T &str) {
-  string_view_vec<CharT> output;
+template<typename T>
+inline std::vector<std::wstring_view> utils::splitSV(const T &str) {
+  std::vector<std::wstring_view> output;
  // assume a word length of 6 + 1 whitespace
  output.reserve(str.size() / 7);

@ -82,178 +66,3 @@ string_view_vec<CharT> utils::splitSV(const T &str) {

  return output;
 }
-
-
-template<typename CharT>
-DecomposedSet<CharT> utils::set_decomposition(string_view_vec<CharT> a, string_view_vec<CharT> b) {
-  string_view_vec<CharT> intersection;
-  string_view_vec<CharT> difference_ab;
-  a.erase(std::unique(a.begin(), a.end()), a.end());
-  b.erase(std::unique(b.begin(), b.end()), b.end());
-  
-  for (const auto &current_a : a) {
-    auto element_b = std::find(b.begin(), b.end(), current_a);
-    if (element_b != b.end()) {
-      b.erase(element_b);
-      intersection.emplace_back(current_a);
-    } else {
-      difference_ab.emplace_back(current_a);
-    }
-  }
-
-  return DecomposedSet{intersection, difference_ab, b};
-}
-
-
-/**
- * Finds the longest common prefix between two ranges
- */
-template <typename InputIterator1, typename InputIterator2>
-inline auto common_prefix_length(InputIterator1 first1, InputIterator1 last1,
-	                        InputIterator2 first2, InputIterator2 last2)
-{
-    return std::distance(first1, std::mismatch(first1, last1, first2, last2).first);
-}
-
-/**
- * Removes common prefix of two string views
- */
-template<typename CharT>
-inline std::size_t remove_common_prefix(std::basic_string_view<CharT>& a, std::basic_string_view<CharT>& b) {
-  auto prefix = common_prefix_length(a.begin(), a.end(), b.begin(), b.end());
-	a.remove_prefix(prefix);
-	b.remove_prefix(prefix);
-  return prefix;
-}
-
-/**
- * Removes common suffix of two string views
- */
-template<typename CharT>
-inline std::size_t remove_common_suffix(std::basic_string_view<CharT>& a, std::basic_string_view<CharT>& b) {
-  auto suffix = common_prefix_length(a.rbegin(), a.rend(), b.rbegin(), b.rend());
-	a.remove_suffix(suffix);
-  b.remove_suffix(suffix);
-  return suffix;
-}
-
-struct Affix {
-  std::size_t prefix_len;
-  std::size_t suffix_len;
-};
-
-/**
- * Removes common affix of two string views
- */
-template<typename CharT>
-inline Affix remove_common_affix(std::basic_string_view<CharT>& a, std::basic_string_view<CharT>& b) {
-	return Affix {
-    remove_common_prefix(a, b),
-    remove_common_suffix(a, b)
-  };
-}
-
-
-template<typename T>
-inline void vec_remove_common_affix(T &a, T &b) {
-  auto prefix = std::mismatch(a.begin(), a.end(), b.begin(), b.end());
-  a.erase(a.begin(), prefix.first);
-  b.erase(b.begin(), prefix.second);
-
-  auto suffix = common_prefix_length(a.rbegin(), a.rend(), b.rbegin(), b.rend());
-  a.erase(a.end()-suffix, a.end());
-  b.erase(b.end()-suffix, b.end());
-}
-
-template<typename T>
-inline void vec_common_affix(std::vector<T> &a, std::vector<T> &b) {
-  iterable_remove_common_affix(a, b);
-}
-
-template<typename T>
-inline void remove_common_affix(std::vector<T> &a, std::vector<T> &b)
-{
-  vec_remove_common_affix(a, b);
-  if (!a.empty() && !b.empty()) {
-    remove_common_prefix(a.front(), b.front());
-    remove_common_suffix(a.back(), b.back());
-  }
-}
-
-
-template<typename T>
-inline std::size_t utils::joined_size(const T &x){
-	return x.size();
-}
-
-
-template<typename T>
-inline std::size_t utils::joined_size(const std::vector<T> &x){
-  if (x.empty()) {
-    return 0;
-  }
-
-  // there is a whitespace between each word
-  std::size_t result = x.size() - 1;
-	for (const auto &y: x) result += y.size();
-
-	return result;
-}
-
-
-template<typename CharT>
-std::basic_string<CharT> utils::join(const string_view_vec<CharT> &sentence) {
-  if (sentence.empty()) {
-    return std::basic_string<CharT>();
-  }
-
-  auto sentence_iter = sentence.begin();
-  std::basic_string<CharT> result {*sentence_iter};
-  const std::basic_string<CharT> whitespace {0x20};
-  ++sentence_iter;
-  for (; sentence_iter != sentence.end(); ++sentence_iter) {
-    result.append(whitespace).append(std::basic_string<CharT> {*sentence_iter});
-  }
-  return result;
-}
-
-
-inline percent utils::result_cutoff(float result, percent score_cutoff) {
-  return (result >= score_cutoff) ? result : 0;
-}
-
-
-// trim from start (in place)
-inline void ltrim(std::wstring &s) {
-    s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) {
-        return !std::isspace(ch);
-    }));
-}
-
-
-// trim from end (in place)
-inline void rtrim(std::wstring &s) {
-    s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) {
-        return !std::isspace(ch);
-    }).base(), s.end());
-}
-
-
-// trim from both ends (in place)
-inline void utils::trim(std::wstring &s) {
-    ltrim(s);
-    rtrim(s);
-}
-
-
-inline void utils::lower_case(std::wstring &s) {
-   std::for_each(s.begin(), s.end(), [](wchar_t & c){
-	  c = ::tolower(c);
-  });
-}
-
-inline std::wstring utils::default_process(std::wstring s) {
-  trim(s);
-  lower_case(s);
-  return s;
-}
--- a/setup.py
+++ b/setup.py
@ -34,7 +34,9 @@ ext_modules = [
        [
            'python/src/rapidfuzz.cpp',
            'cpp/src/fuzz.cpp',
-            'cpp/src/process.cpp'
+            'cpp/src/process.cpp',
+            'cpp/src/levenshtein.cpp',
+            'cpp/src/utils.cpp'
        ],
        include_dirs=[
            # Path to pybind11 headers