% Hyyro 2003: bit-vector algorithm for Levenshtein and Damerau edit distances.
% Non-ASCII symbols in the abstract are written as LaTeX math so the entry
% stays safe for classic 8-bit BibTeX as well as Biber.
@article{2003:hyrroe,
  author     = {Hyyr{\"o}, Heikki},
  title      = {A Bit-Vector Algorithm for Computing {Levenshtein} and {Damerau} Edit Distances},
  journal    = {Nordic Journal of Computing},
  year       = {2003},
  month      = mar,
  issue_date = {Spring 2003},
  volume     = {10},
  number     = {1},
  pages      = {29--39},
  numpages   = {11},
  publisher  = {Publishing Association Nordic Journal of Computing},
  address    = {FIN},
  issn       = {1236-6064},
  keywords   = {bit-parallelism, approximate string matching, Levenshtein edit distance, Damerau edit distance},
  abstract   = {The edit distance between strings A and B is defined as the minimum number of edit operations needed in converting A into B or vice versa. The Levenshtein edit distance allows three types of operations: an insertion, a deletion or a substitution of a character. The Damerau edit distance allows the previous three plus in addition a transposition between two adjacent characters. To our best knowledge the best current practical algorithms for computing these edit distances run in time $O(dm)$ and $O(\lceil m/w \rceil (n + \sigma))$, where d is the edit distance between the two strings, m and n are their lengths ($m \leq n$), w is the computer word size and $\sigma$ is the size of the alphabet. In this paper we present an algorithm that runs in time $O(\lceil d/w \rceil m + \lceil n/w \rceil \sigma)$ or $O(\lceil d/w \rceil n + \lceil m/w \rceil \sigma)$. The structure of the algorithm is such, that in practice it is mostly suitable for testing whether the edit distance between two strings is within some pre-determined error threshold. We also present some initial test results with thresholded edit distance computation. In them our algorithm works faster than the original algorithm of Myers.},
}

% Wagner and Fischer 1974: the string-to-string correction problem.
@article{1974:WagFisch,
  author     = {Wagner, Robert A. and Fischer, Michael J.},
  title      = {The String-to-String Correction Problem},
  journal    = {Journal of the ACM},
  year       = {1974},
  month      = jan,
  issue_date = {Jan. 1974},
  volume     = {21},
  number     = {1},
  pages      = {168--173},
  numpages   = {6},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {0004-5411},
  doi        = {10.1145/321796.321811},
  url        = {https://doi.org/10.1145/321796.321811},
  abstract   = {The string-to-string correction problem is to determine the distance between two strings as measured by the minimum cost sequence of ``edit operations'' needed to change the one string into the other. The edit operations investigated allow changing one symbol of a string into another single symbol, deleting one symbol from a string, or inserting a single symbol into a string. An algorithm is presented which solves this problem in time proportional to the product of the lengths of the two strings. Possible applications are to the problems of automatic spelling correction and determining the longest subsequence of characters common to two strings.},
}