diff --git a/jellyfish/_jellyfish.py b/jellyfish/_jellyfish.py index f3a4c9b..4ebe264 100644 --- a/jellyfish/_jellyfish.py +++ b/jellyfish/_jellyfish.py @@ -1,16 +1,22 @@ import unicodedata from collections import defaultdict -from .compat import _range, _zip_longest, _no_bytes_err +from .compat import _range, _zip_longest, IS_PY3 from .porter import Stemmer def _normalize(s): return unicodedata.normalize('NFKD', s) +def _check_type(s): + """Raise TypeError unless s is text (str on Python 3, unicode on Python 2).""" + if IS_PY3 and not isinstance(s, str): + raise TypeError('expected str or unicode, got %s' % type(s).__name__) + elif not IS_PY3 and not isinstance(s, unicode): + raise TypeError('expected unicode, got %s' % type(s).__name__) def levenshtein_distance(s1, s2): - if isinstance(s1, bytes) or isinstance(s2, bytes): - raise TypeError(_no_bytes_err) + _check_type(s1) + _check_type(s2) if s1 == s2: return 0 @@ -36,8 +42,8 @@ def levenshtein_distance(s1, s2): def _jaro_winkler(ying, yang, long_tolerance, winklerize): - if isinstance(ying, bytes) or isinstance(yang, bytes): - raise TypeError(_no_bytes_err) + _check_type(ying) + _check_type(yang) ying_len = len(ying) yang_len = len(yang) @@ -106,8 +112,8 @@ def _jaro_winkler(ying, yang, long_tolerance, winklerize): def damerau_levenshtein_distance(s1, s2): - if isinstance(s1, bytes) or isinstance(s2, bytes): - raise TypeError(_no_bytes_err) + _check_type(s1) + _check_type(s2) len1 = len(s1) len2 = len(s2) @@ -157,8 +163,8 @@ def jaro_winkler(s1, s2, long_tolerance=False): def soundex(s): if not s: return s - if isinstance(s, bytes): - raise TypeError(_no_bytes_err) + + _check_type(s) s = _normalize(s) @@ -197,8 +203,8 @@ def soundex(s): def hamming_distance(s1, s2): - if isinstance(s1, bytes) or isinstance(s2, bytes): - raise TypeError(_no_bytes_err) + _check_type(s1) + _check_type(s2) # ensure length of s1 >= s2 if len(s2) > len(s1): @@ -214,8 +220,8 @@ def hamming_distance(s1, s2): def nysiis(s): - if isinstance(s, bytes): - raise TypeError(_no_bytes_err) + _check_type(s) 
+ if not s: return '' @@ -303,8 +309,8 @@ def nysiis(s): def match_rating_codex(s): - if isinstance(s, bytes): - raise TypeError(_no_bytes_err) + _check_type(s) + s = s.upper() codex = [] @@ -368,8 +374,7 @@ def match_rating_comparison(s1, s2): def metaphone(s): - if isinstance(s, bytes): - raise TypeError(_no_bytes_err) + _check_type(s) result = [] @@ -483,6 +488,6 @@ def metaphone(s): def porter_stem(s): - if isinstance(s, bytes): - raise TypeError(_no_bytes_err) + _check_type(s) + return Stemmer(s).stem() diff --git a/jellyfish/compat.py b/jellyfish/compat.py index b5e0979..180283d 100644 --- a/jellyfish/compat.py +++ b/jellyfish/compat.py @@ -6,8 +6,6 @@ IS_PY3 = sys.version_info[0] == 3 if IS_PY3: _range = range _zip_longest = itertools.zip_longest - _no_bytes_err = 'expected str, got bytes' else: _range = xrange _zip_longest = itertools.izip_longest - _no_bytes_err = 'expected unicode, got str'