diff --git a/spacy/lang/en/lex_attrs.py b/spacy/lang/en/lex_attrs.py index fcc7c6bf2..b630a317d 100644 --- a/spacy/lang/en/lex_attrs.py +++ b/spacy/lang/en/lex_attrs.py @@ -35,7 +35,7 @@ def like_num(text: str) -> bool: # Check ordinal number if text_lower in _ordinal_words: return True - if text_lower.endswith("th"): + if text_lower.endswith(("st", "nd", "rd", "th")): if text_lower[:-2].isdigit(): return True return False diff --git a/spacy/tests/lang/en/test_text.py b/spacy/tests/lang/en/test_text.py index 733e814f7..358f4c0f9 100644 --- a/spacy/tests/lang/en/test_text.py +++ b/spacy/tests/lang/en/test_text.py @@ -56,7 +56,9 @@ def test_lex_attrs_like_number(en_tokenizer, text, match): assert tokens[0].like_num == match -@pytest.mark.parametrize("word", ["third", "Millionth", "100th", "Hundredth"]) +@pytest.mark.parametrize( + "word", ["third", "Millionth", "100th", "Hundredth", "23rd", "52nd"] +) def test_en_lex_attrs_like_number_for_ordinal(word): assert like_num(word)