Rename ja morph features to Inflection and Reading (#9520)

* Rename ja morph features to Inflection and Reading
This commit is contained in:
Adriane Boyd 2021-10-27 13:13:03 +02:00 committed by GitHub
parent 2ea9b58006
commit 0c97ed2746
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 4 additions and 4 deletions

View File

@@ -80,12 +80,12 @@ class JapaneseTokenizer(DummyTokenizer):
morph = {}
if dtoken.inf:
# it's normal for this to be empty for non-inflecting types
morph["inflection"] = dtoken.inf
morph["Inflection"] = dtoken.inf
token.norm_ = dtoken.norm
if dtoken.reading:
# punctuation is its own reading, but we don't want values like
# "=" here
morph["reading"] = re.sub("[=|]", "_", dtoken.reading)
morph["Reading"] = re.sub("[=|]", "_", dtoken.reading)
token.morph = MorphAnalysis(self.vocab, morph)
if self.need_subtokens:
doc.user_data["sub_tokens"] = sub_tokens_list

View File

@@ -144,9 +144,9 @@ def test_ja_tokenizer_inflections_reading_forms(
ja_tokenizer, text, inflections, reading_forms
):
tokens = ja_tokenizer(text)
test_inflections = [tt.morph.get("inflection") for tt in tokens]
test_inflections = [tt.morph.get("Inflection") for tt in tokens]
assert test_inflections == list(inflections)
test_readings = [tt.morph.get("reading") for tt in tokens]
test_readings = [tt.morph.get("Reading") for tt in tokens]
assert test_readings == list(reading_forms)