diff --git a/spacy/lang/ja/__init__.py b/spacy/lang/ja/__init__.py index 33335a189..127c4c8ac 100644 --- a/spacy/lang/ja/__init__.py +++ b/spacy/lang/ja/__init__.py @@ -80,12 +80,12 @@ class JapaneseTokenizer(DummyTokenizer): morph = {} if dtoken.inf: # it's normal for this to be empty for non-inflecting types - morph["inflection"] = dtoken.inf + morph["Inflection"] = dtoken.inf token.norm_ = dtoken.norm if dtoken.reading: # punctuation is its own reading, but we don't want values like # "=" here - morph["reading"] = re.sub("[=|]", "_", dtoken.reading) + morph["Reading"] = re.sub("[=|]", "_", dtoken.reading) token.morph = MorphAnalysis(self.vocab, morph) if self.need_subtokens: doc.user_data["sub_tokens"] = sub_tokens_list diff --git a/spacy/tests/lang/ja/test_tokenizer.py b/spacy/tests/lang/ja/test_tokenizer.py index eb170061a..098884cf0 100644 --- a/spacy/tests/lang/ja/test_tokenizer.py +++ b/spacy/tests/lang/ja/test_tokenizer.py @@ -144,9 +144,9 @@ def test_ja_tokenizer_inflections_reading_forms( ja_tokenizer, text, inflections, reading_forms ): tokens = ja_tokenizer(text) - test_inflections = [tt.morph.get("inflection") for tt in tokens] + test_inflections = [tt.morph.get("Inflection") for tt in tokens] assert test_inflections == list(inflections) - test_readings = [tt.morph.get("reading") for tt in tokens] + test_readings = [tt.morph.get("Reading") for tt in tokens] assert test_readings == list(reading_forms)