mirror of https://github.com/explosion/spaCy.git
Rename ja morph features to Inflection and Reading (#9520)
* Rename ja morph features to Inflection and Reading
This commit is contained in:
parent
2ea9b58006
commit
0c97ed2746
|
@@ -80,12 +80,12 @@ class JapaneseTokenizer(DummyTokenizer):
|
||||||
morph = {}
|
morph = {}
|
||||||
if dtoken.inf:
|
if dtoken.inf:
|
||||||
# it's normal for this to be empty for non-inflecting types
|
# it's normal for this to be empty for non-inflecting types
|
||||||
morph["inflection"] = dtoken.inf
|
morph["Inflection"] = dtoken.inf
|
||||||
token.norm_ = dtoken.norm
|
token.norm_ = dtoken.norm
|
||||||
if dtoken.reading:
|
if dtoken.reading:
|
||||||
# punctuation is its own reading, but we don't want values like
|
# punctuation is its own reading, but we don't want values like
|
||||||
# "=" here
|
# "=" here
|
||||||
morph["reading"] = re.sub("[=|]", "_", dtoken.reading)
|
morph["Reading"] = re.sub("[=|]", "_", dtoken.reading)
|
||||||
token.morph = MorphAnalysis(self.vocab, morph)
|
token.morph = MorphAnalysis(self.vocab, morph)
|
||||||
if self.need_subtokens:
|
if self.need_subtokens:
|
||||||
doc.user_data["sub_tokens"] = sub_tokens_list
|
doc.user_data["sub_tokens"] = sub_tokens_list
|
||||||
|
|
|
@@ -144,9 +144,9 @@ def test_ja_tokenizer_inflections_reading_forms(
|
||||||
ja_tokenizer, text, inflections, reading_forms
|
ja_tokenizer, text, inflections, reading_forms
|
||||||
):
|
):
|
||||||
tokens = ja_tokenizer(text)
|
tokens = ja_tokenizer(text)
|
||||||
test_inflections = [tt.morph.get("inflection") for tt in tokens]
|
test_inflections = [tt.morph.get("Inflection") for tt in tokens]
|
||||||
assert test_inflections == list(inflections)
|
assert test_inflections == list(inflections)
|
||||||
test_readings = [tt.morph.get("reading") for tt in tokens]
|
test_readings = [tt.morph.get("Reading") for tt in tokens]
|
||||||
assert test_readings == list(reading_forms)
|
assert test_readings == list(reading_forms)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue