mirror of https://github.com/explosion/spaCy.git
Remove unnecessary namedtuple/dataclass
This commit is contained in:
parent
8b02fff097
commit
499c39acba
|
@ -1,8 +1,6 @@
|
||||||
# encoding: utf8
|
# encoding: utf8
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from .stop_words import STOP_WORDS
|
from .stop_words import STOP_WORDS
|
||||||
from .tag_map import TAG_MAP
|
from .tag_map import TAG_MAP
|
||||||
from ...attrs import LANG
|
from ...attrs import LANG
|
||||||
|
@ -10,35 +8,12 @@ from ...language import Language
|
||||||
from ...tokens import Doc
|
from ...tokens import Doc
|
||||||
from ...compat import copy_reg
|
from ...compat import copy_reg
|
||||||
from ...util import DummyTokenizer
|
from ...util import DummyTokenizer
|
||||||
from ...compat import is_python3, is_python_pre_3_5
|
|
||||||
|
|
||||||
is_python_post_3_7 = is_python3 and sys.version_info[1] >= 7
|
|
||||||
|
|
||||||
# fmt: off
|
|
||||||
if is_python_pre_3_5:
|
|
||||||
from collections import namedtuple
|
|
||||||
Morpheme = namedtuple("Morpheme", "surface lemma tag")
|
|
||||||
elif is_python_post_3_7:
|
|
||||||
from dataclasses import dataclass
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class Morpheme:
|
|
||||||
surface: str
|
|
||||||
lemma: str
|
|
||||||
tag: str
|
|
||||||
else:
|
|
||||||
from typing import NamedTuple
|
|
||||||
|
|
||||||
class Morpheme(NamedTuple):
|
|
||||||
|
|
||||||
surface = str("")
|
|
||||||
lemma = str("")
|
|
||||||
tag = str("")
|
|
||||||
|
|
||||||
|
|
||||||
def try_mecab_import():
|
def try_mecab_import():
|
||||||
try:
|
try:
|
||||||
from natto import MeCab
|
from natto import MeCab
|
||||||
|
|
||||||
return MeCab
|
return MeCab
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
|
@ -46,6 +21,8 @@ def try_mecab_import():
|
||||||
"[mecab-ko-dic](https://bitbucket.org/eunjeon/mecab-ko-dic), "
|
"[mecab-ko-dic](https://bitbucket.org/eunjeon/mecab-ko-dic), "
|
||||||
"and [natto-py](https://github.com/buruzaemon/natto-py)"
|
"and [natto-py](https://github.com/buruzaemon/natto-py)"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# fmt: on
|
# fmt: on
|
||||||
|
|
||||||
|
|
||||||
|
@ -72,10 +49,10 @@ class KoreanTokenizer(DummyTokenizer):
|
||||||
surfaces = [dt.surface for dt in dtokens]
|
surfaces = [dt.surface for dt in dtokens]
|
||||||
doc = Doc(self.vocab, words=surfaces, spaces=list(check_spaces(text, surfaces)))
|
doc = Doc(self.vocab, words=surfaces, spaces=list(check_spaces(text, surfaces)))
|
||||||
for token, dtoken in zip(doc, dtokens):
|
for token, dtoken in zip(doc, dtokens):
|
||||||
first_tag, sep, eomi_tags = dtoken.tag.partition("+")
|
first_tag, sep, eomi_tags = dtoken["tag"].partition("+")
|
||||||
token.tag_ = first_tag # stem(어간) or pre-final(선어말 어미)
|
token.tag_ = first_tag # stem(어간) or pre-final(선어말 어미)
|
||||||
token.lemma_ = dtoken.lemma
|
token.lemma_ = dtoken["lemma"]
|
||||||
doc.user_data["full_tags"] = [dt.tag for dt in dtokens]
|
doc.user_data["full_tags"] = [dt["tag"] for dt in dtokens]
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def detailed_tokens(self, text):
|
def detailed_tokens(self, text):
|
||||||
|
@ -91,7 +68,7 @@ class KoreanTokenizer(DummyTokenizer):
|
||||||
lemma, _, remainder = expr.partition("/")
|
lemma, _, remainder = expr.partition("/")
|
||||||
if lemma == "*":
|
if lemma == "*":
|
||||||
lemma = surface
|
lemma = surface
|
||||||
yield Morpheme(surface, lemma, tag)
|
yield {"surface": surface, "lemma": lemma, "tag": tag}
|
||||||
|
|
||||||
|
|
||||||
class KoreanDefaults(Language.Defaults):
|
class KoreanDefaults(Language.Defaults):
|
||||||
|
|
Loading…
Reference in New Issue