mirror of https://github.com/explosion/spaCy.git
added tag_map for indonesian (#3515)
* Added tag_map for Indonesian
* Changed tag map from .py to .txt to see if tests pass
* Added symbols import
* Added utf8 encoding flag
* Added missing SCONJ symbol
* Auto-format
* Remove unused imports
* Make tag map available in Indonesian defaults
This commit is contained in:
parent
c23e234d65
commit
6cdb7b2e04
|
@ -8,6 +8,7 @@ from .norm_exceptions import NORM_EXCEPTIONS
|
|||
from .lemmatizer import LOOKUP
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from .tag_map import TAG_MAP
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ..norm_exceptions import BASE_NORMS
|
||||
|
@ -30,6 +31,7 @@ class IndonesianDefaults(Language.Defaults):
|
|||
infixes = TOKENIZER_INFIXES
|
||||
syntax_iterators = SYNTAX_ITERATORS
|
||||
lemma_lookup = LOOKUP
|
||||
tag_map = TAG_MAP
|
||||
|
||||
|
||||
class Indonesian(Language):
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ...symbols import POS, PUNCT, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
|
||||
from ...symbols import NOUN, PRON, AUX, SCONJ
|
||||
|
||||
|
||||
# Explanations of the Indonesian POS tags are available from https://www.aclweb.org/anthology/Y12-1014
|
||||
# Maps each fine-grained Indonesian tag (dict key) to the coarse-grained
# universal POS symbol it should be reported as.
#
# Every key is exactly three ASCII characters. Tags whose category is a single
# letter are padded with ASCII hyphen-minus characters ("Z--", "R--", ...),
# consistent with the two-letter tags "CC-", "CO-" and "CD-". A typographic
# en dash must never be used in place of "--": such a key would not match the
# tags found in the annotated data.
TAG_MAP = {
    "NSD": {POS: NOUN},
    "Z--": {POS: PUNCT},
    "VSA": {POS: VERB},
    "CC-": {POS: NUM},
    "R--": {POS: ADP},
    "D--": {POS: ADV},
    "ASP": {POS: ADJ},
    "S--": {POS: SCONJ},
    "VSP": {POS: VERB},
    "H--": {POS: CCONJ},
    "F--": {POS: X},
    "B--": {POS: DET},
    "CO-": {POS: NUM},
    "G--": {POS: ADV},
    "PS3": {POS: PRON},
    "W--": {POS: ADV},
    "O--": {POS: AUX},
    "PP1": {POS: PRON},
    "ASS": {POS: ADJ},
    "PS1": {POS: PRON},
    "APP": {POS: ADJ},
    "CD-": {POS: NUM},
    "VPA": {POS: VERB},
    "VPP": {POS: VERB},
}
|
Loading…
Reference in New Issue