mirror of https://github.com/explosion/spaCy.git
* Fix empty values in attributes and parts of speech, so symbols align correctly with the StringStore
This commit is contained in:
parent
e18fbcb604
commit
20e909d2bb
|
@ -2,7 +2,7 @@ from __future__ import unicode_literals
|
|||
|
||||
|
||||
IDS = {
|
||||
"NO_TAG": NO_TAG,
|
||||
"": NO_TAG,
|
||||
"ADJ": ADJ,
|
||||
"ADP": ADP,
|
||||
"ADV": ADV,
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
SYMBOL_IDS = {
|
||||
"EMPTY_VALUE": EMPTY_VALUE,
|
||||
"Attr_is_alpha": Attr_is_alpha,
|
||||
"Attr_is_ascii": Attr_is_ascii,
|
||||
"Attr_is_digit": Attr_is_digit,
|
||||
|
|
|
@ -76,8 +76,10 @@ cdef class Vocab:
|
|||
# strings are loaded first, because the vocab is open-class, and these
|
||||
# symbols are closed class.
|
||||
for name in attrs.NAMES:
|
||||
if name:
|
||||
_ = self.strings[name]
|
||||
for name in parts_of_speech.NAMES:
|
||||
if name:
|
||||
_ = self.strings[name]
|
||||
#for morph_name in UNIV_MORPH_NAMES:
|
||||
# _ = self.strings[morph_name]
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
from __future__ import unicode_literals
|
||||
import pytest
|
||||
|
||||
from spacy.attrs import LEMMA, ORTH, PROB, IS_ALPHA
|
||||
from spacy.parts_of_speech import NOUN, VERB
|
||||
|
||||
|
||||
def test_neq(en_vocab):
|
||||
addr = en_vocab['Hello']
|
||||
|
@ -25,3 +28,13 @@ def test_punct_neq(en_vocab):
|
|||
def test_shape_attr(en_vocab):
    """The `orth` and `shape` attributes of the 'example' lexeme must differ."""
    lexeme = en_vocab['example']
    orth_id, shape_id = lexeme.orth, lexeme.shape
    assert orth_id != shape_id
|
||||
|
||||
|
||||
def test_symbols(en_vocab):
    """Looking up a symbol name in the vocab's strings must give its constant.

    Each name is looked up in `en_vocab.strings` and compared against the
    constant of the same name imported from `spacy.attrs` or
    `spacy.parts_of_speech`.
    """
    # Order is kept the same as the original sequence of assertions.
    expected_symbols = (
        ('IS_ALPHA', IS_ALPHA),
        ('NOUN', NOUN),
        ('VERB', VERB),
        ('LEMMA', LEMMA),
        ('ORTH', ORTH),
        ('PROB', PROB),
    )
    for symbol_name, symbol_id in expected_symbols:
        assert en_vocab.strings[symbol_name] == symbol_id
|
||||
|
||||
|
|
Loading…
Reference in New Issue