* Fix empty values in attributes and parts of speech, so symbols align correctly with the StringStore

This commit is contained in:
Matthew Honnibal 2015-10-10 18:27:03 +11:00
parent e18fbcb604
commit 20e909d2bb
4 changed files with 18 additions and 4 deletions

View File

@ -2,7 +2,7 @@ from __future__ import unicode_literals
IDS = {
"NO_TAG": NO_TAG,
"": NO_TAG,
"ADJ": ADJ,
"ADP": ADP,
"ADV": ADV,

View File

@ -1,5 +1,4 @@
SYMBOL_IDS = {
"EMPTY_VALUE": EMPTY_VALUE,
"Attr_is_alpha": Attr_is_alpha,
"Attr_is_ascii": Attr_is_ascii,
"Attr_is_digit": Attr_is_digit,

View File

@ -76,8 +76,10 @@ cdef class Vocab:
# strings are loaded first, because the vocab is open-class, and these
# symbols are closed class.
for name in attrs.NAMES:
if name:
_ = self.strings[name]
for name in parts_of_speech.NAMES:
if name:
_ = self.strings[name]
#for morph_name in UNIV_MORPH_NAMES:
# _ = self.strings[morph_name]

View File

@ -1,6 +1,9 @@
from __future__ import unicode_literals
import pytest
from spacy.attrs import LEMMA, ORTH, PROB, IS_ALPHA
from spacy.parts_of_speech import NOUN, VERB
def test_neq(en_vocab):
addr = en_vocab['Hello']
@ -25,3 +28,13 @@ def test_punct_neq(en_vocab):
def test_shape_attr(en_vocab):
    """The `orth` and `shape` attributes of the 'example' entry must differ."""
    lexeme = en_vocab['example']
    assert lexeme.orth != lexeme.shape
def test_symbols(en_vocab):
    """Each symbol name must look up, via `en_vocab.strings`, to its imported constant."""
    # Pairs are checked in the same order as the individual lookups they replace.
    expected_symbols = (
        ('IS_ALPHA', IS_ALPHA),
        ('NOUN', NOUN),
        ('VERB', VERB),
        ('LEMMA', LEMMA),
        ('ORTH', ORTH),
        ('PROB', PROB),
    )
    for symbol_name, symbol_id in expected_symbols:
        assert en_vocab.strings[symbol_name] == symbol_id