mirror of https://github.com/explosion/spaCy.git
Merge remote-tracking branch 'refs/remotes/honnibal/master'
commit 90c6c5fabf
@@ -384,16 +384,16 @@ cdef class ArcEager(TransitionSystem):
         for i in range(st.length):
             # Always attach spaces to the previous word
             if Lexeme.c_check_flag(st._sent[i].lex, IS_SPACE):
-                if i >= 1:
-                    st.add_arc(i-1, i, st._sent[i].dep)
-                else:
-                    st.add_arc(i+1, i, st._sent[i].dep)
                 if st._sent[i].sent_start and st._sent[i].head == -1:
                     st._sent[i].sent_start = False
                     # If we had this space token as the start of a sentence,
                     # move that sentence start forward one
                     if (i + 1) < st.length and not st._sent[i+1].sent_start:
                         st._sent[i+1].sent_start = True
+                if i >= 1:
+                    st.add_arc(i-1, i, st._sent[i].dep)
+                else:
+                    st.add_arc(i+1, i, st._sent[i].dep)
             elif st._sent[i].head == 0 and st._sent[i].dep == 0:
                 st._sent[i].dep = self.root_label
         # If we're not using the Break transition, we segment via root-labelled
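The hunk above reorders the whitespace handling in ArcEager's preprocessing: the sentence-start fix-up for a space token now runs before the arc to the neighbouring word is added, instead of after it. Below is a minimal plain-Python sketch of that ordering; the Token dataclass and the attach_spaces helper are hypothetical stand-ins for spaCy's Cython token structs, and a plain head assignment stands in for st.add_arc.

from dataclasses import dataclass


@dataclass
class Token:
    is_space: bool
    head: int = -1          # -1 means no head assigned yet
    dep: str = "dep"
    sent_start: bool = False


def attach_spaces(tokens):
    # Mirrors the order in the new version of the hunk: fix the sentence
    # boundary first, then attach the space token to a neighbouring word.
    for i, tok in enumerate(tokens):
        if tok.is_space:
            # A sentence is not allowed to start on a space token: clear the
            # flag here and push it onto the following word.
            if tok.sent_start and tok.head == -1:
                tok.sent_start = False
                if i + 1 < len(tokens) and not tokens[i + 1].sent_start:
                    tokens[i + 1].sent_start = True
            # Always attach the space to the previous word, or to the next
            # word when the space is the very first token.
            tok.head = i - 1 if i >= 1 else i + 1
    return tokens

The sketch only illustrates the control flow; the real transition system records the arc and its label via st.add_arc(head, child, label).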
@@ -25,28 +25,34 @@ from thinc.learner import LinearModel
 
 
 class TestLoadVocab(unittest.TestCase):
     def test_load(self):
-        vocab = Vocab.from_dir(path.join(English.default_data_dir(), 'vocab'))
+        if path.exists(path.join(English.default_data_dir(), 'vocab')):
+            vocab = Vocab.from_dir(path.join(English.default_data_dir(), 'vocab'))
 
 
 class TestLoadTokenizer(unittest.TestCase):
     def test_load(self):
         data_dir = English.default_data_dir()
-        vocab = Vocab.from_dir(path.join(data_dir, 'vocab'))
-        tokenizer = Tokenizer.from_dir(vocab, path.join(data_dir, 'tokenizer'))
+        if path.exists(path.join(data_dir, 'vocab')):
+            vocab = Vocab.from_dir(path.join(data_dir, 'vocab'))
+            tokenizer = Tokenizer.from_dir(vocab, path.join(data_dir, 'tokenizer'))
 
 
 class TestLoadTagger(unittest.TestCase):
     def test_load(self):
         data_dir = English.default_data_dir()
-        vocab = Vocab.from_dir(path.join(data_dir, 'vocab'))
-        tagger = Tagger.from_dir(path.join(data_dir, 'tagger'), vocab)
+        if path.exists(path.join(data_dir, 'vocab')):
+            vocab = Vocab.from_dir(path.join(data_dir, 'vocab'))
+            tagger = Tagger.from_dir(path.join(data_dir, 'tagger'), vocab)
 
 
 class TestLoadParser(unittest.TestCase):
     def test_load(self):
         data_dir = English.default_data_dir()
-        vocab = Vocab.from_dir(path.join(data_dir, 'vocab'))
-        parser = Parser.from_dir(path.join(data_dir, 'deps'), vocab.strings, ArcEager)
+        if path.exists(path.join(data_dir, 'vocab')):
+            vocab = Vocab.from_dir(path.join(data_dir, 'vocab'))
+        if path.exists(path.join(data_dir, 'deps')):
+            parser = Parser.from_dir(path.join(data_dir, 'deps'), vocab.strings, ArcEager)
 
     def test_load_careful(self):
         config_data = {"labels": {"0": {"": True}, "1": {"": True}, "2": {"cc": True, "agent": True, "ccomp": True, "prt": True, "meta": True, "nsubjpass": True, "csubj": True, "conj": True, "dobj": True, "neg": True, "csubjpass": True, "mark": True, "auxpass": True, "advcl": True, "aux": True, "ROOT": True, "prep": True, "parataxis": True, "xcomp": True, "nsubj": True, "nummod": True, "advmod": True, "punct": True, "relcl": True, "quantmod": True, "acomp": True, "compound": True, "pcomp": True, "intj": True, "poss": True, "npadvmod": True, "case": True, "attr": True, "dep": True, "appos": True, "det": True, "nmod": True, "amod": True, "dative": True, "pobj": True, "expl": True, "predet": True, "preconj": True, "oprd": True, "acl": True}, "3": {"cc": True, "agent": True, "ccomp": True, "prt": True, "meta": True, "nsubjpass": True, "csubj": True, "conj": True, "acl": True, "poss": True, "neg": True, "mark": True, "auxpass": True, "advcl": True, "aux": True, "amod": True, "ROOT": True, "prep": True, "parataxis": True, "xcomp": True, "nsubj": True, "nummod": True, "advmod": True, "punct": True, "quantmod": True, "acomp": True, "pcomp": True, "intj": True, "relcl": True, "npadvmod": True, "case": True, "attr": True, "dep": True, "appos": True, "det": True, "nmod": True, "dobj": True, "dative": True, "pobj": True, "iobj": True, "expl": True, "predet": True, "preconj": True, "oprd": True}, "4": {"ROOT": True}}, "seed": 0, "features": "basic", "beam_width": 1}
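The test changes above wrap each data-dependent load in a path.exists() guard, so on a machine without the model data the test body is simply skipped over and the test still passes. A hedged alternative is unittest.skipUnless, which reports the test as skipped rather than silently green; this sketch assumes the import paths spacy.en and spacy.vocab, which are not shown in the hunk.

import unittest
from os import path

from spacy.en import English    # assumed import path for this era of spaCy
from spacy.vocab import Vocab   # assumed import path


class TestLoadVocab(unittest.TestCase):
    @unittest.skipUnless(
        path.exists(path.join(English.default_data_dir(), 'vocab')),
        "vocab data directory not installed")
    def test_load(self):
        vocab = Vocab.from_dir(path.join(English.default_data_dir(), 'vocab'))
        self.assertIsNotNone(vocab)


if __name__ == '__main__':
    unittest.main()

Both approaches avoid hard failures on machines without the data; the skip decorator just makes the missing data visible in the test report.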
@@ -67,8 +73,9 @@ class TestLoadParser(unittest.TestCase):
 
         # n classes. moves.n_moves above
         # n features. len(templates) + 1 above
-        model = LinearModel(92, 116)
-        model.load(model_loc)
+        if path.exists(model_loc):
+            model = LinearModel(92, 116)
+            model.load(model_loc)
 
 
 if __name__ == '__main__':
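In this last hunk the hard-coded dimensions correspond, per the comments, to moves.n_moves (92 classes) and len(templates) + 1 (116 features) defined earlier in the test, and both the model construction and the weight load are now skipped when the serialized model is absent. A self-contained sketch of that guard follows; FakeLinearModel, n_moves, n_templates and model_loc are all hypothetical stand-ins, and thinc's real LinearModel is not imported.

from os import path


class FakeLinearModel:
    # Stand-in exposing only the two calls the test uses.
    def __init__(self, n_classes, n_features):
        self.shape = (n_classes, n_features)

    def load(self, loc):
        print("would load weights from", loc)


n_moves = 92                    # stands in for moves.n_moves
n_templates = 115               # stands in for len(templates)
model_loc = path.join('data', 'deps', 'model')   # hypothetical location

if path.exists(model_loc):
    model = FakeLinearModel(n_moves, n_templates + 1)
    model.load(model_loc)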