mirror of https://github.com/explosion/spaCy.git
* Move spacy.syntax.conll to spacy.gold
This commit is contained in:
parent
765b61cac4
commit
fc75210941
|
@ -20,8 +20,8 @@ from spacy.en.pos import POS_TEMPLATES, POS_TAGS, setup_model_dir
|
|||
from spacy.syntax.parser import GreedyParser
|
||||
from spacy.syntax.parser import OracleError
|
||||
from spacy.syntax.util import Config
|
||||
from spacy.syntax.conll import read_json_file
|
||||
from spacy.syntax.conll import GoldParse
|
||||
from spacy.gold import read_json_file
|
||||
from spacy.gold import GoldParse
|
||||
|
||||
from spacy.scorer import Scorer
|
||||
|
||||
|
@ -65,11 +65,13 @@ def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic', seed=0
|
|||
gold_tuples = gold_tuples[:n_sents]
|
||||
nlp = Language(data_dir=model_dir)
|
||||
|
||||
print "Itn.\tUAS\tNER F.\tTag %\tToken %"
|
||||
print "Itn.\tP.Loss\tUAS\tNER F.\tTag %\tToken %"
|
||||
for itn in range(n_iter):
|
||||
scorer = Scorer()
|
||||
loss = 0
|
||||
for raw_text, annot_tuples, ctnt in gold_tuples:
|
||||
raw_text = ''.join(add_noise(c, corruption_level) for c in raw_text)
|
||||
if corruption_level != 0:
|
||||
raw_text = ''.join(add_noise(c, corruption_level) for c in raw_text)
|
||||
tokens = nlp(raw_text, merge_mwes=False)
|
||||
gold = GoldParse(tokens, annot_tuples)
|
||||
scorer.score(tokens, gold, verbose=False)
|
||||
|
@ -79,7 +81,7 @@ def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic', seed=0
|
|||
gold = GoldParse(tokens, annot_tuples)
|
||||
nlp.tagger(tokens)
|
||||
try:
|
||||
nlp.parser.train(tokens, gold)
|
||||
loss += nlp.parser.train(tokens, gold)
|
||||
except AssertionError:
|
||||
# TODO: Do something about non-projective sentences
|
||||
continue
|
||||
|
@ -87,7 +89,7 @@ def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic', seed=0
|
|||
nlp.entity.train(tokens, gold)
|
||||
nlp.tagger.train(tokens, gold.tags)
|
||||
|
||||
print '%d:\t%.3f\t%.3f\t%.3f\t%.3f' % (itn, scorer.uas, scorer.ents_f,
|
||||
print '%d:\t%d\t%.3f\t%.3f\t%.3f\t%.3f' % (itn, loss, scorer.uas, scorer.ents_f,
|
||||
scorer.tags_acc,
|
||||
scorer.token_acc)
|
||||
random.shuffle(gold_tuples)
|
||||
|
@ -148,15 +150,16 @@ def get_sents(json_loc):
|
|||
model_dir=("Location of output model directory",),
|
||||
out_loc=("Out location", "option", "o", str),
|
||||
n_sents=("Number of training sentences", "option", "n", int),
|
||||
n_iter=("Number of training iterations", "option", "i", int),
|
||||
verbose=("Verbose error reporting", "flag", "v", bool),
|
||||
debug=("Debug mode", "flag", "d", bool)
|
||||
)
|
||||
def main(train_loc, dev_loc, model_dir, n_sents=0, out_loc="", verbose=False,
|
||||
def main(train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False,
|
||||
debug=False, corruption_level=0.0):
|
||||
train(English, read_json_file(train_loc), model_dir,
|
||||
feat_set='basic' if not debug else 'debug',
|
||||
gold_preproc=False, n_sents=n_sents,
|
||||
corruption_level=corruption_level)
|
||||
corruption_level=corruption_level, n_iter=n_iter)
|
||||
if out_loc:
|
||||
write_parses(English, dev_loc, model_dir, out_loc)
|
||||
scorer = evaluate(English, read_json_file(dev_loc),
|
||||
|
|
2
setup.py
2
setup.py
|
@ -152,7 +152,7 @@ MOD_NAMES = ['spacy.parts_of_speech', 'spacy.strings',
|
|||
'spacy.en.pos', 'spacy.syntax.parser', 'spacy.syntax._state',
|
||||
'spacy.syntax.transition_system',
|
||||
'spacy.syntax.arc_eager', 'spacy.syntax._parse_features',
|
||||
'spacy.syntax.conll', 'spacy.orth',
|
||||
'spacy.gold', 'spacy.orth',
|
||||
'spacy.syntax.ner']
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from cymem.cymem cimport Pool
|
||||
|
||||
from ..structs cimport TokenC
|
||||
from .transition_system cimport Transition
|
||||
from .structs cimport TokenC
|
||||
from .syntax.transition_system cimport Transition
|
||||
|
||||
cimport numpy
|
||||
|
|
@ -2,7 +2,7 @@ import numpy
|
|||
import codecs
|
||||
import json
|
||||
import random
|
||||
from spacy.munge.alignment import align
|
||||
from .munge.alignment import align
|
||||
|
||||
from libc.string cimport memset
|
||||
|
|
@ -10,7 +10,7 @@ from ._state cimport count_left_kids
|
|||
from ..structs cimport TokenC
|
||||
|
||||
from .transition_system cimport do_func_t, get_cost_func_t
|
||||
from .conll cimport GoldParse
|
||||
from ..gold cimport GoldParse
|
||||
|
||||
|
||||
DEF NON_MONOTONIC = True
|
||||
|
|
|
@ -8,7 +8,7 @@ from .transition_system cimport do_func_t
|
|||
from ..structs cimport TokenC, Entity
|
||||
|
||||
from thinc.typedefs cimport weight_t
|
||||
from .conll cimport GoldParse
|
||||
from ..gold cimport GoldParse
|
||||
|
||||
|
||||
cdef enum:
|
||||
|
|
|
@ -30,7 +30,7 @@ from .arc_eager cimport TransitionSystem, Transition
|
|||
from .transition_system import OracleError
|
||||
|
||||
from ._state cimport new_state, State, is_final, get_idx, get_s0, get_s1, get_n0, get_n1
|
||||
from .conll cimport GoldParse
|
||||
from ..gold cimport GoldParse
|
||||
|
||||
from . import _parse_features
|
||||
from ._parse_features cimport fill_context, CONTEXT_SIZE
|
||||
|
@ -107,14 +107,21 @@ cdef class GreedyParser:
|
|||
cdef Transition guess
|
||||
cdef Transition best
|
||||
cdef atom_t[CONTEXT_SIZE] context
|
||||
loss = 0
|
||||
while not is_final(state):
|
||||
|
||||
fill_context(context, state)
|
||||
scores = self.model.score(context)
|
||||
guess = self.moves.best_valid(scores, state)
|
||||
best = self.moves.best_gold(scores, state, gold)
|
||||
#print self.moves.move_name(guess.move, guess.label),
|
||||
#print self.moves.move_name(best.move, best.label),
|
||||
#print print_state(state, py_words)
|
||||
|
||||
cost = guess.get_cost(&guess, state, gold)
|
||||
self.model.update(context, guess.clas, best.clas, cost)
|
||||
|
||||
guess.do(&guess, state)
|
||||
loss += cost
|
||||
self.moves.finalize_state(state)
|
||||
return loss
|
||||
|
|
|
@ -3,7 +3,7 @@ from thinc.typedefs cimport weight_t
|
|||
|
||||
from ..structs cimport TokenC
|
||||
from ._state cimport State
|
||||
from .conll cimport GoldParse
|
||||
from ..gold cimport GoldParse
|
||||
from ..strings cimport StringStore
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue