mirror of https://github.com/explosion/spaCy.git
Tmp
This commit is contained in:
parent
12039e80ca
commit
e420e5a809
|
@ -16,6 +16,15 @@ from spacy.scorer import Scorer
|
|||
from spacy.language_data.tag_map import TAG_MAP as DEFAULT_TAG_MAP
|
||||
import spacy.attrs
|
||||
import io
|
||||
from thinc.neural.ops import CupyOps
|
||||
from thinc.neural import Model
|
||||
|
||||
try:
|
||||
import cupy
|
||||
print("Using GPU")
|
||||
Model.ops = CupyOps()
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
def read_conllx(loc, n=0):
|
||||
|
@ -137,10 +146,10 @@ def main(lang_name, train_loc, dev_loc, model_dir, clusters_loc=None):
|
|||
|
||||
Xs, ys = organize_data(vocab, train_sents)
|
||||
dev_Xs, dev_ys = organize_data(vocab, dev_sents)
|
||||
Xs = Xs[:500]
|
||||
ys = ys[:500]
|
||||
dev_Xs = dev_Xs[:100]
|
||||
dev_ys = dev_ys[:100]
|
||||
Xs = Xs
|
||||
ys = ys
|
||||
dev_Xs = dev_Xs[:1000]
|
||||
dev_ys = dev_ys[:1000]
|
||||
with encoder.model.begin_training(Xs[:100], ys[:100]) as (trainer, optimizer):
|
||||
docs = list(Xs)
|
||||
for doc in docs:
|
||||
|
@ -154,9 +163,9 @@ def main(lang_name, train_loc, dev_loc, model_dir, clusters_loc=None):
|
|||
print('%d:\t%.3f\t%.3f\t%.3f' % (itn, nn_loss[-1], scorer.uas, scorer.tags_acc))
|
||||
nn_loss.append(0.)
|
||||
trainer.each_epoch.append(track_progress)
|
||||
trainer.batch_size = 6
|
||||
trainer.nb_epoch = 10000
|
||||
for docs, golds in trainer.iterate(Xs, ys, progress_bar=False):
|
||||
trainer.batch_size = 12
|
||||
trainer.nb_epoch = 10
|
||||
for docs, golds in trainer.iterate(Xs, ys):
|
||||
docs = [Doc(vocab, words=[w.text for w in doc]) for doc in docs]
|
||||
tokvecs, upd_tokvecs = encoder.begin_update(docs)
|
||||
for doc, tokvec in zip(docs, tokvecs):
|
||||
|
@ -165,7 +174,7 @@ def main(lang_name, train_loc, dev_loc, model_dir, clusters_loc=None):
|
|||
tagger.update(doc, gold)
|
||||
d_tokvecs, loss = parser.update(docs, golds, sgd=optimizer)
|
||||
upd_tokvecs(d_tokvecs, sgd=optimizer)
|
||||
encoder.update(docs, golds, optimizer)
|
||||
encoder.update(docs, golds, sgd=optimizer)
|
||||
nn_loss[-1] += loss
|
||||
nlp = LangClass(vocab=vocab, tagger=tagger, parser=parser)
|
||||
nlp.end_training(model_dir)
|
||||
|
|
|
@ -132,7 +132,7 @@ cdef class Parser:
|
|||
"""
|
||||
self.parse_batch([tokens])
|
||||
self.moves.finalize_doc(tokens)
|
||||
|
||||
|
||||
def pipe(self, stream, int batch_size=1000, int n_threads=2):
|
||||
"""
|
||||
Process a stream of documents.
|
||||
|
@ -175,6 +175,7 @@ cdef class Parser:
|
|||
tokvecs = [d.tensor for d in docs]
|
||||
all_states = list(states)
|
||||
todo = zip(states, tokvecs)
|
||||
i = 0
|
||||
while todo:
|
||||
states, tokvecs = zip(*todo)
|
||||
scores, _ = self._begin_update(states, tokvecs)
|
||||
|
@ -182,6 +183,9 @@ cdef class Parser:
|
|||
action = self.moves.c[guess]
|
||||
action.do(state.c, action.label)
|
||||
todo = filter(lambda sp: not sp[0].py_is_final(), todo)
|
||||
i += 1
|
||||
if i >= 10000:
|
||||
break
|
||||
for state, doc in zip(all_states, docs):
|
||||
self.moves.finalize_state(state.c)
|
||||
for i in range(doc.length):
|
||||
|
@ -218,6 +222,7 @@ cdef class Parser:
|
|||
todo = zip(states, tokvecs, golds, d_tokens)
|
||||
assert len(states) == len(todo)
|
||||
losses = []
|
||||
i = 0
|
||||
while todo:
|
||||
states, tokvecs, golds, d_tokens = zip(*todo)
|
||||
scores, finish_update = self._begin_update(states, tokvecs)
|
||||
|
@ -232,6 +237,9 @@ cdef class Parser:
|
|||
|
||||
# Get unfinished states (and their matching gold and token gradients)
|
||||
todo = filter(lambda sp: not sp[0].py_is_final(), todo)
|
||||
i += 1
|
||||
if i >= 10000:
|
||||
break
|
||||
return output, sum(losses)
|
||||
|
||||
def _begin_update(self, states, tokvecs, drop=0.):
|
||||
|
@ -284,7 +292,7 @@ cdef class Parser:
|
|||
state.set_attributes(features[i], tokens[i], attr_names)
|
||||
state.set_token_vectors(tokvecs[i], all_tokvecs[i], tokens[i])
|
||||
return (tokens, features, tokvecs)
|
||||
|
||||
|
||||
def _validate_batch(self, int[:, ::1] is_valid, states):
|
||||
cdef StateClass state
|
||||
cdef int i
|
||||
|
|
Loading…
Reference in New Issue