From 2512ea9eeba228c59d229029bfcb0775ca622148 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 14 Nov 2017 02:11:40 +0100 Subject: [PATCH 1/3] Fix memory leak in beam parser --- spacy/syntax/_beam_utils.pyx | 51 +++++++++---------- spacy/syntax/arc_eager.pyx | 28 ++++++----- spacy/syntax/ner.pyx | 28 +++++------ spacy/syntax/nn_parser.pyx | 78 +++++++++++++++++++----------- spacy/syntax/stateclass.pxd | 10 ++++ spacy/syntax/stateclass.pyx | 4 +- spacy/syntax/transition_system.pyx | 3 +- 7 files changed, 118 insertions(+), 84 deletions(-) diff --git a/spacy/syntax/_beam_utils.pyx b/spacy/syntax/_beam_utils.pyx index 8a1e5a5fe..b26d13e4d 100644 --- a/spacy/syntax/_beam_utils.pyx +++ b/spacy/syntax/_beam_utils.pyx @@ -9,36 +9,31 @@ from thinc.typedefs cimport hash_t, class_t from thinc.extra.search cimport MaxViolation from .transition_system cimport TransitionSystem, Transition -from .stateclass cimport StateClass from ..gold cimport GoldParse +from .stateclass cimport StateC, StateClass # These are passed as callbacks to thinc.search.Beam cdef int _transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1: - dest = _dest - src = _src + dest = _dest + src = _src moves = _moves dest.clone(src) - moves[clas].do(dest.c, moves[clas].label) - dest.c.push_hist(clas) + moves[clas].do(dest, moves[clas].label) + dest.push_hist(clas) cdef int _check_final_state(void* _state, void* extra_args) except -1: - return (_state).is_final() - - -def _cleanup(Beam beam): - for i in range(beam.width): - Py_XDECREF(beam._states[i].content) - Py_XDECREF(beam._parents[i].content) + state = _state + return state.is_final() cdef hash_t _hash_state(void* _state, void* _) except 0: - state = _state - if state.c.is_final(): + state = _state + if state.is_final(): return 1 else: - return state.c.hash() + return state.hash() cdef class ParserBeam(object): @@ -55,14 +50,15 @@ cdef class ParserBeam(object): self.golds = golds self.beams = [] cdef Beam beam - cdef StateClass state, st + cdef StateClass state + cdef StateC* st for state in states: beam = Beam(self.moves.n_moves, width, density) beam.initialize(self.moves.init_beam_state, state.c.length, state.c._sent) for i in range(beam.width): - st = beam.at(i) - st.c.offset = state.c.offset + st = beam.at(i) + st.offset = state.c.offset self.beams.append(beam) self.dones = [False] * len(self.beams) @@ -86,13 +82,14 @@ cdef class ParserBeam(object): if self.golds is not None: self._set_costs(beam, self.golds[i], follow_gold=follow_gold) if follow_gold: - beam.advance(_transition_state, NULL, self.moves.c) + beam.advance(_transition_state, _hash_state, self.moves.c) else: beam.advance(_transition_state, _hash_state, self.moves.c) beam.check_done(_check_final_state, NULL) + # This handles the non-monotonic stuff for the parser. 
if beam.is_done and self.golds is not None: for j in range(beam.size): - state = beam.at(j) + state = StateClass.borrow(beam.at(j)) if state.is_final(): try: if self.moves.is_gold_parse(state, self.golds[i]): @@ -107,11 +104,11 @@ cdef class ParserBeam(object): cdef int nr_state = min(scores.shape[0], beam.size) cdef int nr_class = scores.shape[1] for i in range(nr_state): - state = beam.at(i) + state = beam.at(i) if not state.is_final(): for j in range(nr_class): beam.scores[i][j] = c_scores[i * nr_class + j] - self.moves.set_valid(beam.is_valid[i], state.c) + self.moves.set_valid(beam.is_valid[i], state) else: for j in range(beam.nr_class): beam.scores[i][j] = 0 @@ -119,8 +116,8 @@ cdef class ParserBeam(object): def _set_costs(self, Beam beam, GoldParse gold, int follow_gold=False): for i in range(beam.size): - state = beam.at(i) - if not state.c.is_final(): + state = StateClass.borrow(beam.at(i)) + if not state.is_final(): self.moves.set_costs(beam.is_valid[i], beam.costs[i], state, gold) if follow_gold: @@ -157,7 +154,7 @@ def update_beam(TransitionSystem moves, int nr_feature, int max_steps, pbeam = ParserBeam(moves, states, golds, width=width, density=density) gbeam = ParserBeam(moves, states, golds, - width=width, density=0.0) + width=width, density=density) cdef StateClass state beam_maps = [] backprops = [] @@ -231,7 +228,7 @@ def get_states(pbeams, gbeams, beam_map, nr_update): p_indices.append([]) g_indices.append([]) for i in range(pbeam.size): - state = pbeam.at(i) + state = StateClass.borrow(pbeam.at(i)) if not state.is_final(): key = tuple([eg_id] + pbeam.histories[i]) assert key not in seen, (key, seen) @@ -240,7 +237,7 @@ def get_states(pbeams, gbeams, beam_map, nr_update): states.append(state) beam_map.update(seen) for i in range(gbeam.size): - state = gbeam.at(i) + state = StateClass.borrow(gbeam.at(i)) if not state.is_final(): key = tuple([eg_id] + gbeam.histories[i]) if key in seen: diff --git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx index b3c9b5563..16d55db24 100644 --- a/spacy/syntax/arc_eager.pyx +++ b/spacy/syntax/arc_eager.pyx @@ -292,12 +292,16 @@ cdef int _get_root(int word, const GoldParseC* gold) nogil: cdef void* _init_state(Pool mem, int length, void* tokens) except NULL: - cdef StateClass st = StateClass.init(tokens, length) - for i in range(st.c.length): - st.c._sent[i].l_edge = i - st.c._sent[i].r_edge = i + st = new StateC(tokens, length) + for i in range(st.length): + if st._sent[i].dep == 0: + st._sent[i].l_edge = i + st._sent[i].r_edge = i + st._sent[i].head = 0 + st._sent[i].dep = 0 + st._sent[i].l_kids = 0 + st._sent[i].r_kids = 0 st.fast_forward() - Py_INCREF(st) return st @@ -533,18 +537,18 @@ cdef class ArcEager(TransitionSystem): assert n_gold >= 1 def get_beam_annot(self, Beam beam): - length = (beam.at(0)).c.length + length = (beam.at(0)).length heads = [{} for _ in range(length)] deps = [{} for _ in range(length)] probs = beam.probs for i in range(beam.size): - stcls = beam.at(i) - self.finalize_state(stcls.c) - if stcls.is_final(): + state = beam.at(i) + self.finalize_state(state) + if state.is_final(): prob = probs[i] - for j in range(stcls.c.length): - head = j + stcls.c._sent[j].head - dep = stcls.c._sent[j].dep + for j in range(state.length): + head = j + state._sent[j].head + dep = state._sent[j].dep heads[j].setdefault(head, 0.0) heads[j][head] += prob deps[j].setdefault(dep, 0.0) diff --git a/spacy/syntax/ner.pyx b/spacy/syntax/ner.pyx index e2e242aea..999760ce0 100644 --- a/spacy/syntax/ner.pyx +++ 
b/spacy/syntax/ner.pyx @@ -123,14 +123,14 @@ cdef class BiluoPushDown(TransitionSystem): entities = {} probs = beam.probs for i in range(beam.size): - stcls = beam.at(i) - if stcls.is_final(): - self.finalize_state(stcls.c) + state = beam.at(i) + if state.is_final(): + self.finalize_state(state) prob = probs[i] - for j in range(stcls.c._e_i): - start = stcls.c._ents[j].start - end = stcls.c._ents[j].end - label = stcls.c._ents[j].label + for j in range(state._e_i): + start = state._ents[j].start + end = state._ents[j].end + label = state._ents[j].label entities.setdefault((start, end, label), 0.0) entities[(start, end, label)] += prob return entities @@ -139,15 +139,15 @@ cdef class BiluoPushDown(TransitionSystem): parses = [] probs = beam.probs for i in range(beam.size): - stcls = beam.at(i) - if stcls.is_final(): - self.finalize_state(stcls.c) + state = beam.at(i) + if state.is_final(): + self.finalize_state(state) prob = probs[i] parse = [] - for j in range(stcls.c._e_i): - start = stcls.c._ents[j].start - end = stcls.c._ents[j].end - label = stcls.c._ents[j].label + for j in range(state._e_i): + start = state._ents[j].start + end = state._ents[j].end + label = state._ents[j].label parse.append((start, end, self.strings[label])) parses.append((prob, parse)) return parses diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 73da8139d..3ed59d3e8 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -224,6 +224,16 @@ cdef void cpu_regression_loss(float* d_scores, d_scores[i] = diff +def _collect_states(beams): + cdef StateClass state + cdef Beam beam + states = [] + for beam in beams: + state = StateClass.borrow(beam.at(0)) + states.append(state) + return states + + cdef class Parser: """ Base class of the DependencyParser and EntityRecognizer. 
@@ -336,7 +346,7 @@ cdef class Parser: beam_density=beam_density) beam = beams[0] output = self.moves.get_beam_annot(beam) - state = beam.at(0) + state = StateClass.borrow(beam.at(0)) self.set_annotations([doc], [state], tensors=tokvecs) _cleanup(beam) return output @@ -356,10 +366,10 @@ cdef class Parser: if beam_density is None: beam_density = self.cfg.get('beam_density', 0.0) cdef Doc doc - cdef Beam beam for batch in cytoolz.partition_all(batch_size, docs): - batch = list(batch) - by_length = sorted(list(batch), key=lambda doc: len(doc)) + batch_in_order = list(batch) + by_length = sorted(batch_in_order, key=lambda doc: len(doc)) + batch_beams = [] for subbatch in cytoolz.partition_all(8, by_length): subbatch = list(subbatch) if beam_width == 1: @@ -369,21 +379,20 @@ cdef class Parser: beams, tokvecs = self.beam_parse(subbatch, beam_width=beam_width, beam_density=beam_density) - parse_states = [] - for beam in beams: - parse_states.append(beam.at(0)) - self.set_annotations(subbatch, parse_states, tensors=tokvecs) - yield from batch - for beam in beams: - _cleanup(beam) + parse_states = _collect_states(beams) + self.set_annotations(subbatch, parse_states, tensors=None) + for beam in beams: + _cleanup(beam) + for doc in batch_in_order: + yield doc def parse_batch(self, docs): cdef: precompute_hiddens state2vec - StateClass stcls Pool mem const float* feat_weights StateC* st + StateClass stcls vector[StateC*] states int guess, nr_class, nr_feat, nr_piece, nr_dim, nr_state, nr_step int j @@ -488,14 +497,14 @@ cdef class Parser: beam = Beam(nr_class, beam_width, min_density=beam_density) beam.initialize(self.moves.init_beam_state, doc.length, doc.c) for i in range(beam.width): - stcls = beam.at(i) - stcls.c.offset = offset + state = beam.at(i) + state.offset = offset offset += len(doc) beam.check_done(_check_final_state, NULL) while not beam.is_done: states = [] for i in range(beam.size): - stcls = beam.at(i) + stcls = StateClass.borrow(beam.at(i)) # This way we avoid having to score finalized states # We do have to take care to keep indexes aligned, though if not stcls.is_final(): @@ -511,9 +520,9 @@ cdef class Parser: j = 0 c_scores = scores.data for i in range(beam.size): - stcls = beam.at(i) - if not stcls.is_final(): - self.moves.set_valid(beam.is_valid[i], stcls.c) + state = beam.at(i) + if not state.is_final(): + self.moves.set_valid(beam.is_valid[i], state) for k in range(nr_class): beam.scores[i][k] = c_scores[j * scores.shape[1] + k] j += 1 @@ -965,27 +974,40 @@ cdef int arg_max_if_valid(const weight_t* scores, const int* is_valid, int n) no # These are passed as callbacks to thinc.search.Beam cdef int _transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1: - dest = _dest - src = _src + dest = _dest + src = _src moves = _moves dest.clone(src) - moves[clas].do(dest.c, moves[clas].label) - dest.c.push_hist(clas) + moves[clas].do(dest, moves[clas].label) + dest.push_hist(clas) cdef int _check_final_state(void* _state, void* extra_args) except -1: - return (_state).is_final() + state = _state + return state.is_final() def _cleanup(Beam beam): + cdef StateC* state + # Once parsing has finished, states in beam may not be unique. Is this + # correct? 
+ seen = set() for i in range(beam.width): - Py_XDECREF(beam._states[i].content) - Py_XDECREF(beam._parents[i].content) + addr = beam._parents[i].content + if addr not in seen: + state = addr + del state + seen.add(addr) + addr = beam._states[i].content + if addr not in seen: + state = addr + del state + seen.add(addr) cdef hash_t _hash_state(void* _state, void* _) except 0: - state = _state - if state.c.is_final(): + state = _state + if state.is_final(): return 1 else: - return state.c.hash() + return state.hash() diff --git a/spacy/syntax/stateclass.pxd b/spacy/syntax/stateclass.pxd index 0ae83ee27..0a9be3b7f 100644 --- a/spacy/syntax/stateclass.pxd +++ b/spacy/syntax/stateclass.pxd @@ -13,12 +13,22 @@ from ._state cimport StateC cdef class StateClass: cdef Pool mem cdef StateC* c + cdef int _borrowed @staticmethod cdef inline StateClass init(const TokenC* sent, int length): cdef StateClass self = StateClass() self.c = new StateC(sent, length) return self + + @staticmethod + cdef inline StateClass borrow(StateC* ptr): + cdef StateClass self = StateClass() + del self.c + self.c = ptr + self._borrowed = 1 + return self + @staticmethod cdef inline StateClass init_offset(const TokenC* sent, int length, int diff --git a/spacy/syntax/stateclass.pyx b/spacy/syntax/stateclass.pyx index ea0ec77e5..2a15a2de1 100644 --- a/spacy/syntax/stateclass.pyx +++ b/spacy/syntax/stateclass.pyx @@ -11,12 +11,14 @@ cdef class StateClass: def __init__(self, Doc doc=None, int offset=0): cdef Pool mem = Pool() self.mem = mem + self._borrowed = 0 if doc is not None: self.c = new StateC(doc.c, doc.length) self.c.offset = offset def __dealloc__(self): - del self.c + if self._borrowed != 1: + del self.c @property def stack(self): diff --git a/spacy/syntax/transition_system.pyx b/spacy/syntax/transition_system.pyx index c351636c4..94b1ef2b1 100644 --- a/spacy/syntax/transition_system.pyx +++ b/spacy/syntax/transition_system.pyx @@ -23,8 +23,7 @@ class OracleError(Exception): cdef void* _init_state(Pool mem, int length, void* tokens) except NULL: - cdef StateClass st = StateClass.init(tokens, length) - Py_INCREF(st) + cdef StateC* st = new StateC(tokens, length) return st From 855872f872196f28fde4a0a2e09e01829b4d26b2 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 14 Nov 2017 23:36:46 +0100 Subject: [PATCH 2/3] Remove state hashing --- spacy/syntax/_beam_utils.pyx | 5 +---- spacy/syntax/nn_parser.pyx | 18 +++++++++--------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/spacy/syntax/_beam_utils.pyx b/spacy/syntax/_beam_utils.pyx index b26d13e4d..20cbbbaaa 100644 --- a/spacy/syntax/_beam_utils.pyx +++ b/spacy/syntax/_beam_utils.pyx @@ -81,10 +81,7 @@ cdef class ParserBeam(object): self._set_scores(beam, scores[i]) if self.golds is not None: self._set_costs(beam, self.golds[i], follow_gold=follow_gold) - if follow_gold: - beam.advance(_transition_state, _hash_state, self.moves.c) - else: - beam.advance(_transition_state, _hash_state, self.moves.c) + beam.advance(_transition_state, NULL, self.moves.c) beam.check_done(_check_final_state, NULL) # This handles the non-monotonic stuff for the parser. 
if beam.is_done and self.golds is not None: diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 3ed59d3e8..49ffe2062 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -526,7 +526,7 @@ cdef class Parser: for k in range(nr_class): beam.scores[i][k] = c_scores[j * scores.shape[1] + k] j += 1 - beam.advance(_transition_state, _hash_state, self.moves.c) + beam.advance(_transition_state, NULL, self.moves.c) beam.check_done(_check_final_state, NULL) beams.append(beam) tokvecs = self.model[0].ops.unflatten(tokvecs, @@ -998,16 +998,16 @@ def _cleanup(Beam beam): state = addr del state seen.add(addr) + else: + print(i, addr) + print(seen) + raise Exception addr = beam._states[i].content if addr not in seen: state = addr del state seen.add(addr) - - -cdef hash_t _hash_state(void* _state, void* _) except 0: - state = _state - if state.is_final(): - return 1 - else: - return state.hash() + else: + print(i, addr) + print(seen) + raise Exception From d274d3a3b9f4625ab0854b7896e2134d3be0f4bc Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 15 Nov 2017 00:51:42 +0100 Subject: [PATCH 3/3] Let beam forward use minibatches --- spacy/syntax/nn_parser.pyx | 55 ++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 49ffe2062..bb01cecf1 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -17,7 +17,7 @@ from cpython.ref cimport PyObject, Py_XDECREF from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno from libc.math cimport exp from libcpp.vector cimport vector -from libc.string cimport memset +from libc.string cimport memset, memcpy from libc.stdlib cimport calloc, free from cymem.cymem cimport Pool from thinc.typedefs cimport weight_t, class_t, hash_t @@ -485,14 +485,14 @@ cdef class Parser: cdef np.ndarray scores cdef Doc doc cdef int nr_class = self.moves.n_moves - cdef StateClass stcls, output cuda_stream = util.get_cuda_stream() (tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model( docs, cuda_stream, 0.0) - beams = [] cdef int offset = 0 cdef int j = 0 cdef int k + + beams = [] for doc in docs: beam = Beam(nr_class, beam_width, min_density=beam_density) beam.initialize(self.moves.init_beam_state, doc.length, doc.c) @@ -501,34 +501,43 @@ cdef class Parser: state.offset = offset offset += len(doc) beam.check_done(_check_final_state, NULL) - while not beam.is_done: - states = [] + beams.append(beam) + cdef np.ndarray token_ids + token_ids = numpy.zeros((len(docs) * beam_width, self.nr_feature), + dtype='i', order='C') + todo = [beam for beam in beams if not beam.is_done] + + cdef int* c_ids + cdef int nr_feature = self.nr_feature + cdef int n_states + while todo: + todo = [beam for beam in beams if not beam.is_done] + token_ids.fill(-1) + c_ids = token_ids.data + n_states = 0 + for beam in todo: for i in range(beam.size): - stcls = StateClass.borrow(beam.at(i)) + state = beam.at(i) # This way we avoid having to score finalized states # We do have to take care to keep indexes aligned, though - if not stcls.is_final(): - states.append(stcls) - token_ids = self.get_token_ids(states) - vectors = state2vec(token_ids) - if self.cfg.get('hist_size', 0): - hists = numpy.asarray([st.history[:self.cfg['hist_size']] - for st in states], dtype='i') - scores = vec2scores((vectors, hists)) - else: - scores = vec2scores(vectors) - j = 0 - c_scores = scores.data + if not state.is_final(): + 
state.set_context_tokens(c_ids, nr_feature)
+                        c_ids += nr_feature
+                        n_states += 1
+            if n_states == 0:
+                break
+            vectors = state2vec(token_ids[:n_states])
+            scores = vec2scores(vectors)
+            c_scores = <float*>scores.data
+            for beam in todo:
                 for i in range(beam.size):
                     state = <StateC*>beam.at(i)
                     if not state.is_final():
                         self.moves.set_valid(beam.is_valid[i], state)
-                        for k in range(nr_class):
-                            beam.scores[i][k] = c_scores[j * scores.shape[1] + k]
-                        j += 1
+                        memcpy(beam.scores[i], c_scores, nr_class * sizeof(float))
+                        c_scores += nr_class
                 beam.advance(_transition_state, NULL, self.moves.c)
                 beam.check_done(_check_final_state, NULL)
-            beams.append(beam)
         tokvecs = self.model[0].ops.unflatten(tokvecs,
                                               [len(doc) for doc in docs])
         return beams, tokvecs
@@ -536,7 +545,7 @@ cdef class Parser:
     def update(self, docs, golds, drop=0., sgd=None, losses=None):
         if not any(self.moves.has_gold(gold) for gold in golds):
             return None
-        if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= 0.5:
+        if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= 0.0:
             return self.update_beam(docs, golds, self.cfg['beam_width'],
                                     self.cfg['beam_density'],
                                     drop=drop, sgd=sgd, losses=losses)
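The first two patches come down to an ownership rule: beam states are plain StateC structs owned by the Beam, StateClass.borrow() wraps such a pointer without taking ownership (so __dealloc__ leaves it alone when _borrowed == 1), and _cleanup() frees each distinct allocation exactly once. The Python sketch below is a minimal model of that rule with made-up names, not the real spaCy/thinc API; a freed flag stands in for actual heap allocation.

class OwnedState:
    """Stands in for a StateC* allocated with `new` and owned by the beam."""
    def __init__(self):
        self.freed = False

    def free(self):
        # Mirrors `del state` in _cleanup(): must run exactly once per allocation.
        if self.freed:
            raise RuntimeError("double free")
        self.freed = True


class BorrowedView:
    """Stands in for StateClass.borrow(): a non-owning view of a state."""
    def __init__(self, owned):
        self.owned = owned

    def __del__(self):
        # Borrowed, so do nothing here -- like __dealloc__ when _borrowed == 1.
        pass


def cleanup(slots):
    # Mirrors _cleanup(): the same pointer can sit in more than one beam slot
    # (_states and _parents), so remember which addresses were freed already.
    seen = set()
    for state in slots:
        if id(state) not in seen:
            state.free()
            seen.add(id(state))


if __name__ == '__main__':
    states = [OwnedState() for _ in range(4)]
    views = [BorrowedView(s) for s in states]   # annotations read through views
    cleanup(states + states)                    # aliased slots are freed only once
    assert all(s.freed for s in states)

The second patch then stops passing _hash_state to beam.advance(), so equivalent states are no longer merged during the search, which is presumably why it can also turn a repeated address in _cleanup() into a hard error (the new print/raise branches) rather than something to skip quietly.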
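The third patch restructures the beam forward pass from one model call per beam per step into one model call per step across all beams: context-token ids for every unfinished state are gathered into a single token_ids matrix, scored once, and each row of scores is copied (memcpy) back into the beam it came from, with finalized states skipped so the indexes stay aligned. The numpy sketch below mirrors that gather/score/scatter loop; ToyBeam, the feature layout and the model function are placeholders rather than the parser's real classes.

import numpy as np


class ToyBeam:
    """Placeholder for thinc's Beam: a few states plus a score row per state."""
    def __init__(self, n_states, nr_class, length):
        self.states = [0] * n_states              # tokens consumed per state
        self.scores = np.zeros((n_states, nr_class))
        self.length = length

    @property
    def is_done(self):
        return all(s >= self.length for s in self.states)


def beam_forward(beams, nr_feature, nr_class, model):
    while True:
        todo = [b for b in beams if not b.is_done]
        # Gather: one feature row per unfinished state, across all beams.
        rows = []
        for beam in todo:
            for s in beam.states:
                if s < beam.length:
                    rows.append(np.full(nr_feature, s, dtype='i'))
        if not rows:
            break
        token_ids = np.asarray(rows)
        scores = model(token_ids)                 # one forward pass per step
        assert scores.shape[1] == nr_class        # memcpy copies nr_class floats per row
        # Scatter: copy each row back into its beam, skipping finalized states
        # so the row index stays aligned with the unfinished states.
        j = 0
        for beam in todo:
            for i, s in enumerate(beam.states):
                if s < beam.length:
                    beam.scores[i, :] = scores[j]
                    j += 1
        # Stand-in for beam.advance(): push every unfinished state forward.
        for beam in todo:
            beam.states = [min(s + 1, beam.length) for s in beam.states]


if __name__ == '__main__':
    rng = np.random.RandomState(0)
    model = lambda ids: rng.rand(ids.shape[0], 5)
    beams = [ToyBeam(4, 5, length=3), ToyBeam(4, 5, length=6)]
    beam_forward(beams, nr_feature=13, nr_class=5, model=model)
    assert all(b.is_done for b in beams)

Batching across beams is what makes beam_width > 1 affordable here: state2vec and vec2scores see one large matrix per step instead of many tiny ones.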