mirror of https://github.com/explosion/spaCy.git
* Fix head alignment in read_conll.parse, which was causing corrupt parses when strip_bad_periods=True: head indices were not remapped to the new token positions after bad periods were skipped. A similar problem may affect other data readers.
This commit is contained in:
parent
f868175e43
commit
60d26243e3
|
@ -10,11 +10,12 @@ def parse(sent_text, strip_bad_periods=False):
|
|||
assert sent_text
|
||||
annot = []
|
||||
words = []
|
||||
id_map = {}
|
||||
id_map = {-1: -1}
|
||||
for i, line in enumerate(sent_text.split('\n')):
|
||||
word, tag, head, dep = _parse_line(line)
|
||||
if strip_bad_periods and words and _is_bad_period(words[-1], word):
|
||||
continue
|
||||
id_map[i] = len(words)
|
||||
|
||||
annot.append({
|
||||
'id': len(words),
|
||||
|
@ -23,6 +24,8 @@ def parse(sent_text, strip_bad_periods=False):
|
|||
'head': int(head) - 1,
|
||||
'dep': dep})
|
||||
words.append(word)
|
||||
for entry in annot:
|
||||
entry['head'] = id_map[entry['head']]
|
||||
return words, annot
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue