Improve efficiency of deprojectivization

This commit is contained in:
Matthew Honnibal 2017-11-17 18:55:13 +01:00
parent 8fec7268eb
commit db5c714ad2
1 changed file with 13 additions and 8 deletions

View File

@@ -1,4 +1,6 @@
# coding: utf-8 # coding: utf-8
# cython: profile=True
# cython: infer_types=True
"""Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005 """Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005
for doing pseudo-projective parsing implementation uses the HEAD decoration for doing pseudo-projective parsing implementation uses the HEAD decoration
scheme. scheme.
@@ -7,6 +9,8 @@ from __future__ import unicode_literals
from copy import copy from copy import copy
from ..tokens.doc cimport Doc
DELIMITER = '||' DELIMITER = '||'
@@ -111,17 +115,18 @@ def projectivize(heads, labels):
return proj_heads, deco_labels return proj_heads, deco_labels
def deprojectivize(tokens): cpdef deprojectivize(Doc doc):
# Reattach arcs with decorated labels (following HEAD scheme). For each # Reattach arcs with decorated labels (following HEAD scheme). For each
# decorated arc X||Y, search top-down, left-to-right, breadth-first until # decorated arc X||Y, search top-down, left-to-right, breadth-first until
# hitting a Y then make this the new head. # hitting a Y then make this the new head.
for token in tokens: for i in range(doc.length):
if is_decorated(token.dep_): label = doc.vocab.strings[doc.c[i].dep]
newlabel, headlabel = decompose(token.dep_) if DELIMITER in label:
newhead = _find_new_head(token, headlabel) new_label, head_label = label.split(DELIMITER)
token.head = newhead new_head = _find_new_head(doc[i], head_label)
token.dep_ = newlabel doc[i].head = new_head
return tokens doc.c[i].dep = new_label
return doc
def _decorate(heads, proj_heads, labels): def _decorate(heads, proj_heads, labels):