# spaCy/spacy/nonproj.py



def ancestors(word, heads):
    """Yield the chain of heads from *word* up towards the root.

    Args:
        word: Index of the token to start from. The token itself is not
            yielded.
        heads: Sequence in which ``heads[i]`` is the index of token ``i``'s
            head, ``i`` itself for a root, or ``None`` for an unattached
            token.

    Yields:
        Each successive head index on the path. If the path reaches an
        unattached token, ``None`` is yielded once and the walk stops.

    The walk stops when it reaches a token that is its own head, and takes
    at most ``len(heads)`` steps: a path to the root cannot be longer than
    the number of words in the sentence, and without the cap the walk would
    loop indefinitely on cyclic input.
    """
    head = word
    steps = 0
    while heads[head] != head and steps < len(heads):
        head = heads[head]
        steps += 1
        yield head
        # ``None`` marks an unattached token; it cannot be used as an index,
        # so the walk has to end here.
        if head is None:
            break


def contains_cycle(heads):
    """Check whether following the head relation ever revisits a token.

    In an acyclic tree, walking upwards from any word along its heads always
    ends at the root. Each word is therefore used as a starting point, and
    the walk is checked for a repeated token.

    Returns:
        The set of token indices visited on the first walk that hits a
        repeat (the start word plus the ancestors seen before the repeat),
        or ``None`` when no walk repeats a token.
    """
    for start in range(len(heads)):
        visited = {start}
        for node in ancestors(start, heads):
            if node in visited:
                # Reached a token already on this path: there is a cycle.
                return visited
            visited.add(node)
    return None


def is_non_projective_arc(word, heads):
    """Return whether the arc from ``heads[word]`` to *word* is non-projective.

    Definition (e.g. Havelka 2007): an arc h -> d with h < d is
    non-projective if there is a word k, h < k < d, such that h is not an
    ancestor of k. The same holds for h -> d with h > d.

    Args:
        word: Index of the dependent token d.
        heads: Sequence in which ``heads[i]`` is the index of token ``i``'s
            head, ``i`` itself for a root, or ``None`` for an unattached
            token.

    Returns:
        ``True`` if some token strictly between head and dependent is not
        dominated by the head, ``False`` otherwise. Root arcs and
        unattached tokens are never non-projective.
    """
    head = heads[word]
    # Unattached tokens (head is None) and root arcs (head == word) cannot
    # be non-projective.
    if head is None or head == word:
        return False

    # Inspect every token strictly between the head and the dependent.
    start, end = (head + 1, word) if head < word else (word + 1, head)
    for k in range(start, end):
        for ancestor in ancestors(k, heads):
            if ancestor is None:  # k sits in an unattached token/subtree
                break
            if ancestor == head:  # normal case: k is dominated by h
                break
        else:
            # The walk from k ended without reaching h, so h is not an
            # ancestor of k: the arc h -> d is non-projective.
            return True
    return False


def is_non_projective_tree(heads):
    """Return True if at least one arc of the tree is non-projective.

    Every token index is checked in order; the first non-projective arc
    found settles the answer.
    """
    for dependent in range(len(heads)):
        if is_non_projective_arc(dependent, heads):
            return True
    return False