From 77d698dcae9d81767cced2ce307d4cb89060358e Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Wed, 4 Aug 2021 16:20:41 +0900 Subject: [PATCH] Fix check for RIGHT_ATTRS in dep matcher (#8807) * Fix check for RIGHT_ATTRs in dep matcher If a non-anchor node does not have RIGHT_ATTRS, the dep matcher throws an E100, which says that non-anchor nodes must have LEFT_ID, REL_OP, and RIGHT_ID. It specifically does not say RIGHT_ATTRS is required. A blank RIGHT_ATTRS is also valid, and patterns with one will be excepted. While not normal, sometimes a REL_OP is enough to specify a non-anchor node - maybe you just want the head of another node unconditionally, for example. This change just sets RIGHT_ATTRS to {} if not present. Alternatively changing E100 to state RIGHT_ATTRS is required could also be reasonable. * Fix test This test was written on the assumption that if `RIGHT_ATTRS` isn't present an error will be raised. Since the proposed changes make it so an error won't be raised this is no longer necessary. * Revert test, update error message Error message now lists missing keys, and RIGHT_ATTRS is required. * Use list of required keys in error message Also removes unused key param arg. --- spacy/errors.py | 4 ++-- spacy/matcher/dependencymatcher.pyx | 18 +++++++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/spacy/errors.py b/spacy/errors.py index 5651ab0fa..9264ca6d1 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -356,8 +356,8 @@ class Errors: E098 = ("Invalid pattern: expected both RIGHT_ID and RIGHT_ATTRS.") E099 = ("Invalid pattern: the first node of pattern should be an anchor " "node. The node should only contain RIGHT_ID and RIGHT_ATTRS.") - E100 = ("Nodes other than the anchor node should all contain LEFT_ID, " - "REL_OP and RIGHT_ID.") + E100 = ("Nodes other than the anchor node should all contain {required}, " + "but these are missing: {missing}") E101 = ("RIGHT_ID should be a new node and LEFT_ID should already have " "have been declared in previous edges.") E102 = ("Can't merge non-disjoint spans. '{token}' is already part of " diff --git a/spacy/matcher/dependencymatcher.pyx b/spacy/matcher/dependencymatcher.pyx index b6e84a5da..9e0842d59 100644 --- a/spacy/matcher/dependencymatcher.pyx +++ b/spacy/matcher/dependencymatcher.pyx @@ -122,13 +122,17 @@ cdef class DependencyMatcher: raise ValueError(Errors.E099.format(key=key)) visited_nodes[relation["RIGHT_ID"]] = True else: - if not( - "RIGHT_ID" in relation - and "RIGHT_ATTRS" in relation - and "REL_OP" in relation - and "LEFT_ID" in relation - ): - raise ValueError(Errors.E100.format(key=key)) + required_keys = set( + ("RIGHT_ID", "RIGHT_ATTRS", "REL_OP", "LEFT_ID") + ) + relation_keys = set(relation.keys()) + missing = required_keys - relation_keys + if missing: + missing_txt = ", ".join(list(missing)) + raise ValueError(Errors.E100.format( + required=required_keys, + missing=missing_txt + )) if ( relation["RIGHT_ID"] in visited_nodes or relation["LEFT_ID"] not in visited_nodes