From 095c63c6b8e3fc8d1da2c914a996ddecba19864f Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 25 Aug 2019 21:56:47 +0200 Subject: [PATCH] Avoid making prepositions get the tag SCONJ --- spacy/lang/en/morph_rules.py | 48 +++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/spacy/lang/en/morph_rules.py b/spacy/lang/en/morph_rules.py index 198182ff0..b00534cc5 100644 --- a/spacy/lang/en/morph_rules.py +++ b/spacy/lang/en/morph_rules.py @@ -3,55 +3,59 @@ from __future__ import unicode_literals from ...symbols import LEMMA, PRON_LEMMA +# Several entries here look pretty suspicious. Given the tag IN, they will get +# the POS SCONJ, even though an adpositional reading seems much more likely +# for a lot of these prepositions. I'm not sure what I was running in +# 04395ffa4 when I added them, but it doesn't seem right. _subordinating_conjunctions = [ "that", "if", "as", "because", - "of", - "for", - "before", - "in", + #"of", + #"for", + #"before", + #"in", "while", - "after", + #"after", "since", "like", - "with", + #"with", "so", - "to", - "by", - "on", - "about", + #"to", + #"by", + #"on", + #"about", "than", "whether", "although", - "from", + #"from", "though", - "until", + #"until", "unless", "once", - "without", - "at", - "into", + #"without", + #"at", + #"into", "cause", - "over", + #"over", "upon", "till", "whereas", - "beyond", + #"beyond", "whilst", "except", "despite", "wether", - "then", + #"then", "but", "becuse", "whie", - "below", - "against", + #"below", + #"against", "it", "w/out", - "toward", + #"toward", "albeit", "save", "besides", @@ -63,7 +67,7 @@ _subordinating_conjunctions = [ "out", "near", "seince", - "towards", + #"towards", "tho", "sice", "will",