Avoid assigning the POS SCONJ to prepositions tagged IN

This commit is contained in:
Matthew Honnibal 2019-08-25 21:56:47 +02:00
parent 22250cf6b7
commit 095c63c6b8
1 changed files with 26 additions and 22 deletions

View File

@ -3,55 +3,59 @@ from __future__ import unicode_literals
from ...symbols import LEMMA, PRON_LEMMA
# Several entries here look suspicious. Any word in this list that is tagged
# IN is assigned the POS SCONJ, even though an adpositional (ADP) reading
# seems much more likely for many of these prepositions. I'm not sure what I
# was running in 04395ffa4 when I added them; it doesn't seem right.
_subordinating_conjunctions = [
"that",
"if",
"as",
"because",
"of",
"for",
"before",
"in",
#"of",
#"for",
#"before",
#"in",
"while",
"after",
#"after",
"since",
"like",
"with",
#"with",
"so",
"to",
"by",
"on",
"about",
#"to",
#"by",
#"on",
#"about",
"than",
"whether",
"although",
"from",
#"from",
"though",
"until",
#"until",
"unless",
"once",
"without",
"at",
"into",
#"without",
#"at",
#"into",
"cause",
"over",
#"over",
"upon",
"till",
"whereas",
"beyond",
#"beyond",
"whilst",
"except",
"despite",
"wether",
"then",
#"then",
"but",
"becuse",
"whie",
"below",
"against",
#"below",
#"against",
"it",
"w/out",
"toward",
#"toward",
"albeit",
"save",
"besides",
@ -63,7 +67,7 @@ _subordinating_conjunctions = [
"out",
"near",
"seince",
"towards",
#"towards",
"tho",
"sice",
"will",