From 7918fa4ef9417a04ec65191fac5b16db08c5add8 Mon Sep 17 00:00:00 2001
From: Kevin Humphreys
Date: Wed, 3 Jan 2018 12:25:48 -0800
Subject: [PATCH] handle would've

---
 spacy/lang/en/tokenizer_exceptions.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/lang/en/tokenizer_exceptions.py b/spacy/lang/en/tokenizer_exceptions.py
index 064b7ea59..df28e1287 100644
--- a/spacy/lang/en/tokenizer_exceptions.py
+++ b/spacy/lang/en/tokenizer_exceptions.py
@@ -213,7 +213,8 @@ for verb_data in [
     {ORTH: "could", NORM: "could", TAG: "MD"},
     {ORTH: "might", NORM: "might", TAG: "MD"},
     {ORTH: "must", NORM: "must", TAG: "MD"},
-    {ORTH: "should", NORM: "should", TAG: "MD"}]:
+    {ORTH: "should", NORM: "should", TAG: "MD"},
+    {ORTH: "would", NORM: "would", TAG: "MD"}]:
     verb_data_tc = dict(verb_data)
     verb_data_tc[ORTH] = verb_data_tc[ORTH].title()
     for data in [verb_data, verb_data_tc]: