From e361b4f82b6ee1186e4e0b9977939e7b647264c0 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 8 Feb 2018 15:25:41 +0100 Subject: [PATCH 1/2] Fix #1929: Incorrect NER when pre-set sentence boundaries. --- spacy/syntax/ner.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/syntax/ner.pyx b/spacy/syntax/ner.pyx index 999760ce0..73ef17534 100644 --- a/spacy/syntax/ner.pyx +++ b/spacy/syntax/ner.pyx @@ -281,7 +281,7 @@ cdef class Begin: elif preset_ent_iob == 3 and st.B_(1).ent_iob != 1: return False # Don't allow entities to extend across sentence boundaries - elif st.B_(1).sent_start: + elif st.B_(1).sent_start == 1: return False else: return label != 0 and not st.entity_is_open() @@ -327,7 +327,7 @@ cdef class In: elif st.B_(1).ent_iob != preset_ent_iob: return False # Don't allow entities to extend across sentence boundaries - elif st.B_(1).sent_start: + elif st.B_(1).sent_start == 1: return False return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label From 24af6375db3b312539af4cb06620e23e06e5aa81 Mon Sep 17 00:00:00 2001 From: Orion Montoya Date: Thu, 8 Feb 2018 10:49:09 -0800 Subject: [PATCH 2/2] update link to Honnibal and Johnson 2015 aclweb.org is throwing a gateway timeout on the link as `https`+`aclweb.org`, but is fine with `https`+`www.aclweb.org` (also with `http`+`aclweb.org`, but let's keep it in `https`, shall we?)
--- website/usage/_facts-figures/_benchmarks.jade | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/usage/_facts-figures/_benchmarks.jade b/website/usage/_facts-figures/_benchmarks.jade index b530b84de..dabf58795 100644 --- a/website/usage/_facts-figures/_benchmarks.jade +++ b/website/usage/_facts-figures/_benchmarks.jade @@ -13,7 +13,7 @@ p | Their results and subsequent discussions helped us develop a novel | psychologically-motivated technique to improve spaCy's accuracy, which | we published in joint work with Macquarie University - | #[+a("https://aclweb.org/anthology/D/D15/D15-1162.pdf") (Honnibal and Johnson, 2015)]. + | #[+a("https://www.aclweb.org/anthology/D/D15/D15-1162.pdf") (Honnibal and Johnson, 2015)]. include _benchmarks-choi-2015