* Add unicode em dash to specials.json, so that we can control what POS tag it gets. This way we can prevent sentence boundary detection errors, to address Issue #130.

This commit is contained in:
Matthew Honnibal 2015-10-09 19:24:33 +11:00
parent 1490feda29
commit 393a13d1af
1 changed file with 3 additions and 0 deletions

View File

@@ -133,6 +133,9 @@ hardcoded_specials = {
"Mt.": [{"F": "Mt.", "L": "Mount"}],
"''": [{"F": "''"}],
"": [{"F": "", "L": "--", "P": ":"}],
"Corp.": [{"F": "Corp."}],
"Inc.": [{"F": "Inc."}],
"Co.": [{"F": "Co."}],