mirror of https://github.com/explosion/spaCy.git
* Add Unicode em dash to specials.json, so that we can control which POS tag it gets. This lets us prevent sentence boundary detection errors, addressing Issue #130.
This commit is contained in:
parent
1490feda29
commit
393a13d1af
|
@ -133,6 +133,9 @@ hardcoded_specials = {
|
|||
"Mt.": [{"F": "Mt.", "L": "Mount"}],
|
||||
|
||||
"''": [{"F": "''"}],
|
||||
|
||||
"—": [{"F": "—", "L": "--", "P": ":"}],
|
||||
|
||||
"Corp.": [{"F": "Corp."}],
|
||||
"Inc.": [{"F": "Inc."}],
|
||||
"Co.": [{"F": "Co."}],
|
||||
|
|
Loading…
Reference in New Issue