* Remove em-dash from lemma rules. Handle instead in specials.

This commit is contained in:
Matthew Honnibal 2015-10-09 10:27:13 +02:00
commit d341443282
2 changed files with 5 additions and 3 deletions

View File

@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
import json
contractions = {"n't", "'nt", "not", "'ve", "'d", "'ll", "'s", "'m", "'ma", "'re"}
@@ -133,6 +134,8 @@ hardcoded_specials = {
"''": [{"F": "''"}],
"—": [{"F": "—", "L": "--", "P": ":"}],
"Corp.": [{"F": "Corp."}],
"Inc.": [{"F": "Inc."}],
"Co.": [{"F": "Co."}],
@@ -412,6 +415,6 @@ def generate_specials():
if __name__ == "__main__":
specials = generate_specials()
with open("specials.json", "w") as f:
json.dump(specials, f)
with open("specials.json", "w") as file_:
file_.write(json.dumps(specials, indent=2))

View File

@@ -32,6 +32,5 @@
"punct": [
["“", "\""],
["”", "\""],
["—", "--"]
]
}