mirror of https://github.com/explosion/spaCy.git
* Remove em-dash from lemma rules. Handle instead in specials.
This commit is contained in:
commit
d341443282
|
@ -1,3 +1,4 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import json
|
||||
|
||||
contractions = {"n't", "'nt", "not", "'ve", "'d", "'ll", "'s", "'m", "'ma", "'re"}
|
||||
|
@ -133,6 +134,8 @@ hardcoded_specials = {
|
|||
|
||||
"''": [{"F": "''"}],
|
||||
|
||||
"—": [{"F": "—", "L": "--", "P": ":"}],
|
||||
|
||||
"Corp.": [{"F": "Corp."}],
|
||||
"Inc.": [{"F": "Inc."}],
|
||||
"Co.": [{"F": "Co."}],
|
||||
|
@ -412,6 +415,6 @@ def generate_specials():
|
|||
|
||||
if __name__ == "__main__":
|
||||
specials = generate_specials()
|
||||
with open("specials.json", "w") as f:
|
||||
json.dump(specials, f)
|
||||
with open("specials.json", "w") as file_:
|
||||
file_.write(json.dumps(specials, indent=2))
|
||||
|
||||
|
|
|
@ -32,6 +32,5 @@
|
|||
"punct": [
|
||||
["“", "\""],
|
||||
["”", "\""],
|
||||
["—", "--"]
|
||||
]
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue