diff --git a/lang_data/en/generate_specials.py b/lang_data/en/generate_specials.py index 1a8f1ae0b..e50cd77d4 100644 --- a/lang_data/en/generate_specials.py +++ b/lang_data/en/generate_specials.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- import json contractions = {"n't", "'nt", "not", "'ve", "'d", "'ll", "'s", "'m", "'ma", "'re"} @@ -133,6 +134,8 @@ hardcoded_specials = { "''": [{"F": "''"}], + "—": [{"F": "—", "L": "--", "P": ":"}], + "Corp.": [{"F": "Corp."}], "Inc.": [{"F": "Inc."}], "Co.": [{"F": "Co."}], @@ -412,6 +415,6 @@ def generate_specials(): if __name__ == "__main__": specials = generate_specials() - with open("specials.json", "w") as f: - json.dump(specials, f) + with open("specials.json", "w") as file_: + file_.write(json.dumps(specials, indent=2)) diff --git a/lang_data/en/lemma_rules.json b/lang_data/en/lemma_rules.json index 5a431224d..140749b18 100644 --- a/lang_data/en/lemma_rules.json +++ b/lang_data/en/lemma_rules.json @@ -32,6 +32,5 @@ "punct": [ ["“", "\""], - ["”", "\""], - ["—", "--"] + ["”", "\""] ] }