From e1efd589c334d1820da3f3ee282c7dabb2307884 Mon Sep 17 00:00:00 2001 From: ines Date: Sat, 15 Apr 2017 12:13:34 +0200 Subject: [PATCH] Fix json imports and use ujson --- spacy/gold.pyx | 2 +- spacy/matcher.pyx | 10 +++------- spacy/tagger.pyx | 4 ++-- spacy/tokenizer.pyx | 10 ++-------- spacy/util.py | 4 ++-- spacy/vocab.pyx | 8 ++++---- 6 files changed, 14 insertions(+), 24 deletions(-) diff --git a/spacy/gold.pyx b/spacy/gold.pyx index a33ca69a4..425ad0fe0 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -145,7 +145,7 @@ def read_json_file(loc, docs_filter=None): yield from read_json_file(loc / filename) else: with io.open(loc, 'r', encoding='utf8') as file_: - docs = json.load(file_) + docs = ujson.load(file_) for doc in docs: if docs_filter is not None and not docs_filter(doc): continue diff --git a/spacy/matcher.pyx b/spacy/matcher.pyx index 37c34f0ca..c9084c359 100644 --- a/spacy/matcher.pyx +++ b/spacy/matcher.pyx @@ -3,6 +3,8 @@ # coding: utf8 from __future__ import unicode_literals +import ujson + from .typedefs cimport attr_t from .typedefs cimport hash_t from .attrs cimport attr_id_t @@ -53,12 +55,6 @@ from .attrs import FLAG36 as L9_ENT from .attrs import FLAG35 as L10_ENT -try: - import ujson as json -except ImportError: - import json - - cpdef enum quantifier_t: _META ONE @@ -194,7 +190,7 @@ cdef class Matcher: """ if (path / 'gazetteer.json').exists(): with (path / 'gazetteer.json').open('r', encoding='utf8') as file_: - patterns = json.load(file_) + patterns = ujson.load(file_) else: patterns = {} return cls(vocab, patterns) diff --git a/spacy/tagger.pyx b/spacy/tagger.pyx index 82c183706..59e8a2c66 100644 --- a/spacy/tagger.pyx +++ b/spacy/tagger.pyx @@ -1,7 +1,7 @@ -import json # coding: utf8 from __future__ import unicode_literals +import ujson from collections import defaultdict from cymem.cymem cimport Pool @@ -131,7 +131,7 @@ cdef class Tagger: path = util.ensure_path(path) if (path / 'templates.json').exists(): with (path / 'templates.json').open('r', encoding='utf8') as file_: - templates = json.load(file_) + templates = ujson.load(file_) elif require: raise IOError( "Required file %s/templates.json not found when loading Tagger" % str(path)) diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index 4312c72e6..c094bea0d 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -2,16 +2,10 @@ # coding: utf8 from __future__ import unicode_literals +import ujson from cython.operator cimport dereference as deref from cython.operator cimport preincrement as preinc - -try: - import ujson as json -except ImportError: - import json - - from cymem.cymem cimport Pool from preshed.maps cimport PreshMap @@ -52,7 +46,7 @@ cdef class Tokenizer: path = util.ensure_path(path) if rules is None: with (path / 'tokenizer' / 'specials.json').open('r', encoding='utf8') as file_: - rules = json.load(file_) + rules = ujson.load(file_) if prefix_search in (None, True): with (path / 'tokenizer' / 'prefix.txt').open() as file_: entries = file_.read().split('\n') diff --git a/spacy/util.py b/spacy/util.py index 8229d05cd..c6946ce6e 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals, print_function import io -import json +import ujson import re from pathlib import Path import sys @@ -117,7 +117,7 @@ def parse_package_meta(package_path, package, require=True): location = package_path / package / 'meta.json' if location.is_file(): with io.open(location, encoding='utf8') as f: - meta = json.load(f) + meta = ujson.load(f) return meta elif require: raise IOError("Could not read meta.json from %s" % location) diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 4089f65a3..4df97ddf0 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -2,7 +2,7 @@ from __future__ import unicode_literals import bz2 -import ujson as json +import ujson import re from libc.string cimport memset @@ -69,7 +69,7 @@ cdef class Vocab: "Install vectors after loading.") if tag_map is True and (path / 'vocab' / 'tag_map.json').exists(): with (path / 'vocab' / 'tag_map.json').open('r', encoding='utf8') as file_: - tag_map = json.load(file_) + tag_map = ujson.load(file_) elif tag_map is True: tag_map = None if lex_attr_getters is not None \ @@ -82,12 +82,12 @@ cdef class Vocab: lemmatizer = Lemmatizer.load(path) if serializer_freqs is True and (path / 'vocab' / 'serializer.json').exists(): with (path / 'vocab' / 'serializer.json').open('r', encoding='utf8') as file_: - serializer_freqs = json.load(file_) + serializer_freqs = ujson.load(file_) else: serializer_freqs = None with (path / 'vocab' / 'strings.json').open('r', encoding='utf8') as file_: - strings_list = json.load(file_) + strings_list = ujson.load(file_) cdef Vocab self = cls(lex_attr_getters=lex_attr_getters, tag_map=tag_map, lemmatizer=lemmatizer, serializer_freqs=serializer_freqs, strings=strings_list)