Fix json imports and use ujson

2017-04-15 12:13:34 +02:00 · 2017-04-15 12:13:34 +02:00 · e1efd589c3
parent 958b12dec8
commit e1efd589c3
6 changed files with 14 additions and 24 deletions
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -145,7 +145,7 @@ def read_json_file(loc, docs_filter=None):
            yield from read_json_file(loc / filename)
    else:
        with io.open(loc, 'r', encoding='utf8') as file_:
-            docs = json.load(file_)
+            docs = ujson.load(file_)
        for doc in docs:
            if docs_filter is not None and not docs_filter(doc):
                continue
--- a/spacy/matcher.pyx
+++ b/spacy/matcher.pyx
@@ -3,6 +3,8 @@
 # coding: utf8
 from __future__ import unicode_literals

+import ujson
+
 from .typedefs cimport attr_t
 from .typedefs cimport hash_t
 from .attrs cimport attr_id_t
@@ -53,12 +55,6 @@ from .attrs import FLAG36 as L9_ENT
 from .attrs import FLAG35 as L10_ENT


-try:
-    import ujson as json
-except ImportError:
-    import json
-
-
 cpdef enum quantifier_t:
    _META
    ONE
@@ -194,7 +190,7 @@ cdef class Matcher:
        """
        if (path / 'gazetteer.json').exists():
            with (path / 'gazetteer.json').open('r', encoding='utf8') as file_:
-                patterns = json.load(file_)
+                patterns = ujson.load(file_)
        else:
            patterns = {}
        return cls(vocab, patterns)
--- a/spacy/tagger.pyx
+++ b/spacy/tagger.pyx
@@ -1,7 +1,7 @@
-import json
 # coding: utf8
 from __future__ import unicode_literals

+import ujson
 from collections import defaultdict

 from cymem.cymem cimport Pool
@@ -131,7 +131,7 @@ cdef class Tagger:
        path = util.ensure_path(path)
        if (path / 'templates.json').exists():
            with (path / 'templates.json').open('r', encoding='utf8') as file_:
-                templates = json.load(file_)
+                templates = ujson.load(file_)
        elif require:
            raise IOError(
                "Required file %s/templates.json not found when loading Tagger" % str(path))
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -2,16 +2,10 @@
 # coding: utf8
 from __future__ import unicode_literals

+import ujson

 from cython.operator cimport dereference as deref
 from cython.operator cimport preincrement as preinc
-
-try:
-    import ujson as json
-except ImportError:
-    import json
-
-
 from cymem.cymem cimport Pool
 from preshed.maps cimport PreshMap

@@ -52,7 +46,7 @@ cdef class Tokenizer:
        path = util.ensure_path(path)
        if rules is None:
            with (path / 'tokenizer' / 'specials.json').open('r', encoding='utf8') as file_:
-                rules = json.load(file_)
+                rules = ujson.load(file_)
        if prefix_search in (None, True):
            with (path / 'tokenizer' / 'prefix.txt').open() as file_:
                entries = file_.read().split('\n')
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals, print_function

 import io
-import json
+import ujson
 import re
 from pathlib import Path
 import sys
@@ -117,7 +117,7 @@ def parse_package_meta(package_path, package, require=True):
    location = package_path / package / 'meta.json'
    if location.is_file():
        with io.open(location, encoding='utf8') as f:
-            meta = json.load(f)
+            meta = ujson.load(f)
            return meta
    elif require:
        raise IOError("Could not read meta.json from %s" % location)
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals

 import bz2
-import ujson as json
+import ujson
 import re

 from libc.string cimport memset
@@ -69,7 +69,7 @@ cdef class Vocab:
                "Install vectors after loading.")
        if tag_map is True and (path / 'vocab' / 'tag_map.json').exists():
            with (path / 'vocab' / 'tag_map.json').open('r', encoding='utf8') as file_:
-                tag_map = json.load(file_)
+                tag_map = ujson.load(file_)
        elif tag_map is True:
            tag_map = None
        if lex_attr_getters is not None \
@@ -82,12 +82,12 @@ cdef class Vocab:
            lemmatizer = Lemmatizer.load(path)
        if serializer_freqs is True and (path / 'vocab' / 'serializer.json').exists():
            with (path / 'vocab' / 'serializer.json').open('r', encoding='utf8') as file_:
-                serializer_freqs = json.load(file_)
+                serializer_freqs = ujson.load(file_)
        else:
            serializer_freqs = None

        with (path / 'vocab' / 'strings.json').open('r', encoding='utf8') as file_:
-            strings_list = json.load(file_)
+            strings_list = ujson.load(file_)
        cdef Vocab self = cls(lex_attr_getters=lex_attr_getters, tag_map=tag_map,
                              lemmatizer=lemmatizer, serializer_freqs=serializer_freqs,
                              strings=strings_list)