mirror of https://github.com/explosion/spaCy.git
Fix json imports and use ujson
This commit is contained in:
parent
958b12dec8
commit
e1efd589c3
|
@ -145,7 +145,7 @@ def read_json_file(loc, docs_filter=None):
|
|||
yield from read_json_file(loc / filename)
|
||||
else:
|
||||
with io.open(loc, 'r', encoding='utf8') as file_:
|
||||
docs = json.load(file_)
|
||||
docs = ujson.load(file_)
|
||||
for doc in docs:
|
||||
if docs_filter is not None and not docs_filter(doc):
|
||||
continue
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import ujson
|
||||
|
||||
from .typedefs cimport attr_t
|
||||
from .typedefs cimport hash_t
|
||||
from .attrs cimport attr_id_t
|
||||
|
@ -53,12 +55,6 @@ from .attrs import FLAG36 as L9_ENT
|
|||
from .attrs import FLAG35 as L10_ENT
|
||||
|
||||
|
||||
try:
|
||||
import ujson as json
|
||||
except ImportError:
|
||||
import json
|
||||
|
||||
|
||||
cpdef enum quantifier_t:
|
||||
_META
|
||||
ONE
|
||||
|
@ -194,7 +190,7 @@ cdef class Matcher:
|
|||
"""
|
||||
if (path / 'gazetteer.json').exists():
|
||||
with (path / 'gazetteer.json').open('r', encoding='utf8') as file_:
|
||||
patterns = json.load(file_)
|
||||
patterns = ujson.load(file_)
|
||||
else:
|
||||
patterns = {}
|
||||
return cls(vocab, patterns)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import json
|
||||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import ujson
|
||||
from collections import defaultdict
|
||||
|
||||
from cymem.cymem cimport Pool
|
||||
|
@ -131,7 +131,7 @@ cdef class Tagger:
|
|||
path = util.ensure_path(path)
|
||||
if (path / 'templates.json').exists():
|
||||
with (path / 'templates.json').open('r', encoding='utf8') as file_:
|
||||
templates = json.load(file_)
|
||||
templates = ujson.load(file_)
|
||||
elif require:
|
||||
raise IOError(
|
||||
"Required file %s/templates.json not found when loading Tagger" % str(path))
|
||||
|
|
|
@ -2,16 +2,10 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import ujson
|
||||
|
||||
from cython.operator cimport dereference as deref
|
||||
from cython.operator cimport preincrement as preinc
|
||||
|
||||
try:
|
||||
import ujson as json
|
||||
except ImportError:
|
||||
import json
|
||||
|
||||
|
||||
from cymem.cymem cimport Pool
|
||||
from preshed.maps cimport PreshMap
|
||||
|
||||
|
@ -52,7 +46,7 @@ cdef class Tokenizer:
|
|||
path = util.ensure_path(path)
|
||||
if rules is None:
|
||||
with (path / 'tokenizer' / 'specials.json').open('r', encoding='utf8') as file_:
|
||||
rules = json.load(file_)
|
||||
rules = ujson.load(file_)
|
||||
if prefix_search in (None, True):
|
||||
with (path / 'tokenizer' / 'prefix.txt').open() as file_:
|
||||
entries = file_.read().split('\n')
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
from __future__ import unicode_literals, print_function
|
||||
|
||||
import io
|
||||
import json
|
||||
import ujson
|
||||
import re
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
@ -117,7 +117,7 @@ def parse_package_meta(package_path, package, require=True):
|
|||
location = package_path / package / 'meta.json'
|
||||
if location.is_file():
|
||||
with io.open(location, encoding='utf8') as f:
|
||||
meta = json.load(f)
|
||||
meta = ujson.load(f)
|
||||
return meta
|
||||
elif require:
|
||||
raise IOError("Could not read meta.json from %s" % location)
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import bz2
|
||||
import ujson as json
|
||||
import ujson
|
||||
import re
|
||||
|
||||
from libc.string cimport memset
|
||||
|
@ -69,7 +69,7 @@ cdef class Vocab:
|
|||
"Install vectors after loading.")
|
||||
if tag_map is True and (path / 'vocab' / 'tag_map.json').exists():
|
||||
with (path / 'vocab' / 'tag_map.json').open('r', encoding='utf8') as file_:
|
||||
tag_map = json.load(file_)
|
||||
tag_map = ujson.load(file_)
|
||||
elif tag_map is True:
|
||||
tag_map = None
|
||||
if lex_attr_getters is not None \
|
||||
|
@ -82,12 +82,12 @@ cdef class Vocab:
|
|||
lemmatizer = Lemmatizer.load(path)
|
||||
if serializer_freqs is True and (path / 'vocab' / 'serializer.json').exists():
|
||||
with (path / 'vocab' / 'serializer.json').open('r', encoding='utf8') as file_:
|
||||
serializer_freqs = json.load(file_)
|
||||
serializer_freqs = ujson.load(file_)
|
||||
else:
|
||||
serializer_freqs = None
|
||||
|
||||
with (path / 'vocab' / 'strings.json').open('r', encoding='utf8') as file_:
|
||||
strings_list = json.load(file_)
|
||||
strings_list = ujson.load(file_)
|
||||
cdef Vocab self = cls(lex_attr_getters=lex_attr_getters, tag_map=tag_map,
|
||||
lemmatizer=lemmatizer, serializer_freqs=serializer_freqs,
|
||||
strings=strings_list)
|
||||
|
|
Loading…
Reference in New Issue