mirror of https://github.com/explosion/spaCy.git
Merge pull request #282 from henningpeters/switch_vectors
initial proposal for ability to switch vectors
This commit is contained in:
commit
20235bde00
|
@@ -9,4 +9,4 @@ plac
|
|||
six
|
||||
ujson
|
||||
cloudpickle
|
||||
sputnik>=0.9.0,<0.10.0
|
||||
sputnik>=0.9.2,<0.10.0
|
||||
|
|
2
setup.py
2
setup.py
|
@@ -176,7 +176,7 @@ def setup_package():
|
|||
ext_modules=ext_modules,
|
||||
install_requires=['numpy', 'murmurhash>=0.26,<0.27', 'cymem>=1.30,<1.32.0', 'preshed>=0.46.1,<0.47',
|
||||
'thinc>=5.0.0,<5.1.0', 'plac', 'six',
|
||||
'ujson', 'cloudpickle', 'sputnik>=0.9.0,<0.10.0'],
|
||||
'ujson', 'cloudpickle', 'sputnik>=0.9.2,<0.10.0'],
|
||||
cmdclass = {
|
||||
'build_ext': build_ext_subclass},
|
||||
)
|
||||
|
|
|
@@ -2,6 +2,7 @@ from . import util
|
|||
from .en import English
|
||||
|
||||
|
||||
def load(name, via=None):
|
||||
package = util.get_package_by_name(name, via=via)
|
||||
return English(package=package)
|
||||
def load(name, vectors=None, via=None):
|
||||
return English(
|
||||
package=util.get_package_by_name(name, via=via),
|
||||
vectors_package=util.get_package_by_name(vectors, via=via))
|
||||
|
|
|
@@ -153,7 +153,7 @@ class Language(object):
|
|||
return {0: {'PER': True, 'LOC': True, 'ORG': True, 'MISC': True}}
|
||||
|
||||
@classmethod
|
||||
def default_vocab(cls, package, get_lex_attr=None):
|
||||
def default_vocab(cls, package, get_lex_attr=None, vectors_package=None):
|
||||
if get_lex_attr is None:
|
||||
if package.has_file('vocab', 'oov_prob'):
|
||||
with package.open(('vocab', 'oov_prob')) as file_:
|
||||
|
@@ -162,7 +162,8 @@ class Language(object):
|
|||
else:
|
||||
get_lex_attr = cls.default_lex_attrs()
|
||||
if hasattr(package, 'dir_path'):
|
||||
return Vocab.from_package(package, get_lex_attr=get_lex_attr)
|
||||
return Vocab.from_package(package, get_lex_attr=get_lex_attr,
|
||||
vectors_package=vectors_package)
|
||||
else:
|
||||
return Vocab.load(package, get_lex_attr)
|
||||
|
||||
|
@@ -198,7 +199,8 @@ class Language(object):
|
|||
matcher=None,
|
||||
serializer=None,
|
||||
load_vectors=True,
|
||||
package=None):
|
||||
package=None,
|
||||
vectors_package=None):
|
||||
"""
|
||||
a model can be specified:
|
||||
|
||||
|
@@ -228,7 +230,7 @@ class Language(object):
|
|||
warn("load_vectors is deprecated", DeprecationWarning)
|
||||
|
||||
if vocab in (None, True):
|
||||
vocab = self.default_vocab(package)
|
||||
vocab = self.default_vocab(package, vectors_package=vectors_package)
|
||||
self.vocab = vocab
|
||||
if tokenizer in (None, True):
|
||||
tokenizer = Tokenizer.from_package(package, self.vocab)
|
||||
|
|
|
@@ -25,9 +25,9 @@ def get_package_by_name(name=None, via=None):
|
|||
return sputnik.package(about.__title__, about.__version__,
|
||||
name or about.__default_model__, data_path=via)
|
||||
except PackageNotFoundException as e:
|
||||
raise RuntimeError("Model not installed. Please run 'python -m "
|
||||
raise RuntimeError("Model %s not installed. Please run 'python -m "
|
||||
"spacy.en.download' to install latest compatible "
|
||||
"model.")
|
||||
"model." % name)
|
||||
except CompatiblePackageNotFoundException as e:
|
||||
raise RuntimeError("Installed model is not compatible with spaCy "
|
||||
"version. Please run 'python -m spacy.en.download "
|
||||
|
|
|
@@ -52,7 +52,7 @@ cdef class Vocab:
|
|||
return cls.from_package(get_package(data_dir), get_lex_attr=get_lex_attr)
|
||||
|
||||
@classmethod
|
||||
def from_package(cls, package, get_lex_attr=None):
|
||||
def from_package(cls, package, get_lex_attr=None, vectors_package=None):
|
||||
tag_map = package.load_json(('vocab', 'tag_map.json'), default={})
|
||||
|
||||
lemmatizer = Lemmatizer.from_package(package)
|
||||
|
@@ -66,7 +66,10 @@ cdef class Vocab:
|
|||
self.strings.load(file_)
|
||||
self.load_lexemes(package.file_path('vocab', 'lexemes.bin'))
|
||||
|
||||
if package.has_file('vocab', 'vec.bin'):
|
||||
if vectors_package and vectors_package.has_file('vocab', 'vec.bin'):
|
||||
self.vectors_length = self.load_vectors_from_bin_loc(
|
||||
vectors_package.file_path('vocab', 'vec.bin'))
|
||||
elif package.has_file('vocab', 'vec.bin'):
|
||||
self.vectors_length = self.load_vectors_from_bin_loc(
|
||||
package.file_path('vocab', 'vec.bin'))
|
||||
return self
|
||||
|
|
Loading…
Reference in New Issue