mirror of https://github.com/explosion/spaCy.git
Fix usage of pathlib for Python3 -- turning paths to strings.
This commit is contained in:
parent
453683aaf0
commit
af847e07fc
|
@ -154,7 +154,7 @@ def setup_vocab(get_lex_attr, tag_map, src_dir, dst_dir):
|
||||||
|
|
||||||
vectors_src = src_dir / 'vectors.bz2'
|
vectors_src = src_dir / 'vectors.bz2'
|
||||||
if vectors_src.exists():
|
if vectors_src.exists():
|
||||||
write_binary_vectors(str(vectors_src), str(dst_dir / 'vec.bin'))
|
write_binary_vectors(vectors_src.as_posix(), (dst_dir / 'vec.bin').as_posix())
|
||||||
else:
|
else:
|
||||||
print("Warning: Word vectors file not found")
|
print("Warning: Word vectors file not found")
|
||||||
vocab = Vocab(get_lex_attr=get_lex_attr, tag_map=tag_map)
|
vocab = Vocab(get_lex_attr=get_lex_attr, tag_map=tag_map)
|
||||||
|
@ -186,7 +186,7 @@ def setup_vocab(get_lex_attr, tag_map, src_dir, dst_dir):
|
||||||
lexeme.cluster = int(clusters[word][::-1], 2)
|
lexeme.cluster = int(clusters[word][::-1], 2)
|
||||||
else:
|
else:
|
||||||
lexeme.cluster = 0
|
lexeme.cluster = 0
|
||||||
vocab.dump(str(dst_dir / 'lexemes.bin'))
|
vocab.dump((dst_dir / 'lexemes.bin').as_posix())
|
||||||
with (dst_dir / 'strings.json').open('w') as file_:
|
with (dst_dir / 'strings.json').open('w') as file_:
|
||||||
vocab.strings.dump(file_)
|
vocab.strings.dump(file_)
|
||||||
with (dst_dir / 'oov_prob').open('w') as file_:
|
with (dst_dir / 'oov_prob').open('w') as file_:
|
||||||
|
@ -210,18 +210,19 @@ def main(lang_id, lang_data_dir, corpora_dir, model_dir):
|
||||||
model_dir / 'vocab')
|
model_dir / 'vocab')
|
||||||
|
|
||||||
if (lang_data_dir / 'gazetteer.json').exists():
|
if (lang_data_dir / 'gazetteer.json').exists():
|
||||||
copyfile(str(lang_data_dir / 'gazetteer.json'),
|
copyfile((lang_data_dir / 'gazetteer.json').as_posix(),
|
||||||
str(model_dir / 'vocab' / 'gazetteer.json'))
|
(model_dir / 'vocab' / 'gazetteer.json').as_posix())
|
||||||
|
|
||||||
copyfile(str(lang_data_dir / 'tag_map.json'),
|
copyfile((lang_data_dir / 'tag_map.json').as_posix(),
|
||||||
str(model_dir / 'vocab' / 'tag_map.json'))
|
(model_dir / 'vocab' / 'tag_map.json').as_posix())
|
||||||
|
|
||||||
if (lang_data_dir / 'lemma_rules.json').exists():
|
if (lang_data_dir / 'lemma_rules.json').exists():
|
||||||
copyfile(str(lang_data_dir / 'lemma_rules.json'),
|
copyfile((lang_data_dir / 'lemma_rules.json').as_posix(),
|
||||||
str(model_dir / 'vocab' / 'lemma_rules.json'))
|
(model_dir / 'vocab' / 'lemma_rules.json').as_posix())
|
||||||
|
|
||||||
if not (model_dir / 'wordnet').exists() and (corpora_dir / 'wordnet').exists():
|
if not (model_dir / 'wordnet').exists() and (corpora_dir / 'wordnet').exists():
|
||||||
copytree(str(corpora_dir / 'wordnet' / 'dict'), str(model_dir / 'wordnet'))
|
copytree((corpora_dir / 'wordnet' / 'dict').as_posix(),
|
||||||
|
(model_dir / 'wordnet').as_posix())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -241,8 +241,6 @@ cdef class Vocab:
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
def dump(self, loc):
|
def dump(self, loc):
|
||||||
if path.exists(loc):
|
|
||||||
assert not path.isdir(loc)
|
|
||||||
cdef bytes bytes_loc = loc.encode('utf8') if type(loc) == unicode else loc
|
cdef bytes bytes_loc = loc.encode('utf8') if type(loc) == unicode else loc
|
||||||
|
|
||||||
cdef CFile fp = CFile(bytes_loc, 'wb')
|
cdef CFile fp = CFile(bytes_loc, 'wb')
|
||||||
|
|
Loading…
Reference in New Issue