mirror of https://github.com/explosion/spaCy.git
Fix Japanese tokenizer
JapaneseTokenizer now returns a Doc, not individual words
This commit is contained in:
parent 5ae0b8613a
commit 66f8f9d4a0
@@ -33,8 +33,7 @@ class Japanese(Language):
     Defaults = JapaneseDefaults
 
     def make_doc(self, text):
-        words = self.tokenizer(text)
-        return Doc(self.vocab, words=words, spaces=[False]*len(words))
+        return self.tokenizer(text)
 
 
 __all__ = ['Japanese']
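For context, here is a minimal sketch of the tokenizer-side counterpart this change relies on: a JapaneseTokenizer whose __call__ builds and returns a Doc itself, so make_doc can simply delegate to it. The class body and the placeholder segmentation below are assumptions for illustration, not the actual spaCy implementation in this commit.

from spacy.tokens import Doc

class JapaneseTokenizer(object):
    """Sketch of a tokenizer that returns a Doc directly (assumed API)."""

    def __init__(self, vocab):
        self.vocab = vocab
        # Hypothetical segmenter: a real implementation would plug in a
        # Japanese word splitter here (e.g. MeCab or Janome).
        self.segment = lambda text: text.split()  # placeholder segmentation

    def __call__(self, text):
        words = self.segment(text)
        # Japanese text has no whitespace between tokens, so every
        # trailing-space flag is False.
        return Doc(self.vocab, words=words, spaces=[False] * len(words))

With the tokenizer constructing the Doc, make_doc no longer needs to wrap the word list itself and can return self.tokenizer(text) directly, as the diff above shows.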