Fix Japanese tokenizer

JapaneseTokenizer now returns a Doc, not individual words
ines 2017-10-24 13:02:19 +02:00
parent 5ae0b8613a
commit 66f8f9d4a0
1 changed file with 1 addition and 2 deletions

@@ -33,8 +33,7 @@ class Japanese(Language):
     Defaults = JapaneseDefaults

     def make_doc(self, text):
-        words = self.tokenizer(text)
-        return Doc(self.vocab, words=words, spaces=[False]*len(words))
+        return self.tokenizer(text)


 __all__ = ['Japanese']
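
The change works because a spaCy custom tokenizer's __call__ can build and return a fully constructed Doc itself, so make_doc only needs to delegate. Below is a minimal sketch of that pattern; the class name SimpleJapaneseTokenizer and its one-token-per-character segmentation are hypothetical stand-ins for the real JapaneseTokenizer, which wraps an external morphological analyzer.

    from spacy.tokens import Doc
    from spacy.vocab import Vocab

    class SimpleJapaneseTokenizer(object):
        # Hypothetical stand-in: calling the tokenizer returns a Doc
        # directly, so Language.make_doc can simply delegate to it.
        def __init__(self, vocab):
            self.vocab = vocab

        def __call__(self, text):
            # Placeholder segmentation: one token per character. A real
            # Japanese tokenizer would use a morphological analyzer here.
            words = list(text)
            # Japanese text has no whitespace between tokens.
            return Doc(self.vocab, words=words, spaces=[False] * len(words))

    tokenizer = SimpleJapaneseTokenizer(Vocab())
    doc = tokenizer('日本語')
    assert isinstance(doc, Doc)  # a Doc, not a list of words

With this shape, the old make_doc body duplicated work the tokenizer already did; returning self.tokenizer(text) removes that duplication, which is what the one-line change above does.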