mirror of https://github.com/explosion/spaCy.git
Fix Japanese tokenizer
JapaneseTokenizer now returns a Doc, not individual words
commit 66f8f9d4a0
parent 5ae0b8613a
@@ -33,8 +33,7 @@ class Japanese(Language):
     Defaults = JapaneseDefaults
 
     def make_doc(self, text):
-        words = self.tokenizer(text)
-        return Doc(self.vocab, words=words, spaces=[False]*len(words))
+        return self.tokenizer(text)
 
 
 __all__ = ['Japanese']
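For context, a minimal sketch of the tokenizer side of this change: once JapaneseTokenizer builds the Doc itself, Japanese.make_doc can simply delegate to it, as the hunk above shows. The class shape and the segmenter argument below are illustrative assumptions rather than the exact spaCy code; only the Doc(vocab, words=..., spaces=...) construction is taken from the diff.

# Hypothetical sketch, not the actual spaCy implementation.
from spacy.tokens import Doc

class JapaneseTokenizer(object):
    def __init__(self, vocab, segmenter):
        self.vocab = vocab          # shared spaCy Vocab
        self.segmenter = segmenter  # assumed callable: text -> list of surface strings

    def __call__(self, text):
        words = self.segmenter(text)
        # Japanese text has no whitespace between tokens, so every space flag is False.
        return Doc(self.vocab, words=words, spaces=[False] * len(words))

With the tokenizer returning a Doc, make_doc no longer needs to know how the words were segmented; it just returns whatever the tokenizer produced.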