From 66f8f9d4a0476f84a130f9e7ba5c7f69f4da02e4 Mon Sep 17 00:00:00 2001
From: ines
Date: Tue, 24 Oct 2017 13:02:19 +0200
Subject: [PATCH] Fix Japanese tokenizer

JapaneseTokenizer now returns a Doc, not individual words

---
 spacy/lang/ja/__init__.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/spacy/lang/ja/__init__.py b/spacy/lang/ja/__init__.py
index 3a9c58fca..04cc013a4 100644
--- a/spacy/lang/ja/__init__.py
+++ b/spacy/lang/ja/__init__.py
@@ -33,8 +33,7 @@ class Japanese(Language):
     Defaults = JapaneseDefaults
 
     def make_doc(self, text):
-        words = self.tokenizer(text)
-        return Doc(self.vocab, words=words, spaces=[False]*len(words))
+        return self.tokenizer(text)
 
 
 __all__ = ['Japanese']
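
Note: this change assumes the custom tokenizer assigned by JapaneseDefaults builds the Doc itself, so make_doc can simply return its result. A minimal sketch of such a tokenizer, assuming the Janome morphological analyzer (the exact JapaneseTokenizer implementation in spaCy may differ):

    from spacy.tokens import Doc
    from janome.tokenizer import Tokenizer as JanomeTokenizer

    class JapaneseTokenizer(object):
        def __init__(self, vocab):
            self.vocab = vocab
            # Janome performs the Japanese word segmentation
            self.tokenizer = JanomeTokenizer()

        def __call__(self, text):
            # surface forms of the segmented tokens
            words = [token.surface for token in self.tokenizer.tokenize(text)]
            # construct the Doc here, so Japanese.make_doc can return it unchanged
            return Doc(self.vocab, words=words, spaces=[False] * len(words))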