From 5a14a13f64361a646fd747bef7e1c2bda532c679 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Fri, 10 May 2019 05:21:34 -0700
Subject: [PATCH] fix thai bug (#3693)

fix tokenize for pythainlp
---
 spacy/lang/th/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/lang/th/__init__.py b/spacy/lang/th/__init__.py
index b3150fa2f..06970fbd7 100644
--- a/spacy/lang/th/__init__.py
+++ b/spacy/lang/th/__init__.py
@@ -28,7 +28,7 @@ class ThaiTokenizer(DummyTokenizer):
         self.vocab = nlp.vocab if nlp is not None else cls.create_vocab(nlp)
 
     def __call__(self, text):
-        words = list(self.word_tokenize(text, "newmm"))
+        words = list(self.word_tokenize(text))
         spaces = [False] * len(words)
         return Doc(self.vocab, words=words, spaces=spaces)
 