From 8b20e9ad971b329a41460cc62726cfcf52a58f2a Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 12 Sep 2014 04:03:52 +0200 Subject: [PATCH] * Delete unused _split method --- spacy/lang.pxd | 1 - spacy/lang.pyx | 22 ---------------------- 2 files changed, 23 deletions(-) diff --git a/spacy/lang.pxd b/spacy/lang.pxd index b2844f680..6e5bda97a 100644 --- a/spacy/lang.pxd +++ b/spacy/lang.pxd @@ -79,5 +79,4 @@ cdef class Language: cpdef Lexeme lookup(self, unicode text) cdef int _tokenize(self, Tokens tokens, Py_UNICODE* characters, size_t length) except -1 - cdef list _split(self, unicode string) cdef int _split_one(self, unicode word) diff --git a/spacy/lang.pyx b/spacy/lang.pyx index 819b9d2e9..d7707e37b 100644 --- a/spacy/lang.pyx +++ b/spacy/lang.pyx @@ -134,28 +134,6 @@ cdef class Language: node = node.tail break - cdef list _split(self, unicode string): - """Find how to split a contiguous span of non-space characters into substrings. - - This method calls find_split repeatedly. Most languages will want to - override _split_one, but it may be useful to override this instead. - - Args: - chunk (unicode): The string to be split, e.g. u"Mike's!" - - Returns: - substrings (list): The component substrings, e.g. [u"Mike", "'s", "!"]. - """ - substrings = [] - while string: - split = self._split_one(string) - if split == 0: - substrings.append(string) - break - substrings.append(string[:split]) - string = string[split:] - return substrings - cdef int _split_one(self, unicode word): return len(word)