From c6fcd0369267c223a1cb5baeae877a0b372d0a1c Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 30 Oct 2014 17:56:11 +1100 Subject: [PATCH] * Small efficiency tweak to lexeme init --- spacy/lexeme.pyx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 62804621d..98b8decff 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -43,8 +43,11 @@ cpdef Lexeme init(unicode string, hash_t hashed, lex.prefix = get_string_id(string[0], store) lex.suffix = get_string_id(string[-3:], store) - canon_cased = orth.canon_case(string, upper_pc, title_pc, lower_pc) - lex.norm = get_string_id(canon_cased, store) + if upper_pc or lower_pc or title_pc: + canon_cased = orth.canon_case(string, upper_pc, title_pc, lower_pc) + lex.norm = get_string_id(canon_cased, store) + else: + lex.norm = lex.sic lex.shape = get_string_id(orth.word_shape(string), store) lex.asciied = get_string_id(orth.asciied(string), store) non_sparse = orth.non_sparse(string, lex.prob, lex.cluster, upper_pc, title_pc, lower_pc)