From b5b869366bcf05e0210ed1924f4fa1efbb2b2c68 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 28 Jun 2015 06:18:58 +0200
Subject: [PATCH] * Adjust hyphenation rule in tokenizer

---
Note: the two added rules treat a hyphen as an infix when it sits between a
letter/digit and a letter, or between a letter and a letter/digit. Their
look-ahead classes are spelled [a-zA-Z] / [0-9a-zA-Z]; the range "A-z" would
also match the ASCII punctuation between 'Z' and 'a' ([ \ ] ^ _ `). The
removed line keeps "A-z" verbatim because it has to match the pre-image.

 lang_data/en/infix.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lang_data/en/infix.txt b/lang_data/en/infix.txt
index b32be4c07..6bc58ff63 100644
--- a/lang_data/en/infix.txt
+++ b/lang_data/en/infix.txt
@@ -1,2 +1,3 @@
 (?<=[a-z])\.(?=[A-Z])
-(?<=[a-zA-Z])-(?=[a-zA-z])
+(?<=[a-zA-Z0-9])-(?=[a-zA-Z])
+(?<=[a-zA-Z])-(?=[0-9a-zA-Z])