From 5357874bf74b05a40961ba05936f6009453a48b8 Mon Sep 17 00:00:00 2001 From: Swier Date: Wed, 5 Jul 2017 14:03:30 +0200 Subject: [PATCH 1/3] add Dutch numbers and ordinals --- spacy/nl/stop_words.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/spacy/nl/stop_words.py b/spacy/nl/stop_words.py index 22f1d714c..d19515262 100644 --- a/spacy/nl/stop_words.py +++ b/spacy/nl/stop_words.py @@ -41,3 +41,22 @@ want waren was wat we wel werd wezen wie wij wil worden zal ze zei zelf zich zij zijn zo zonder zou """.split()) + + +# Number words (cardinals; compound numbers are not listed) + +NUM_WORDS = set(""" +nul een één twee drie vier vijf zes zeven acht negen tien elf twaalf dertien +veertien vijftien zestien zeventien achttien negentien twintig dertig veertig vijftig zestig zeventig tachtig negentig honderd +duizend miljoen miljard biljoen biljard triljoen triljard +""".split()) + + +# Ordinal words (compound ordinals are not listed) + +ORDINAL_WORDS = set(""" +eerste tweede derde vierde vijfde zesde zevende achtste negende tiende elfde +twaalfde dertiende veertiende vijftiende zestiende zeventiende achttiende negentiende twintigste dertigste veertigste vijftigste +zestigste zeventigste tachtigste negentigste honderdste duizendste miljoenste +miljardste biljoenste biljardste triljoenste triljardste +""".split()) From f377c9c952ed6b42086c0ee9fcedb5a67af963b4 Mon Sep 17 00:00:00 2001 From: Swier Date: Wed, 5 Jul 2017 14:06:28 +0200 Subject: [PATCH 2/3] Rename stop_words.py to word_sets.py --- spacy/nl/{stop_words.py => word_sets.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename spacy/nl/{stop_words.py => word_sets.py} (100%) diff --git a/spacy/nl/stop_words.py b/spacy/nl/word_sets.py similarity index 100% rename from spacy/nl/stop_words.py rename to spacy/nl/word_sets.py From 29720150f9960c1a57b2d463d4653e0a8f3211e0 Mon Sep 17 00:00:00 2001 From: Swier Date: Wed, 5 Jul 2017 14:08:04 +0200 Subject: [PATCH 3/3] fix import of stop words in language data --- spacy/nl/language_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/nl/language_data.py b/spacy/nl/language_data.py index 
f9899d8d1..b3ca1aef9 100644 --- a/spacy/nl/language_data.py +++ b/spacy/nl/language_data.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals from .. import language_data as base from ..language_data import update_exc, strings_to_exc -from .stop_words import STOP_WORDS +from .word_sets import STOP_WORDS, NUM_WORDS STOP_WORDS = set(STOP_WORDS)