From 7b56b2f04bb14ae02646921010743f5bfd759f48 Mon Sep 17 00:00:00 2001 From: Explosion Bot Date: Mon, 30 Oct 2017 16:08:50 +0100 Subject: [PATCH] Add Vocab.cfg attr, to hold stuff like oov probs --- spacy/vocab.pxd | 1 + spacy/vocab.pyx | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/spacy/vocab.pxd b/spacy/vocab.pxd index 8005cbf06..b12bccf38 100644 --- a/spacy/vocab.pxd +++ b/spacy/vocab.pxd @@ -32,6 +32,7 @@ cdef class Vocab: cdef readonly int length cdef public object data_dir cdef public object lex_attr_getters + cdef public object cfg cdef const LexemeC* get(self, Pool mem, unicode string) except NULL cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 8b09d7ee7..937d4b69d 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -27,7 +27,7 @@ cdef class Vocab: C-data that is shared between `Doc` objects. """ def __init__(self, lex_attr_getters=None, tag_map=None, lemmatizer=None, - strings=tuple(), **deprecated_kwargs): + strings=tuple(), oov_prob=-20., **deprecated_kwargs): """Create the vocabulary. lex_attr_getters (dict): A dictionary mapping attribute IDs to @@ -43,6 +43,7 @@ cdef class Vocab: tag_map = tag_map if tag_map is not None else {} if lemmatizer in (None, True, False): lemmatizer = Lemmatizer({}, {}, {}) + self.cfg = {'oov_prob': oov_prob} self.mem = Pool() self._by_hash = PreshMap() self._by_orth = PreshMap()