mirror of https://github.com/explosion/spaCy.git
* Rename Lexicon._dict to Lexicon._map
This commit is contained in:
parent
2ee8a1e61f
commit
8c2938fe01
|
@ -1,5 +1,7 @@
|
|||
from libcpp.vector cimport vector
|
||||
|
||||
from cpython cimport Py_UNICODE_ISSPACE, Py_UNICODE_ISALPHA, Py_UNICODE_ISUPPER
|
||||
|
||||
from preshed.maps cimport PreshMap
|
||||
from cymem.cymem cimport Pool
|
||||
|
||||
|
@ -7,17 +9,9 @@ from .typedefs cimport hash_t
|
|||
from .tokens cimport Tokens
|
||||
from .lexeme cimport Lexeme
|
||||
from .tagger cimport Tagger
|
||||
from .ner.greedy_parser cimport NERParser
|
||||
from .utf8string cimport StringStore
|
||||
|
||||
|
||||
cdef extern from "Python.h":
|
||||
cdef bint Py_UNICODE_ISSPACE(Py_UNICODE ch)
|
||||
cdef bint Py_UNICODE_ISALNUM(Py_UNICODE ch)
|
||||
cdef bint Py_UNICODE_ISALPHA(Py_UNICODE ch)
|
||||
cdef bint Py_UNICODE_ISUPPER(Py_UNICODE ch)
|
||||
|
||||
|
||||
cdef struct String:
|
||||
Py_UNICODE* chars
|
||||
size_t n
|
||||
|
@ -32,7 +26,7 @@ cdef class Lexicon:
|
|||
|
||||
cdef Lexeme* get(self, String* s) except NULL
|
||||
|
||||
cdef PreshMap _dict
|
||||
cdef PreshMap _map
|
||||
|
||||
|
||||
cdef class Language:
|
||||
|
@ -42,9 +36,6 @@ cdef class Language:
|
|||
cdef PreshMap _specials
|
||||
cpdef readonly Lexicon lexicon
|
||||
|
||||
cpdef readonly Tagger pos_tagger
|
||||
cpdef readonly NERParser ner_tagger
|
||||
|
||||
cdef object _prefix_re
|
||||
cdef object _suffix_re
|
||||
cdef object _infix_re
|
||||
|
|
|
@ -241,7 +241,7 @@ cdef class Lexicon:
|
|||
'''
|
||||
def __init__(self):
|
||||
self.mem = Pool()
|
||||
self._dict = PreshMap(2 ** 20)
|
||||
self._map = PreshMap(2 ** 20)
|
||||
self.strings = StringStore()
|
||||
self.lexemes.push_back(&EMPTY_LEXEME)
|
||||
self.size = 1
|
||||
|
@ -249,12 +249,12 @@ cdef class Lexicon:
|
|||
cdef Lexeme* get(self, String* string) except NULL:
|
||||
'''Retrieve a pointer to a Lexeme from the lexicon.'''
|
||||
cdef Lexeme* lex
|
||||
lex = <Lexeme*>self._dict.get(string.key)
|
||||
lex = <Lexeme*>self._map.get(string.key)
|
||||
if lex != NULL:
|
||||
return lex
|
||||
lex = <Lexeme*>self.mem.alloc(sizeof(Lexeme), 1)
|
||||
lex[0] = lexeme_init(self.size, string.chars[:string.n], string.key, self.strings, {})
|
||||
self._dict.set(string.key, lex)
|
||||
self._map.set(string.key, lex)
|
||||
while self.lexemes.size() < (lex.id + 1):
|
||||
self.lexemes.push_back(&EMPTY_LEXEME)
|
||||
self.lexemes[lex.id] = lex
|
||||
|
@ -302,11 +302,11 @@ cdef class Lexicon:
|
|||
assert fp != NULL
|
||||
cdef size_t st
|
||||
cdef hash_t key
|
||||
for i in range(self._dict.length):
|
||||
key = self._dict.c_map.cells[i].key
|
||||
for i in range(self._map.length):
|
||||
key = self._map.c_map.cells[i].key
|
||||
if key == 0:
|
||||
continue
|
||||
lexeme = <Lexeme*>self._dict.c_map.cells[i].value
|
||||
lexeme = <Lexeme*>self._map.c_map.cells[i].value
|
||||
st = fwrite(&key, sizeof(key), 1, fp)
|
||||
assert st == 1
|
||||
st = fwrite(lexeme, sizeof(Lexeme), 1, fp)
|
||||
|
@ -331,7 +331,7 @@ cdef class Lexicon:
|
|||
st = fread(lexeme, sizeof(Lexeme), 1, fp)
|
||||
if st != 1:
|
||||
break
|
||||
self._dict.set(key, lexeme)
|
||||
self._map.set(key, lexeme)
|
||||
while self.lexemes.size() < (lexeme.id + 1):
|
||||
self.lexemes.push_back(&EMPTY_LEXEME)
|
||||
self.lexemes[lexeme.id] = lexeme
|
||||
|
|
Loading…
Reference in New Issue