mirror of https://github.com/explosion/spaCy.git
Revert init_model.py to its pre-German state (because it makes more sense)
Simplify token.n_rights and token.n_lefts (return self.c.r_kids / self.c.l_kids directly instead of looping over the tokens)
This commit is contained in:
parent
5e2e8e951a
commit
5080077097
|
@ -98,7 +98,7 @@ def _read_probs(loc):
|
|||
return probs, probs['-OOV-']
|
||||
|
||||
|
||||
def _read_freqs(loc, max_length=100, min_doc_freq=0, min_freq=200):
|
||||
def _read_freqs(loc, max_length=100, min_doc_freq=5, min_freq=200):
|
||||
if not loc.exists():
|
||||
print("Warning: Frequencies file not found")
|
||||
return {}, 0.0
|
||||
|
@ -125,8 +125,7 @@ def _read_freqs(loc, max_length=100, min_doc_freq=0, min_freq=200):
|
|||
doc_freq = int(doc_freq)
|
||||
freq = int(freq)
|
||||
if doc_freq >= min_doc_freq and freq >= min_freq and len(key) < max_length:
|
||||
# word = literal_eval(key)
|
||||
word = key
|
||||
word = literal_eval(key)
|
||||
smooth_count = counts.smoother(int(freq))
|
||||
log_smooth_count = math.log(smooth_count)
|
||||
probs[word] = math.log(smooth_count) - log_total
|
||||
|
@ -166,7 +165,7 @@ def setup_vocab(get_lex_attr, tag_map, src_dir, dst_dir):
|
|||
clusters = _read_clusters(src_dir / 'clusters.txt')
|
||||
probs, oov_prob = _read_probs(src_dir / 'words.sgt.prob')
|
||||
if not probs:
|
||||
probs, oov_prob = _read_freqs(src_dir / 'freqs.txt')
|
||||
probs, oov_prob = _read_freqs(src_dir / 'freqs.txt.gz')
|
||||
if not probs:
|
||||
oov_prob = -20
|
||||
else:
|
||||
|
|
|
@ -179,23 +179,11 @@ cdef class Token:
|
|||
|
||||
property n_lefts:
|
||||
def __get__(self):
|
||||
cdef int n = 0
|
||||
cdef const TokenC* ptr = self.c - self.i
|
||||
while ptr != self.c:
|
||||
if ptr + ptr.head == self.c:
|
||||
n += 1
|
||||
ptr += 1
|
||||
return n
|
||||
return self.c.l_kids
|
||||
|
||||
property n_rights:
|
||||
def __get__(self):
|
||||
cdef int n = 0
|
||||
cdef const TokenC* ptr = self.c + (self.array_len - self.i)
|
||||
while ptr != self.c:
|
||||
if ptr + ptr.head == self.c:
|
||||
n += 1
|
||||
ptr -= 1
|
||||
return n
|
||||
return self.c.r_kids
|
||||
|
||||
property lefts:
|
||||
def __get__(self):
|
||||
|
|
Loading…
Reference in New Issue