From 6076213c167640c5f2c4cf24ec2cd354b7a8bda0 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 25 Jul 2015 22:35:52 +0200 Subject: [PATCH 1/4] * Fix init_model script --- bin/init_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/init_model.py b/bin/init_model.py index d40e7813d..2d3c62368 100644 --- a/bin/init_model.py +++ b/bin/init_model.py @@ -91,7 +91,7 @@ def _read_probs(loc): def _read_freqs(loc): counts = PreshCounter() total = 0 - for line in open(loc): + for line in loc.open(): freq, doc_freq, key = line.split('\t', 2) freq = int(freq) counts[hash_string(key)] = freq @@ -99,7 +99,7 @@ def _read_freqs(loc): counts.smooth() log_total = math.log(total) probs = {} - for line in open(loc): + for line in loc.open(): freq, doc_freq, key = line.split('\t', 2) if int(doc_freq) >= 2 and int(freq) >= 5 and len(key) < 200: word = literal_eval(key) From 65f3ce6c52c688a26497cbb10c3a82b15f684f7c Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 25 Jul 2015 22:36:43 +0200 Subject: [PATCH 2/4] * Require preshed 0.41 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7a5292e96..bf10a1165 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ cython cymem == 1.11 pathlib -preshed == 0.37 +preshed == 0.41 thinc == 3.3 murmurhash == 0.24 unidecode diff --git a/setup.py b/setup.py index 36f43a296..15c850627 100644 --- a/setup.py +++ b/setup.py @@ -119,7 +119,7 @@ def run_setup(exts): "spacy.syntax": ["*.pxd"]}, ext_modules=exts, license="Dual: Commercial or AGPL", - install_requires=['numpy', 'murmurhash', 'cymem >= 1.11', 'preshed == 0.37', + install_requires=['numpy', 'murmurhash', 'cymem >= 1.11', 'preshed == 0.41', 'thinc == 3.3', "unidecode", 'wget', 'plac', 'six', 'ujson'], setup_requires=["headers_workaround"], From 8e800adfbcfde54958b27b5ef41f8240a8384d27 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 25 Jul 2015 22:54:08 +0200 Subject: [PATCH 3/4] * Fix init_model --- bin/init_model.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/init_model.py b/bin/init_model.py index 2d3c62368..414e2b05c 100644 --- a/bin/init_model.py +++ b/bin/init_model.py @@ -89,6 +89,9 @@ def _read_probs(loc): def _read_freqs(loc): + if not loc.exists(): + print("Warning: Frequencies file not found") + return None counts = PreshCounter() total = 0 for line in loc.open(): From 3fe14b8ed6673b05199a64e2b397202baf75eba9 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 25 Jul 2015 22:55:53 +0200 Subject: [PATCH 4/4] * Fix CFile for Python2 --- spacy/cfile.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spacy/cfile.pyx b/spacy/cfile.pyx index 6346302b9..781759466 100644 --- a/spacy/cfile.pyx +++ b/spacy/cfile.pyx @@ -5,6 +5,8 @@ cdef class CFile: def __init__(self, loc, mode): if isinstance(mode, unicode): mode_str = mode.encode('ascii') + else: + mode_str = mode cdef bytes bytes_loc = loc.encode('utf8') if type(loc) == unicode else loc self.fp = fopen(bytes_loc, mode_str) if self.fp == NULL: