From 6516cd4a195ba0a84b01a127a862230509481599 Mon Sep 17 00:00:00 2001 From: Gareth Dwyer Date: Sat, 22 Oct 2016 13:28:27 +0200 Subject: [PATCH 1/3] Add Windows Compiler installation instructions The ReadMe currently says that Windows Spacy Users should install a full Visual Studio version in order to compile Spacy. Some might prefer to only install the compiler provided by Microsoft (or MinGW if this is confirmed to work?). I added a link to the Wiki that points to the various Microsoft downloads, as these are quite difficult to find otherwise. --- README.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.rst b/README.rst index 389b4ea14..8a97d6de2 100644 --- a/README.rst +++ b/README.rst @@ -179,6 +179,11 @@ Install a version of Visual Studio Express or higher that matches the version that was used to compile your Python interpreter. For official distributions these are VS 2008 (Python 2.7), VS 2010 (Python 3.4) and VS 2015 (Python 3.5). +If you don't want to install the entire Visual Studio, you can install a +stand-alone compiler. Make sure that you install the correct version for +your version of Python. See https://wiki.python.org/moin/WindowsCompilers for +links to download these. + Run tests ========= From 6b30cbaf0b1d2d16c920400ba50c93bef109a75a Mon Sep 17 00:00:00 2001 From: chssch Date: Sat, 22 Oct 2016 15:05:41 +0200 Subject: [PATCH 2/3] Strings has to be on vocab object --- website/docs/tutorials/rule-based-matcher.jade | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/tutorials/rule-based-matcher.jade b/website/docs/tutorials/rule-based-matcher.jade index 8c8949631..53d76e145 100644 --- a/website/docs/tutorials/rule-based-matcher.jade +++ b/website/docs/tutorials/rule-based-matcher.jade @@ -32,7 +32,7 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens. 
doc = nlp(u"I prefer Siri to Google Now.") matches = matcher(doc) for ent_id, label, start, end in matches: - print(nlp.strings[ent_id], nlp.strings[label], doc[start : end].text) + print(nlp.vocab.strings[ent_id], nlp.vocab.strings[label], doc[start : end].text) entity = matcher.get_entity(ent_id) print(entity) From cf7b6f7a9db1756135280a5c274fb53642d3c34c Mon Sep 17 00:00:00 2001 From: chssch Date: Sat, 22 Oct 2016 15:07:56 +0200 Subject: [PATCH 3/3] Add merge phrases from https://github.com/explosion/spaCy/issues/523#issuecomment-255172782 --- website/docs/tutorials/rule-based-matcher.jade | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/website/docs/tutorials/rule-based-matcher.jade b/website/docs/tutorials/rule-based-matcher.jade index 53d76e145..900a86a63 100644 --- a/website/docs/tutorials/rule-based-matcher.jade +++ b/website/docs/tutorials/rule-based-matcher.jade @@ -9,6 +9,18 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens. nlp = spacy.load('en', parser=False, entity=False) + def merge_phrases(matcher, doc, i, matches): + ''' + Merge a phrase. We have to be careful here because we'll change the token indices. + To avoid problems, merge all the phrases once we're called on the last match. + ''' + if i != len(matches)-1: + return None + # Get Span objects + spans = [(ent_id, label, doc[start : end]) for ent_id, label, start, end in matches] + for ent_id, label, span in spans: + span.merge(label=label, tag='NNP' if label else span.root.tag_) + matcher = Matcher(nlp.vocab) matcher.add_entity( @@ -17,6 +29,7 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens. acceptor=None, # Accept or modify the match on_match=merge_phrases # Callback to act on the matches ) + matcher.add_pattern( "GoogleNow", # Entity ID -- Created if doesn't exist. [ # The pattern is a list of *Token Specifiers*.