From f12b0433085645b6bbd74816240a37aab0c29bfd Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sun, 17 Apr 2016 15:19:17 +0200
Subject: [PATCH 1/3] * Add test for Issue #242: Overlapping matches are not
 recognised correctly.

---
 spacy/tests/matcher/test_matcher_bugfixes.py | 27 ++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/spacy/tests/matcher/test_matcher_bugfixes.py b/spacy/tests/matcher/test_matcher_bugfixes.py
index 275c942c5..a80ec43b5 100644
--- a/spacy/tests/matcher/test_matcher_bugfixes.py
+++ b/spacy/tests/matcher/test_matcher_bugfixes.py
@@ -1,8 +1,11 @@
 import pytest
 import numpy
+import os
 
+import spacy
 from spacy.matcher import Matcher
 from spacy.attrs import ORTH, LOWER, ENT_IOB, ENT_TYPE
+from spacy.attrs import ORTH, TAG, LOWER, IS_ALPHA, FLAG63
 from spacy.symbols import DATE
 
 
@@ -31,6 +34,30 @@ def test_overlap_issue118(EN):
     assert ents[0].end == 11
 
 
+def test_overlap_issue242():
+    '''Test bug from multi-word phrases breaking text representation.'''
+
+    patterns = [
+        [{LOWER: 'food'}, {LOWER: 'safety'}],
+        [{LOWER: 'safety'}, {LOWER: 'standards'}],
+    ]
+
+    if os.environ.get('SPACY_DATA'):
+        data_dir = os.environ.get('SPACY_DATA')
+    else:
+        data_dir = None
+ 
+    nlp = spacy.en.English(data_dir=data_dir, tagger=False, parser=False, entity=False)
+
+    nlp.matcher.add('FOOD', 'FOOD', {}, patterns)
+
+    doc = nlp(u'There are different food safety standards in different countries.')
+
+    food_safety, safety_standards = doc.ents
+    assert food_safety.text == u'food safety'
+    assert safety_standards.text == u'safety standards'
+
+
 def test_overlap_reorder(EN):
     '''Test order dependence'''
     doc = EN.tokenizer(u'how many points did lebron james score against the boston celtics last night')

From 2b419d5b8c9dbf33763dd4b99c219de12306c1d9 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sun, 17 Apr 2016 15:34:23 +0200
Subject: [PATCH 2/3] * Update test for Issue #242

---
 spacy/tests/matcher/test_matcher_bugfixes.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/spacy/tests/matcher/test_matcher_bugfixes.py b/spacy/tests/matcher/test_matcher_bugfixes.py
index a80ec43b5..a125d1a99 100644
--- a/spacy/tests/matcher/test_matcher_bugfixes.py
+++ b/spacy/tests/matcher/test_matcher_bugfixes.py
@@ -51,11 +51,12 @@ def test_overlap_issue242():
 
     nlp.matcher.add('FOOD', 'FOOD', {}, patterns)
 
-    doc = nlp(u'There are different food safety standards in different countries.')
-
-    food_safety, safety_standards = doc.ents
-    assert food_safety.text == u'food safety'
-    assert safety_standards.text == u'safety standards'
+    doc = nlp.tokenizer(u'There are different food safety standards in different countries.')
+    food_safety, safety_standards = nlp.matcher(doc)
+    assert food_safety[1] == 3
+    assert food_safety[2] == 5
+    assert safety_standards[1] == 4
+    assert safety_standards[2] == 6
 
 
 def test_overlap_reorder(EN):

From 2add5206aa34f9e393afe51df09483d6a3fcc2d7 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sun, 17 Apr 2016 15:40:21 +0200
Subject: [PATCH 3/3] * Fix description of matcher test

---
 spacy/tests/matcher/test_matcher_bugfixes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/tests/matcher/test_matcher_bugfixes.py b/spacy/tests/matcher/test_matcher_bugfixes.py
index a125d1a99..e7b9c75b0 100644
--- a/spacy/tests/matcher/test_matcher_bugfixes.py
+++ b/spacy/tests/matcher/test_matcher_bugfixes.py
@@ -35,7 +35,7 @@ def test_overlap_issue118(EN):
 
 
 def test_overlap_issue242():
-    '''Test bug from multi-word phrases breaking text representation.'''
+    '''Test overlapping multi-word phrases.'''
 
     patterns = [
         [{LOWER: 'food'}, {LOWER: 'safety'}],