diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index d60b7d4bd..09e400e1f 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -41,7 +41,7 @@ To distinguish issues that are opened by us, the maintainers, we usually add a 
 | [`duplicate`](https://github.com/explosion/spaCy/labels/duplicate) | Duplicates, i.e. issues that have been reported before |
 | [`meta`](https://github.com/explosion/spaCy/labels/meta) | Meta topics, e.g. repo organisation and issue management |
 | [`help wanted`](https://github.com/explosion/spaCy/labels/help%20wanted) | Requests for contributions |
-| [`help wanted (easy)`](https://github.com/explosion/spaCy/labels/help%20wanted%20%28easy%29) | Requests for contributions suitable for begginners |
+| [`help wanted (easy)`](https://github.com/explosion/spaCy/labels/help%20wanted%20%28easy%29) | Requests for contributions suitable for beginners |
 
 ## Contributing to the code base
 
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 274cbdd6d..fae00b5e6 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -9,6 +9,7 @@ This is a list of everyone who has made significant contributions to spaCy, in a
 * Chris DuBois, [@chrisdubois](https://github.com/chrisdubois)
 * Christoph Schwienheer, [@chssch](https://github.com/chssch)
 * Dafne van Kuppevelt, [@dafnevk](https://github.com/dafnevk)
+* Daniel Rapp, [@rappdw](https://github.com/rappdw)
 * Dmytro Sadovnychyi, [@sadovnychyi](https://github.com/sadovnychyi)
 * György Orosz, [@oroszgy](https://github.com/oroszgy)
 * Henning Peters, [@henningpeters](https://github.com/henningpeters)
diff --git a/spacy/bn/tokenizer_exceptions.py b/spacy/bn/tokenizer_exceptions.py
index 7722c9dcc..a47b89280 100644
--- a/spacy/bn/tokenizer_exceptions.py
+++ b/spacy/bn/tokenizer_exceptions.py
@@ -27,10 +27,21 @@ ABBREVIATIONS = {
     "সে.": [
         {ORTH: "সে.", LEMMA: "সেলসিয়াস"},
     ],
-    "কি.মি": [
-        {ORTH: "কি.মি", LEMMA: "কিলোমিটার"},
+    "কি.মি.": [
         {ORTH: "কি.মি.", LEMMA: "কিলোমিটার"},
     ],
+    "কি.মি": [
+            {ORTH: "কি.মি", LEMMA: "কিলোমিটার"},
+    ],
+    "সে.মি.": [
+        {ORTH: "সে.মি.", LEMMA: "সেন্টিমিটার"},
+    ],
+    "সে.মি": [
+            {ORTH: "সে.মি", LEMMA: "সেন্টিমিটার"},
+    ],
+    "মি.লি.": [
+        {ORTH: "মি.লি.", LEMMA: "মিলিলিটার"},
+    ]
 }
 
 TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS)
diff --git a/spacy/language_data/tokenizer_exceptions.py b/spacy/language_data/tokenizer_exceptions.py
index 7d623cbb3..f01c2fdf5 100644
--- a/spacy/language_data/tokenizer_exceptions.py
+++ b/spacy/language_data/tokenizer_exceptions.py
@@ -45,6 +45,6 @@ _URL_PATTERN = (
     r"$"
 ).strip()
 
-TOKEN_MATCH = re.compile(_URL_PATTERN).match
+TOKEN_MATCH = re.compile(_URL_PATTERN, re.UNICODE).match
 
 __all__ = ['TOKEN_MATCH']
diff --git a/spacy/tests/tokenizer/test_urls.py b/spacy/tests/tokenizer/test_urls.py
index f4f9ef29e..316b25f12 100644
--- a/spacy/tests/tokenizer/test_urls.py
+++ b/spacy/tests/tokenizer/test_urls.py
@@ -21,11 +21,8 @@ URLS_FULL = URLS_BASIC + [
 URLS_SHOULD_MATCH = [
     "http://foo.com/blah_blah",
     "http://foo.com/blah_blah/",
-#    "http://foo.com/blah_blah_(wikipedia)",
-#    "http://foo.com/blah_blah_(wikipedia)_(again)",
     "http://www.example.com/wpstyle/?p=364",
     "https://www.example.com/foo/?bar=baz&inga=42&quux",
-    "http://✪df.ws/123",
     "http://userid:password@example.com:8080",
     "http://userid:password@example.com:8080/",
     "http://userid@example.com",
@@ -36,7 +33,6 @@ URLS_SHOULD_MATCH = [
     "http://userid:password@example.com/",
     "http://142.42.1.1/",
     "http://142.42.1.1:8080/",
-    "http://➡.ws/䨹",
     "http://⌘.ws",
     "http://⌘.ws/",
     "http://foo.com/blah_(wikipedia)#cite-1",
@@ -48,13 +44,19 @@ URLS_SHOULD_MATCH = [
     "http://j.mp",
     "ftp://foo.bar/baz",
     "http://foo.bar/?q=Test%20URL-encoded%20stuff",
-    "http://مثال.إختبار",
-    "http://例子.测试",
-#    "http://उदाहरण.परीक्षा",
     "http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com",
     "http://1337.net",
     "http://a.b-c.de",
     "http://223.255.255.254",
+    "http://a.b--c.de/",  # this is a legitimate domain name; see the comment from 9/9/2014 at https://gist.github.com/dperini/729294
+    "http://✪df.ws/123",
+    "http://➡.ws/䨹",
+    "http://مثال.إختبار",
+    "http://例子.测试",
+
+    pytest.mark.xfail("http://उदाहरण.परीक्षा"),
+    pytest.mark.xfail("http://foo.com/blah_blah_(wikipedia)"),
+    pytest.mark.xfail("http://foo.com/blah_blah_(wikipedia)_(again)"),
 ]
 
 URLS_SHOULD_NOT_MATCH = [
@@ -74,7 +76,6 @@ URLS_SHOULD_NOT_MATCH = [
     "///a",
     "///",
     "http:///a",
-#    "foo.com",
     "rdar://1234",
     "h://test",
     "http:// shouldfail.com",
@@ -82,21 +83,22 @@ URLS_SHOULD_NOT_MATCH = [
     "http://foo.bar/foo(bar)baz quux",
     "ftps://foo.bar/",
     "http://-error-.invalid/",
-#    "http://a.b--c.de/", (this is a legit domain name see: https://gist.github.com/dperini/729294 comment on 9/9/2014
     "http://-a.b.co",
     "http://a.b-.co",
     "http://0.0.0.0",
     "http://10.1.1.0",
     "http://10.1.1.255",
     "http://224.1.1.1",
-#    "http://1.1.1.1.1",
     "http://123.123.123",
     "http://3628126748",
     "http://.www.foo.bar/",
-#    "http://www.foo.bar./",
     "http://.www.foo.bar./",
     "http://10.1.1.1",
-    "NASDAQ:GOOG"
+    "NASDAQ:GOOG",
+
+    pytest.mark.xfail("foo.com"),
+    pytest.mark.xfail("http://1.1.1.1.1"),
+    pytest.mark.xfail("http://www.foo.bar./"),
 ]