2017-02-16 22:26:21 +00:00
|
|
|
# coding: utf-8
|
|
|
|
from __future__ import unicode_literals
|
2017-05-28 13:10:22 +00:00
|
|
|
import pytest
|
2017-02-16 22:26:21 +00:00
|
|
|
|
2017-02-16 22:28:51 +00:00
|
|
|
|
2017-02-16 22:26:21 +00:00
|
|
|
# Fixture data: four text rows, each a token followed by three float
# components separated by single ASCII spaces. The last row's token is
# U+00A0 (no-break space), written as an escape so it survives editors that
# normalise whitespace.
word2vec_str = """, -0.046107 -0.035951 -0.560418
de -0.648927 -0.400976 -0.527124
. 0.113685 0.439990 -0.634510
\u00A0 -1.499184 -0.184280 -0.598371"""
|
|
|
|
|
2017-02-16 22:26:21 +00:00
|
|
|
|
2017-05-28 13:10:22 +00:00
|
|
|
@pytest.mark.xfail
def test_issue834(en_vocab, text_file):
    """Test that no-break space (U+00A0) is detected as space by the load_vectors function.

    Writes the module-level ``word2vec_str`` rows into ``text_file``, rewinds
    the file, passes it to ``en_vocab.load_vectors`` and checks the returned
    vector length. The test is marked ``xfail``.
    """
    text_file.write(word2vec_str)
    # Rewind so the loader reads from the first row rather than end-of-file.
    text_file.seek(0)
    vector_length = en_vocab.load_vectors(text_file)
    # Each row in word2vec_str has three float components after the token.
    assert vector_length == 3
|