Merge branch 'develop' of https://github.com/explosion/spaCy into develop

This commit is contained in:
Matthew Honnibal 2017-06-04 21:53:56 +02:00
commit 8a683a4494
3 changed files with 14 additions and 3 deletions

View File

@ -5,6 +5,7 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .tag_map import TAG_MAP
from .stop_words import STOP_WORDS
from .lemmatizer import LOOKUP
from .syntax_iterators import SYNTAX_ITERATORS
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS
@ -22,6 +23,7 @@ class SpanishDefaults(Language.Defaults):
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
tag_map = dict(TAG_MAP)
stop_words = set(STOP_WORDS)
syntax_iterators = dict(SYNTAX_ITERATORS)
@classmethod
def create_lemmatizer(cls, nlp=None):

View File

@ -478,7 +478,7 @@ def print_table(data, title=None):
if isinstance(data, dict):
data = list(data.items())
tpl_row = ' {:<15}' * len(data[0])
table = '\n'.join([tpl_row.format(l, v) for l, v in data])
table = '\n'.join([tpl_row.format(l, unicode_(v)) for l, v in data])
if title:
print('\n \033[93m{}\033[0m'.format(title))
print('\n{}\n'.format(table))
@ -491,11 +491,12 @@ def print_markdown(data, title=None):
title (unicode or None): Title, will be rendered as headline 2.
"""
def excl_value(value):
return Path(value).exists() # contains path (personal info)
# contains path, i.e. personal info
return isinstance(value, basestring_) and Path(value).exists()
if isinstance(data, dict):
data = list(data.items())
markdown = ["* **{}:** {}".format(l, v) for l, v in data if not excl_value(v)]
markdown = ["* **{}:** {}".format(l, unicode_(v)) for l, v in data if not excl_value(v)]
if title:
print("\n## {}".format(title))
print('\n{}\n'.format('\n'.join(markdown)))

View File

@ -78,6 +78,14 @@ p
| #[code like_num], which includes language-specific words like "ten"
| or "hundred".
+row
+cell #[strong Syntax iterators]
| #[+src(gh("spaCy", "spacy/lang/en/syntax_iterators.py")) syntax_iterators.py]
+cell
| Functions that compute views of a #[code Doc] object based on its
| syntax. At the moment, they are only used for
| #[+a("/docs/usage/dependency-parse#noun-chunks") noun chunks].
+row
+cell #[strong Lemmatizer]
| #[+src(gh("spacy-dev-resources", "templates/new_language/lemmatizer.py")) lemmatizer.py]