From 7db1a0e83e2ecb2f8311016038910ec99c6de560 Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 4 Jun 2017 21:27:20 +0200 Subject: [PATCH 1/3] Make sure printed values are always strings --- spacy/util.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/spacy/util.py b/spacy/util.py index 9216edee8..cb1aec4c3 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -478,7 +478,7 @@ def print_table(data, title=None): if isinstance(data, dict): data = list(data.items()) tpl_row = ' {:<15}' * len(data[0]) - table = '\n'.join([tpl_row.format(l, v) for l, v in data]) + table = '\n'.join([tpl_row.format(l, unicode_(v)) for l, v in data]) if title: print('\n \033[93m{}\033[0m'.format(title)) print('\n{}\n'.format(table)) @@ -491,11 +491,12 @@ def print_markdown(data, title=None): title (unicode or None): Title, will be rendered as headline 2. """ def excl_value(value): - return Path(value).exists() # contains path (personal info) + # contains path, i.e. personal info + return isinstance(value, basestring_) and Path(value).exists() if isinstance(data, dict): data = list(data.items()) - markdown = ["* **{}:** {}".format(l, v) for l, v in data if not excl_value(v)] + markdown = ["* **{}:** {}".format(l, unicode_(v)) for l, v in data if not excl_value(v)] if title: print("\n## {}".format(title)) print('\n{}\n'.format('\n'.join(markdown))) From 9254a3dd78d7cff536b10e69ff0825880af2ba4c Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 4 Jun 2017 21:42:15 +0200 Subject: [PATCH 2/3] Import and add Spanish syntax iterators --- spacy/lang/es/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spacy/lang/es/__init__.py b/spacy/lang/es/__init__.py index e20338b39..1e7f55be8 100644 --- a/spacy/lang/es/__init__.py +++ b/spacy/lang/es/__init__.py @@ -5,6 +5,7 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS from .tag_map import TAG_MAP from .stop_words import STOP_WORDS from .lemmatizer import LOOKUP +from .syntax_iterators import SYNTAX_ITERATORS from 
..tokenizer_exceptions import BASE_EXCEPTIONS from ..norm_exceptions import BASE_NORMS @@ -22,6 +23,7 @@ class SpanishDefaults(Language.Defaults): tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) tag_map = dict(TAG_MAP) stop_words = set(STOP_WORDS) + syntax_iterators = dict(SYNTAX_ITERATORS) @classmethod def create_lemmatizer(cls, nlp=None): From 990cb81556bdc71336c6806f2da01d745818f1c8 Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 4 Jun 2017 21:47:22 +0200 Subject: [PATCH 3/3] Add info on syntax iterators --- website/docs/usage/_spacy-101/_language-data.jade | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/website/docs/usage/_spacy-101/_language-data.jade b/website/docs/usage/_spacy-101/_language-data.jade index c70bb5c7a..aaca10ebb 100644 --- a/website/docs/usage/_spacy-101/_language-data.jade +++ b/website/docs/usage/_spacy-101/_language-data.jade @@ -78,6 +78,14 @@ p | #[code like_num], which includes language-specific words like "ten" | or "hundred". + +row + +cell #[strong Syntax iterators] + | #[+src(gh("spaCy", "spacy/lang/en/syntax_iterators.py")) syntax_iterators.py] + +cell + | Functions that compute views of a #[code Doc] object based on its + | syntax. At the moment, only used for + | #[+a("/docs/usage/dependency-parse#noun-chunks") noun chunks]. + +row +cell #[strong Lemmatizer] + | #[+src(gh("spacy-dev-resources", "templates/new_language/lemmatizer.py")) lemmatizer.py]