2017-05-14 15:50:23 +00:00
|
|
|
# coding: utf8
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
from .render import DependencyRenderer, EntityRenderer
|
2018-06-25 12:55:16 +00:00
|
|
|
from ..tokens import Doc, Span
|
2017-07-31 23:11:35 +00:00
|
|
|
from ..compat import b_to_str
|
2018-04-03 13:50:31 +00:00
|
|
|
from ..errors import Errors, Warnings, user_warning
|
2018-11-30 19:16:14 +00:00
|
|
|
from ..util import is_in_jupyter
|
2017-05-14 15:50:23 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Module-level store for rendered markup: `render` writes the result under the
# "parsed" key and the WSGI `app` reads it back when answering a request.
_html = {}
|
2017-05-18 12:13:14 +00:00
|
|
|
# Evaluated once at import time; used as the default of `render`'s `jupyter`
# argument.
IS_JUPYTER = is_in_jupyter()
|
2017-05-14 15:50:23 +00:00
|
|
|
|
|
|
|
|
💫 Tidy up and auto-format .py files (#2983)
<!--- Provide a general summary of your changes in the title. -->
## Description
- [x] Use [`black`](https://github.com/ambv/black) to auto-format all `.py` files.
- [x] Update flake8 config to exclude very large files (lemmatization tables etc.)
- [x] Update code to be compatible with flake8 rules
- [x] Fix various small bugs, inconsistencies and messy stuff in the language data
- [x] Update docs to explain new code style (`black`, `flake8`, when to use `# fmt: off` and `# fmt: on` and what `# noqa` means)
Once #2932 is merged, which auto-formats and tidies up the CLI, we'll be able to run `flake8 spacy` and actually get meaningful results.
At the moment, the code style and linting isn't applied automatically, but I'm hoping that the new [GitHub Actions](https://github.com/features/actions) will let us auto-format pull requests and post comments with relevant linting information.
### Types of change
enhancement, code style
## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.
2018-11-30 16:03:03 +00:00
|
|
|
def render(
    docs,
    style="dep",
    page=False,
    minify=False,
    jupyter=IS_JUPYTER,
    options={},
    manual=False,
):
    """Render displaCy visualisation.

    docs (list or Doc): Document(s) to visualise. A single Doc, Span or dict
        is wrapped in a list; Span objects are converted with `Span.as_doc()`.
    style (unicode): Visualisation style, 'dep' or 'ent'.
    page (bool): Render markup as full HTML page.
    minify (bool): Minify HTML markup.
    jupyter (bool): Experimental, use Jupyter's `display()` to output markup.
    options (dict): Visualiser-specific options, e.g. colors. Passed to both
        the renderer and the converter; not modified by this function.
    manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
    RETURNS (unicode): Rendered HTML markup. If `jupyter` is set, the return
        value of IPython's `display()` is returned instead.
    RAISES (ValueError): If `style` is not a known visualiser (E087) or if any
        input is not a Doc, Span or dict (E096).
    """
    factories = {
        "dep": (DependencyRenderer, parse_deps),
        "ent": (EntityRenderer, parse_ents),
    }
    if style not in factories:
        raise ValueError(Errors.E087.format(style=style))
    if isinstance(docs, (Doc, Span, dict)):
        docs = [docs]
    docs = [obj if not isinstance(obj, Span) else obj.as_doc() for obj in docs]
    if not all(isinstance(obj, (Doc, Span, dict)) for obj in docs):
        raise ValueError(Errors.E096)
    renderer, converter = factories[style]
    renderer = renderer(options=options)
    # In manual mode the inputs are already in the renderer's dict format.
    parsed = [converter(doc, options) for doc in docs] if not manual else docs
    # Keep the markup in the module-level store so the WSGI `app` can serve it.
    _html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip()
    html = _html["parsed"]
    if jupyter:  # return HTML rendered by IPython display()
        # Import lazily so IPython is only required when actually used. The
        # public `IPython.display` module is used because importing `display`
        # from `IPython.core.display` is deprecated.
        from IPython.display import HTML, display

        return display(HTML(html))
    return html
|
2017-05-14 15:50:23 +00:00
|
|
|
|
|
|
|
|
💫 Tidy up and auto-format .py files (#2983)
<!--- Provide a general summary of your changes in the title. -->
## Description
- [x] Use [`black`](https://github.com/ambv/black) to auto-format all `.py` files.
- [x] Update flake8 config to exclude very large files (lemmatization tables etc.)
- [x] Update code to be compatible with flake8 rules
- [x] Fix various small bugs, inconsistencies and messy stuff in the language data
- [x] Update docs to explain new code style (`black`, `flake8`, when to use `# fmt: off` and `# fmt: on` and what `# noqa` means)
Once #2932 is merged, which auto-formats and tidies up the CLI, we'll be able to run `flake8 spacy` and actually get meaningful results.
At the moment, the code style and linting isn't applied automatically, but I'm hoping that the new [GitHub Actions](https://github.com/features/actions) will let us auto-format pull requests and post comments with relevant linting information.
### Types of change
enhancement, code style
## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.
2018-11-30 16:03:03 +00:00
|
|
|
def serve(
    docs,
    style="dep",
    page=True,
    minify=False,
    options={},
    manual=False,
    port=5000,
    host="0.0.0.0",
):
    """Serve displaCy visualisation.

    Renders the markup once via `render` and then serves it with a
    `wsgiref` simple server until interrupted with Ctrl+C.

    docs (list or Doc): Document(s) to visualise.
    style (unicode): Visualisation style, 'dep' or 'ent'.
    page (bool): Render markup as full HTML page.
    minify (bool): Minify HTML markup.
    options (dict): Visualiser-specific options, e.g. colors.
    manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
    port (int): Port to serve visualisation.
    host (unicode): Host or interface to bind the server to. The default
        '0.0.0.0' listens on all IPv4 interfaces (the previous hard-coded
        behaviour); pass e.g. '127.0.0.1' to serve on localhost only.
    """
    from wsgiref import simple_server

    # Rendering stores the markup in the module-level store read by `app`.
    render(docs, style=style, page=page, minify=minify, options=options, manual=manual)
    httpd = simple_server.make_server(host, port, app)
    print("\nUsing the '{}' visualizer".format(style))
    print("Serving on port {}...\n".format(port))
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("Shutting down server on port {}.".format(port))
    finally:
        # Always release the listening socket, however the loop ended.
        httpd.server_close()
|
2017-05-14 15:50:23 +00:00
|
|
|
|
|
|
|
|
|
|
|
def app(environ, start_response):
    """WSGI application that answers every request with the rendered markup.

    environ (dict): WSGI environment. Not inspected.
    start_response (callable): WSGI callback, called with the status line
        and the response headers.
    RETURNS (list): Single-item list containing the UTF-8 encoded markup
        stored under the "parsed" key of the module-level `_html` dict.
    """
    # Headers and status need to be bytes in Python 2, see #1227
    content_type = (b_to_str(b"Content-type"), b_to_str(b"text/html; charset=utf-8"))
    status = b_to_str(b"200 OK")
    start_response(status, [content_type])
    body = _html["parsed"].encode(encoding="utf-8")
    return [body]
|
|
|
|
|
|
|
|
|
2017-06-03 11:24:43 +00:00
|
|
|
def parse_deps(orig_doc, options={}):
    """Generate dependency parse in {'words': [], 'arcs': []} format.

    orig_doc (Doc): Document to parse.
    options (dict): Visualiser-specific options. The keys read here are
        'collapse_phrases' (default False), 'collapse_punct' (default True)
        and 'fine_grained' (use `tag_` instead of `pos_` as the word tag).
    RETURNS (dict): Generated dependency parse keyed by words and arcs.
    """
    # Rebuild the document from its serialised bytes: the merges below are
    # applied to this copy rather than to `orig_doc`.
    doc = Doc(orig_doc.vocab).from_bytes(orig_doc.to_bytes())
    if not doc.is_parsed:
        user_warning(Warnings.W005)

    if options.get("collapse_phrases", False):
        # Materialise the chunks first, since merging changes the doc.
        for chunk in list(doc.noun_chunks):
            chunk.merge(
                tag=chunk.root.tag_,
                lemma=chunk.root.lemma_,
                ent_type=chunk.root.ent_type_,
            )

    if options.get("collapse_punct", True):
        # Collect (start_char, end_char, tag, lemma, ent_type) for every
        # non-punctuation token directly followed by punctuation, then merge.
        to_merge = []
        for token in doc[:-1]:
            if not token.is_punct and token.nbor(1).is_punct:
                first = token.i
                stop = token.i + 1
                # Extend over the whole run of trailing punctuation tokens.
                while stop < len(doc) and doc[stop].is_punct:
                    stop += 1
                merged = doc[first:stop]
                to_merge.append(
                    (
                        merged.start_char,
                        merged.end_char,
                        token.tag_,
                        token.lemma_,
                        token.ent_type_,
                    )
                )
        for start_char, end_char, tag, lemma, ent_type in to_merge:
            doc.merge(start_char, end_char, tag=tag, lemma=lemma, ent_type=ent_type)

    tag_attr = "tag_" if options.get("fine_grained") else "pos_"
    words = [{"text": t.text, "tag": getattr(t, tag_attr)} for t in doc]

    arcs = []
    for token in doc:
        head_i = token.head.i
        if token.i == head_i:
            # A token that is its own head gets no arc.
            continue
        if token.i < head_i:
            arc = {"start": token.i, "end": head_i, "label": token.dep_, "dir": "left"}
        else:
            arc = {"start": head_i, "end": token.i, "label": token.dep_, "dir": "right"}
        arcs.append(arc)
    return {"words": words, "arcs": arcs}
|
2017-05-14 15:50:23 +00:00
|
|
|
|
|
|
|
|
|
|
|
def parse_ents(doc, options={}):
    """Generate named entities in {'text': '', 'ents': [], 'title': None} format.

    doc (Doc): Document to parse.
    options (dict): Visualiser-specific options. Not used by this function.
    RETURNS (dict): Generated entities keyed by text (original text), ents
        (list of {'start': i, 'end': i, 'label': 'label'} dicts built from
        each entity's character offsets and label) and title (the 'title'
        entry of `doc.user_data`, or None if unavailable).
    """
    ents = [
        {"start": ent.start_char, "end": ent.end_char, "label": ent.label_}
        for ent in doc.ents
    ]
    if not ents:
        user_warning(Warnings.W006)
    # Tolerate doc-like inputs whose `user_data` is missing or None instead of
    # failing on the title lookup.
    user_data = getattr(doc, "user_data", None) or {}
    title = user_data.get("title", None)
    return {"text": doc.text, "ents": ents, "title": title}
|