From ca244f5f84871c658b7c49d35b5c0b0690ec3f2b Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Thu, 20 Dec 2018 17:32:04 +0100 Subject: [PATCH] Small fixes to displaCy (#3076) ## Description - [x] fix auto-detection of Jupyter notebooks (even if `jupyter=True` isn't set) - [x] add `displacy.set_render_wrapper` method to define a custom function called around the HTML markup generated in all calls to `displacy.render` (can be used to allow custom integrations, callbacks and page formatting) - [x] add option to customise host for web server - [x] show warning if `displacy.serve` is called from within Jupyter notebooks - [x] move error message to `spacy.errors.Errors`. ### Types of change enhancement ## Checklist - [x] I have submitted the spaCy Contributor Agreement. - [x] I ran the tests, and all new and existing tests passed. - [x] My changes don't require a change to the documentation, or if they do, I've added all required information. --- spacy/displacy/__init__.py | 37 ++++++++++++++++++++++++--- spacy/errors.py | 15 +++++++++-- spacy/tests/test_misc.py | 14 ++++++++++ spacy/util.py | 9 ++++--- website/api/_top-level/_displacy.jade | 6 +++++ 5 files changed, 72 insertions(+), 9 deletions(-) diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py index 9a4b4f5d8..87c796d73 100644 --- a/spacy/displacy/__init__.py +++ b/spacy/displacy/__init__.py @@ -10,6 +10,7 @@ from ..util import is_in_jupyter _html = {} IS_JUPYTER = is_in_jupyter() +RENDER_WRAPPER = None def render( @@ -48,6 +49,8 @@ def render( parsed = [converter(doc, options) for doc in docs] if not manual else docs _html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip() html = _html["parsed"] + if RENDER_WRAPPER is not None: + html = RENDER_WRAPPER(html) if jupyter: # return HTML rendered by IPython display() from IPython.core.display import display, HTML @@ -56,7 +59,14 @@ def render( def serve( - docs, style="dep", page=True, minify=False, options={}, manual=False, port=5000 + docs, + style="dep", + page=True, + minify=False, + options={}, + manual=False, + port=5000, + host="0.0.0.0", ): """Serve displaCy visualisation. @@ -67,13 +77,17 @@ def serve( options (dict): Visualiser-specific options, e.g. colors. manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts. port (int): Port to serve visualisation. + host (unicode): Host to serve visualisation. """ from wsgiref import simple_server + if IS_JUPYTER: + user_warning(Warnings.W011) + render(docs, style=style, page=page, minify=minify, options=options, manual=manual) - httpd = simple_server.make_server("0.0.0.0", port, app) + httpd = simple_server.make_server(host, port, app) print("\nUsing the '{}' visualizer".format(style)) - print("Serving on port {}...\n".format(port)) + print("Serving on http://{}:{} ...\n".format(host, port)) try: httpd.serve_forever() except KeyboardInterrupt: @@ -153,3 +167,20 @@ def parse_ents(doc, options={}): user_warning(Warnings.W006) title = doc.user_data.get("title", None) if hasattr(doc, "user_data") else None return {"text": doc.text, "ents": ents, "title": title} + + +def set_render_wrapper(func): + """Set an optional wrapper function that is called around the generated + HTML markup on displacy.render. This can be used to allow integration into + other platforms, similar to Jupyter Notebooks that require functions to be + called around the HTML. It can also be used to implement custom callbacks + on render, or to embed the visualization in a custom page. + + func (callable): Function to call around markup before rendering it. Needs + to take one argument, the HTML markup, and should return the desired + output of displacy.render. + """ + global RENDER_WRAPPER + if not hasattr(func, "__call__"): + raise ValueError(Errors.E110.format(obj=type(func))) + RENDER_WRAPPER = func diff --git a/spacy/errors.py b/spacy/errors.py index 45cabc4ad..e4c879751 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -54,6 +54,12 @@ class Warnings(object): "package overwrites built-in factory.") W010 = ("As of v2.1.0, the PhraseMatcher doesn't have a phrase length " "limit anymore, so the max_length argument is now deprecated.") + W011 = ("It looks like you're calling displacy.serve from within a " + "Jupyter notebook or a similar environment. This likely means " + "you're already running a local web server, so there's no need to " + "make displaCy start another one. Instead, you should be able to " + "replace displacy.serve with displacy.render to show the " + "visualization.") @add_codes @@ -289,6 +295,7 @@ class Errors(object): "thing. For example, use `nlp.create_pipeline('sentencizer')`") E109 = ("Model for component '{name}' not initialized. Did you forget to load " "a model, or forget to call begin_training()?") + E110 = ("Invalid displaCy render wrapper. Expected callable, got: {obj}") @add_codes @@ -358,8 +365,12 @@ def _warn(message, warn_type="user"): message (unicode): The message to display. category (Warning): The Warning to show. """ - w_id = message.split("[", 1)[1].split("]", 1)[0] # get ID from string - if warn_type in SPACY_WARNING_TYPES and w_id not in SPACY_WARNING_IGNORE: + if message.startswith("["): + w_id = message.split("[", 1)[1].split("]", 1)[0] # get ID from string + else: + w_id = None + ignore_warning = w_id and w_id in SPACY_WARNING_IGNORE + if warn_type in SPACY_WARNING_TYPES and not ignore_warning: category = WARNINGS[warn_type] stack = inspect.stack()[-1] with warnings.catch_warnings(): diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py index f7f49cc0b..32cc514e5 100644 --- a/spacy/tests/test_misc.py +++ b/spacy/tests/test_misc.py @@ -72,6 +72,20 @@ def test_displacy_spans(en_vocab): assert html.startswith("TEST") + + def test_displacy_raises_for_wrong_type(en_vocab): with pytest.raises(ValueError): displacy.render("hello world") diff --git a/spacy/util.py b/spacy/util.py index ea662d3a3..13810857b 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -236,12 +236,13 @@ def is_in_jupyter(): RETURNS (bool): True if in Jupyter, False if not. """ + # https://stackoverflow.com/a/39662359/6400719 try: - cfg = get_ipython().config - if cfg["IPKernelApp"]["parent_appname"] == "ipython-notebook": - return True + shell = get_ipython().__class__.__name__ + if shell == "ZMQInteractiveShell": + return True # Jupyter notebook or qtconsole except NameError: - return False + return False # Probably standard Python interpreter return False diff --git a/website/api/_top-level/_displacy.jade b/website/api/_top-level/_displacy.jade index e6443cea8..f0e2dc93f 100644 --- a/website/api/_top-level/_displacy.jade +++ b/website/api/_top-level/_displacy.jade @@ -68,6 +68,12 @@ p +cell Port to serve visualization. +cell #[code 5000] + +row + +cell #[code host] + +cell unicode + +cell Host to serve visualization. + +cell #[code '0.0.0.0'] + +h(3, "displacy.render") displacy.render +tag method +tag-new(2)