spaCy/examples/streamlit_spacy.py

# coding: utf-8
"""
Example of a Streamlit app for an interactive spaCy model visualizer. You can
either download the script, or point streamlit run to the raw URL of this
file. For more details, see https://streamlit.io.

Installation:
pip install streamlit
python -m spacy download en_core_web_sm
python -m spacy download en_core_web_md
python -m spacy download de_core_news_sm

Usage:
streamlit run streamlit_spacy.py
"""
from __future__ import unicode_literals

import streamlit as st
import spacy
from spacy import displacy
import pandas as pd


SPACY_MODEL_NAMES = ["en_core_web_sm", "en_core_web_md", "de_core_news_sm"]
DEFAULT_TEXT = "Mark Zuckerberg is the CEO of Facebook."
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""


@st.cache(ignore_hash=True)
def load_model(name):
    return spacy.load(name)


@st.cache(ignore_hash=True)
def process_text(model_name, text):
    nlp = load_model(model_name)
    return nlp(text)


st.sidebar.title("Interactive spaCy visualizer")
st.sidebar.markdown(
    """
Process text with [spaCy](https://spacy.io) models and visualize named entities,
dependencies and more. Uses spaCy's built-in
[displaCy](http://spacy.io/usage/visualizers) visualizer under the hood.
"""
)

spacy_model = st.sidebar.selectbox("Model name", SPACY_MODEL_NAMES)
model_load_state = st.info(f"Loading model '{spacy_model}'...")
nlp = load_model(spacy_model)
model_load_state.empty()

text = st.text_area("Text to analyze", DEFAULT_TEXT)
doc = process_text(spacy_model, text)

if "parser" in nlp.pipe_names:
    st.header("Dependency Parse & Part-of-speech tags")
    st.sidebar.header("Dependency Parse")
    split_sents = st.sidebar.checkbox("Split sentences", value=True)
    collapse_punct = st.sidebar.checkbox("Collapse punctuation", value=True)
    collapse_phrases = st.sidebar.checkbox("Collapse phrases")
    compact = st.sidebar.checkbox("Compact mode")
    options = {
        "collapse_punct": collapse_punct,
        "collapse_phrases": collapse_phrases,
        "compact": compact,
    }
    docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
    for sent in docs:
        html = displacy.render(sent, options=options)
        # Double newlines seem to mess with the rendering
        html = html.replace("\n\n", "\n")
        if split_sents and len(docs) > 1:
            st.markdown(f"> {sent.text}")
        st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)

if "ner" in nlp.pipe_names:
    st.header("Named Entities")
    st.sidebar.header("Named Entities")
    label_set = nlp.get_pipe("ner").labels
    labels = st.sidebar.multiselect("Entity labels", label_set, label_set)
    html = displacy.render(doc, style="ent", options={"ents": labels})
    # Newlines seem to mess with the rendering
    html = html.replace("\n", " ")
    st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
    attrs = ["text", "label_", "start", "end", "start_char", "end_char"]
    if "entity_linker" in nlp.pipe_names:
        attrs.append("kb_id_")
    data = [
        [str(getattr(ent, attr)) for attr in attrs]
        for ent in doc.ents
        if ent.label_ in labels
    ]
    df = pd.DataFrame(data, columns=attrs)
    st.dataframe(df)


if "textcat" in nlp.pipe_names:
    st.header("Text Classification")
    st.markdown(f"> {text}")
    df = pd.DataFrame(doc.cats.items(), columns=("Label", "Score"))
    st.dataframe(df)


vector_size = nlp.meta.get("vectors", {}).get("width", 0)
if vector_size:
    st.header("Vectors & Similarity")
    st.code(nlp.meta["vectors"])
    text1 = st.text_input("Text or word 1", "apple")
    text2 = st.text_input("Text or word 2", "orange")
    doc1 = process_text(spacy_model, text1)
    doc2 = process_text(spacy_model, text2)
    similarity = doc1.similarity(doc2)
    if similarity > 0.5:
        st.success(similarity)
    else:
        st.error(similarity)

st.header("Token attributes")

if st.button("Show token attributes"):
    attrs = [
        "idx",
        "text",
        "lemma_",
        "pos_",
        "tag_",
        "dep_",
        "head",
        "ent_type_",
        "ent_iob_",
        "shape_",
        "is_alpha",
        "is_ascii",
        "is_digit",
        "is_punct",
        "like_num",
    ]
    data = [[str(getattr(token, attr)) for attr in attrs] for token in doc]
    df = pd.DataFrame(data, columns=attrs)
    st.dataframe(df)


st.header("JSON Doc")
if st.button("Show JSON Doc"):
    st.json(doc.to_json())

st.header("JSON model meta")
if st.button("Show JSON model meta"):
    st.json(nlp.meta)
Add Streamlit example [ci skip] 2019-10-01 23:21:20 +00:00			`# coding: utf-8`
			`"""`
			`Example of a Streamlit app for an interactive spaCy model visualizer. You can`
			`either download the script, or point streamlit run to the raw URL of this`
			`file. For more details, see https://streamlit.io.`

			`Installation:`
			`pip install streamlit`
			`python -m spacy download en_core_web_sm`
			`python -m spacy download en_core_web_md`
			`python -m spacy download de_core_news_sm`

			`Usage:`
			`streamlit run streamlit_spacy.py`
			`"""`
			`from __future__ import unicode_literals`

			`import streamlit as st`
			`import spacy`
			`from spacy import displacy`
			`import pandas as pd`


			`SPACY_MODEL_NAMES = ["en_core_web_sm", "en_core_web_md", "de_core_news_sm"]`
			`DEFAULT_TEXT = "Mark Zuckerberg is the CEO of Facebook."`
			`HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""`


			`@st.cache(ignore_hash=True)`
			`def load_model(name):`
			`return spacy.load(name)`


			`@st.cache(ignore_hash=True)`
			`def process_text(model_name, text):`
			`nlp = load_model(model_name)`
			`return nlp(text)`


			`st.sidebar.title("Interactive spaCy visualizer")`
			`st.sidebar.markdown(`
			`"""`
			`Process text with [spaCy](https://spacy.io) models and visualize named entities,`
			`dependencies and more. Uses spaCy's built-in`
			`[displaCy](http://spacy.io/usage/visualizers) visualizer under the hood.`
			`"""`
			`)`

			`spacy_model = st.sidebar.selectbox("Model name", SPACY_MODEL_NAMES)`
			`model_load_state = st.info(f"Loading model '{spacy_model}'...")`
			`nlp = load_model(spacy_model)`
			`model_load_state.empty()`

			`text = st.text_area("Text to analyze", DEFAULT_TEXT)`
			`doc = process_text(spacy_model, text)`

			`if "parser" in nlp.pipe_names:`
			`st.header("Dependency Parse & Part-of-speech tags")`
			`st.sidebar.header("Dependency Parse")`
			`split_sents = st.sidebar.checkbox("Split sentences", value=True)`
			`collapse_punct = st.sidebar.checkbox("Collapse punctuation", value=True)`
			`collapse_phrases = st.sidebar.checkbox("Collapse phrases")`
			`compact = st.sidebar.checkbox("Compact mode")`
			`options = {`
			`"collapse_punct": collapse_punct,`
			`"collapse_phrases": collapse_phrases,`
			`"compact": compact,`
			`}`
			`docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]`
			`for sent in docs:`
			`html = displacy.render(sent, options=options)`
			`# Double newlines seem to mess with the rendering`
			`html = html.replace("\n\n", "\n")`
			`if split_sents and len(docs) > 1:`
			`st.markdown(f"> {sent.text}")`
			`st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)`

			`if "ner" in nlp.pipe_names:`
			`st.header("Named Entities")`
			`st.sidebar.header("Named Entities")`
Tidy up [ci skip] 2019-10-02 10:05:59 +00:00			`label_set = nlp.get_pipe("ner").labels`
			`labels = st.sidebar.multiselect("Entity labels", label_set, label_set)`
Add Streamlit example [ci skip] 2019-10-01 23:21:20 +00:00			`html = displacy.render(doc, style="ent", options={"ents": labels})`
			`# Newlines seem to mess with the rendering`
			`html = html.replace("\n", " ")`
			`st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)`
			`attrs = ["text", "label_", "start", "end", "start_char", "end_char"]`
			`if "entity_linker" in nlp.pipe_names:`
			`attrs.append("kb_id_")`
			`data = [`
			`[str(getattr(ent, attr)) for attr in attrs]`
			`for ent in doc.ents`
			`if ent.label_ in labels`
			`]`
			`df = pd.DataFrame(data, columns=attrs)`
			`st.dataframe(df)`


			`if "textcat" in nlp.pipe_names:`
			`st.header("Text Classification")`
			`st.markdown(f"> {text}")`
			`df = pd.DataFrame(doc.cats.items(), columns=("Label", "Score"))`
			`st.dataframe(df)`


			`vector_size = nlp.meta.get("vectors", {}).get("width", 0)`
			`if vector_size:`
			`st.header("Vectors & Similarity")`
			`st.code(nlp.meta["vectors"])`
			`text1 = st.text_input("Text or word 1", "apple")`
			`text2 = st.text_input("Text or word 2", "orange")`
			`doc1 = process_text(spacy_model, text1)`
			`doc2 = process_text(spacy_model, text2)`
			`similarity = doc1.similarity(doc2)`
			`if similarity > 0.5:`
			`st.success(similarity)`
			`else:`
			`st.error(similarity)`

			`st.header("Token attributes")`

			`if st.button("Show token attributes"):`
			`attrs = [`
			`"idx",`
			`"text",`
			`"lemma_",`
			`"pos_",`
			`"tag_",`
			`"dep_",`
			`"head",`
			`"ent_type_",`
			`"ent_iob_",`
			`"shape_",`
			`"is_alpha",`
			`"is_ascii",`
			`"is_digit",`
			`"is_punct",`
			`"like_num",`
			`]`
			`data = [[str(getattr(token, attr)) for attr in attrs] for token in doc]`
			`df = pd.DataFrame(data, columns=attrs)`
			`st.dataframe(df)`


			`st.header("JSON Doc")`
			`if st.button("Show JSON Doc"):`
			`st.json(doc.to_json())`

			`st.header("JSON model meta")`
			`if st.button("Show JSON model meta"):`
			`st.json(nlp.meta)`