mirror of https://github.com/explosion/spaCy.git
Update and fix lightning tour examples
This commit is contained in:
parent
4b5540cc63
commit
dcb10da615
|
@ -101,15 +101,15 @@ p
|
||||||
doc_dep = nlp(u'This is a sentence.')
|
doc_dep = nlp(u'This is a sentence.')
|
||||||
displacy.serve(doc_dep, style='dep')
|
displacy.serve(doc_dep, style='dep')
|
||||||
|
|
||||||
doc_ent = nlp(u'When Sebastian Thrun started working on self-driving cars at '
|
doc_ent = nlp(u'When Sebastian Thrun started working on self-driving cars at Google '
|
||||||
u'Google in 2007, few people outside of the company took him seriously.')
|
u'in 2007, few people outside of the company took him seriously.')
|
||||||
displacy.serve(doc_ent, style='ent')
|
displacy.serve(doc_ent, style='ent')
|
||||||
|
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong API:] #[+api("displacy") #[code displacy]]
|
| #[strong API:] #[+api("displacy") #[code displacy]]
|
||||||
| #[strong Usage:] #[+a("/docs/usage/visualizers") Visualizers]
|
| #[strong Usage:] #[+a("/docs/usage/visualizers") Visualizers]
|
||||||
|
|
||||||
+h(2, "examples-word-vectors") Word vectors
|
+h(2, "examples-word-vectors") Get word vectors and similarity
|
||||||
+tag-model("word vectors")
|
+tag-model("word vectors")
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
|
@ -119,6 +119,7 @@ p
|
||||||
pasta = doc[6]
|
pasta = doc[6]
|
||||||
hippo = doc[8]
|
hippo = doc[8]
|
||||||
assert apple.similarity(banana) > pasta.similarity(hippo)
|
assert apple.similarity(banana) > pasta.similarity(hippo)
|
||||||
|
assert apple.has_vector and banana.has_vector and pasta.has_vector and hippo.has_vector
|
||||||
|
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong Usage:] #[+a("/docs/usage/word-vectors-similarities") Word vectors and similarity]
|
| #[strong Usage:] #[+a("/docs/usage/word-vectors-similarities") Word vectors and similarity]
|
||||||
|
@ -139,6 +140,23 @@ p
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong Usage:] #[+a("/docs/usage/saving-loading") Saving and loading]
|
| #[strong Usage:] #[+a("/docs/usage/saving-loading") Saving and loading]
|
||||||
|
|
||||||
|
+h(2, "rule-matcher") Match text with token rules
|
||||||
|
|
||||||
|
+code.
|
||||||
|
import spacy
|
||||||
|
from spacy.matcher import Matcher
|
||||||
|
|
||||||
|
nlp = spacy.load('en')
|
||||||
|
matcher = Matcher(nlp.vocab)
|
||||||
|
# match "Google I/O" or "Google i/o"
|
||||||
|
pattern = [{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}]
|
||||||
|
matcher.add('GoogleIO', None, pattern)
|
||||||
|
matches = matcher(nlp(LOTS_OF_TEXT))
|
||||||
|
|
||||||
|
+infobox
|
||||||
|
| #[strong API:] #[+api("matcher") #[code Matcher]]
|
||||||
|
| #[strong Usage:] #[+a("/docs/usage/rule-based-matching") Rule-based matching]
|
||||||
|
|
||||||
+h(2, "multi-threaded") Multi-threaded generator
|
+h(2, "multi-threaded") Multi-threaded generator
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
|
@ -183,28 +201,24 @@ p
|
||||||
assert doc[0].like_url == doc_array[0, 1]
|
assert doc[0].like_url == doc_array[0, 1]
|
||||||
assert list(doc_array[:, 1]) == [t.like_url for t in doc]
|
assert list(doc_array[:, 1]) == [t.like_url for t in doc]
|
||||||
|
|
||||||
+h(2, "examples-inline") Calculate inline mark-up on original string
|
+h(2, "examples-inline") Calculate inline markup on original string
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
def put_spans_around_tokens(doc, get_classes):
|
def put_spans_around_tokens(doc, get_classes):
|
||||||
'''Given some function to compute class names, put each token in a
|
"""Given some function to compute class names, put each token in a
|
||||||
span element, with the appropriate classes computed.
|
span element, with the appropriate classes computed. All whitespace is
|
||||||
|
preserved, outside of the spans. (Of course, HTML won't display more than
|
||||||
All whitespace is preserved, outside of the spans. (Yes, I know HTML
|
one whitespace character in a row – but the point is, no information is lost
|
||||||
won't display it. But the point is no information is lost, so you can
|
and you can calculate what you need, e.g. <br />, <p> etc.)
|
||||||
calculate what you need, e.g. <br /> tags, <p> tags, etc.)
|
"""
|
||||||
'''
|
|
||||||
output = []
|
output = []
|
||||||
template = '<span classes="{classes}">{word}</span>{space}'
|
html = '<span class="{classes}">{word}</span>{space}'
|
||||||
for token in doc:
|
for token in doc:
|
||||||
if token.is_space:
|
if token.is_space:
|
||||||
output.append(token.orth_)
|
output.append(token.text)
|
||||||
else:
|
else:
|
||||||
output.append(
|
classes = ' '.join(get_classes(token))
|
||||||
template.format(
|
output.append(html.format(classes=classes, word=token.text, space=token.whitespace_))
|
||||||
classes=' '.join(get_classes(token)),
|
|
||||||
word=token.orth_,
|
|
||||||
space=token.whitespace_))
|
|
||||||
string = ''.join(output)
|
string = ''.join(output)
|
||||||
string = string.replace('\n', '')
|
string = string.replace('\n', '')
|
||||||
string = string.replace('\t', ' ')
|
string = string.replace('\t', ' ')
|
||||||
|
|
Loading…
Reference in New Issue