mirror of https://github.com/explosion/spaCy.git
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
9b6a5df15e
|
@ -0,0 +1,18 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
"""
|
||||
Example sentences to test spaCy and its language models.
|
||||
|
||||
>>> from spacy.lang.da.examples import sentences
|
||||
>>> docs = nlp.pipe(sentences)
|
||||
"""
|
||||
|
||||
|
||||
sentences = [
|
||||
"Apple overvejer at købe et britisk startup for 1 milliard dollar",
|
||||
"Selvkørende biler flytter forsikringsansvaret over på producenterne",
|
||||
"San Francisco overvejer at forbyde leverandørrobotter på fortov",
|
||||
"London er en stor by i Storbritannien"
|
||||
]
|
|
@ -0,0 +1,22 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
"""
|
||||
Example sentences to test spaCy and its language models.
|
||||
|
||||
>>> from spacy.lang.de.examples import sentences
|
||||
>>> docs = nlp.pipe(sentences)
|
||||
"""
|
||||
|
||||
|
||||
sentences = [
|
||||
"Die ganze Stadt ist ein Startup: Shenzhen ist das Silicon Valley für Hardware-Firmen",
|
||||
"Wie deutsche Startups die Technologie vorantreiben wollen: Künstliche Intelligenz",
|
||||
"Trend zum Urlaub in Deutschland beschert Gastwirten mehr Umsatz",
|
||||
"Bundesanwaltschaft erhebt Anklage gegen mutmaßlichen Schweizer Spion",
|
||||
"San Francisco erwägt Verbot von Lieferrobotern",
|
||||
"Autonome Fahrzeuge verlagern Haftpflicht auf Hersteller",
|
||||
"Wo bist du?",
|
||||
"Was ist die Hauptstadt von Deutschland?"
|
||||
]
|
|
@ -0,0 +1,22 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
"""
|
||||
Example sentences to test spaCy and its language models.
|
||||
|
||||
>>> from spacy.lang.en.examples import sentences
|
||||
>>> docs = nlp.pipe(sentences)
|
||||
"""
|
||||
|
||||
|
||||
sentences = [
|
||||
"Apple is looking at buying U.K. startup for $1 billion",
|
||||
"Autonomous cars shift insurance liability toward manufacturers",
|
||||
"San Francisco considers banning sidewalk delivery robots",
|
||||
"London is a big city in the United Kingdom.",
|
||||
"Where are you?",
|
||||
"Who is the president of France?",
|
||||
"What is the capital of the United States?",
|
||||
"When was Barack Obama born?"
|
||||
]
|
|
@ -0,0 +1,22 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
"""
|
||||
Example sentences to test spaCy and its language models.
|
||||
|
||||
>>> from spacy.lang.es.examples import sentences
|
||||
>>> docs = nlp.pipe(sentences)
|
||||
"""
|
||||
|
||||
|
||||
sentences = [
|
||||
"Apple está buscando comprar una startup del Reino Unido por mil millones de dólares",
|
||||
"Los coches autónomos delegan la responsabilidad del seguro en sus fabricantes",
|
||||
"San Francisco analiza prohibir los robots delivery",
|
||||
"Londres es una gran ciudad del Reino Unido",
|
||||
"El gato come pescado",
|
||||
"Veo al hombre con el telescopio",
|
||||
"La araña come moscas",
|
||||
"El pingüino incuba en su nido"
|
||||
]
|
|
@ -0,0 +1,26 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
"""
|
||||
Example sentences to test spaCy and its language models.
|
||||
|
||||
>>> from spacy.lang.fr.examples import sentences
|
||||
>>> docs = nlp.pipe(sentences)
|
||||
"""
|
||||
|
||||
|
||||
sentences = [
|
||||
"Apple cherche à acheter une startup anglaise pour 1 milliard de dollars",
|
||||
"Les voitures autonomes voient leurs assurances décalées vers les constructeurs",
|
||||
"San Francisco envisage d'interdire les robots coursiers",
|
||||
"Londres est une grande ville du Royaume-Uni",
|
||||
"L’Italie choisit ArcelorMittal pour reprendre la plus grande aciérie d’Europe",
|
||||
"Apple lance HomePod parce qu'il se sent menacé par l'Echo d'Amazon",
|
||||
"La France ne devrait pas manquer d'électricité cet été, même en cas de canicule",
|
||||
"Nouvelles attaques de Trump contre le maire de Londres",
|
||||
"Où es-tu ?",
|
||||
"Qui est le président de la France ?",
|
||||
"Quelle est la capitale des États-Unis ?",
|
||||
"Quand est né Barack Obama ?"
|
||||
]
|
|
@ -0,0 +1,28 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
"""
|
||||
Example sentences to test spaCy and its language models.
|
||||
|
||||
>>> from spacy.lang.he.examples import sentences
|
||||
>>> docs = nlp.pipe(sentences)
|
||||
"""
|
||||
|
||||
|
||||
sentences = [
|
||||
'סין מקימה קרן של 440 מיליון דולר להשקעה בהייטק בישראל',
|
||||
'רה"מ הודיע כי יחרים טקס בחסותו',
|
||||
'הכנסת צפויה לאשר איכון אוטומטי של שיחות למוקד 100',
|
||||
'תוכנית לאומית תהפוך את ישראל למעצמה דיגיטלית',
|
||||
'סע לשלום, המפתחות בפנים.',
|
||||
'מלצר, פעמיים טורקי!',
|
||||
'ואהבת לרעך כמוך.',
|
||||
'היום נעשה משהו בלתי נשכח.',
|
||||
'איפה הילד?',
|
||||
'מיהו נשיא צרפת?',
|
||||
'מהי בירת ארצות הברית?',
|
||||
"איך קוראים בעברית לצ'ופצ'יק של הקומקום?",
|
||||
'מה הייתה הדקה?',
|
||||
'מי אומר שלום ראשון, זה שעולה או זה שיורד?'
|
||||
]
|
|
@ -0,0 +1,18 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
"""
|
||||
Example sentences to test spaCy and its language models.
|
||||
|
||||
>>> from spacy.lang.it.examples import sentences
|
||||
>>> docs = nlp.pipe(sentences)
|
||||
"""
|
||||
|
||||
|
||||
sentences = [
|
||||
"Apple vuole comprare una startup del Regno Unito per un miliardo di dollari",
|
||||
"Le automobili a guida autonoma spostano la responsabilità assicurativa verso i produttori",
|
||||
"San Francisco prevede di bandire i robot di consegna porta a porta",
|
||||
"Londra è una grande città del Regno Unito."
|
||||
]
|
|
@ -0,0 +1,18 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
"""
|
||||
Example sentences to test spaCy and its language models.
|
||||
|
||||
>>> from spacy.lang.nb.examples import sentences
|
||||
>>> docs = nlp.pipe(sentences)
|
||||
"""
|
||||
|
||||
|
||||
sentences = [
|
||||
"Apple vurderer å kjøpe britisk oppstartfirma for en milliard dollar",
|
||||
"Selvkjørende biler flytter forsikringsansvaret over på produsentene",
|
||||
"San Francisco vurderer å forby robotbud på fortauene",
|
||||
"London er en stor by i Storbritannia."
|
||||
]
|
|
@ -0,0 +1,20 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
"""
|
||||
Example sentences to test spaCy and its language models.
|
||||
|
||||
>>> from spacy.lang.pl.examples import sentences
|
||||
>>> docs = nlp.pipe(sentences)
|
||||
"""
|
||||
|
||||
|
||||
sentences = [
|
||||
"Poczuł przyjemną woń mocnej kawy.",
|
||||
"Istnieje wiele dróg oddziaływania substancji psychoaktywnej na układ nerwowy.",
|
||||
"Powitał mnie biało-czarny kot, płosząc siedzące na płocie trzy dorodne dudki.",
|
||||
"Nowy abonament pod lupą Komisji Europejskiej",
|
||||
"Czy w ciągu ostatnich 48 godzin spożyłeś leki zawierające paracetamol?",
|
||||
"Kto ma ochotę zapoznać się z innymi niż w książkach przygodami Muminków i ich przyjaciół, temu polecam komiks Tove Jansson „Muminki i morze”."
|
||||
]
|
|
@ -0,0 +1,18 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
"""
|
||||
Example sentences to test spaCy and its language models.
|
||||
|
||||
>>> from spacy.lang.pt.examples import sentences
|
||||
>>> docs = nlp.pipe(sentences)
|
||||
"""
|
||||
|
||||
|
||||
sentences = [
|
||||
"Apple está querendo comprar uma startup do Reino Unido por 100 milhões de dólares",
|
||||
"Carros autônomos empurram a responsabilidade do seguro para os fabricantes.",
|
||||
"São Francisco considera banir os robôs de entrega que andam pelas calçadas",
|
||||
"Londres é a maior cidade do Reino Unido"
|
||||
]
|
|
@ -0,0 +1,18 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
"""
|
||||
Example sentences to test spaCy and its language models.
|
||||
|
||||
>>> from spacy.lang.sv.examples import sentences
|
||||
>>> docs = nlp.pipe(sentences)
|
||||
"""
|
||||
|
||||
|
||||
sentences = [
|
||||
"Apple överväger att köpa brittisk startup för 1 miljard dollar.",
|
||||
"Självkörande bilar förskjuter försäkringsansvar mot tillverkare.",
|
||||
"San Francisco överväger förbud mot leveransrobotar på trottoarer.",
|
||||
"London är en storstad i Storbritannien."
|
||||
]
|
|
@ -524,6 +524,7 @@ cdef class Parser:
|
|||
scores, bp_scores = vec2scores.begin_update(vector, drop=drop)
|
||||
|
||||
d_scores = self.get_batch_loss(states, golds, scores)
|
||||
d_scores /= len(docs)
|
||||
d_vector = bp_scores(d_scores, sgd=sgd)
|
||||
if drop != 0:
|
||||
d_vector *= mask
|
||||
|
@ -582,7 +583,9 @@ cdef class Parser:
|
|||
width, density,
|
||||
sgd=sgd, drop=drop, losses=losses)
|
||||
backprop_lower = []
|
||||
cdef float batch_size = len(docs)
|
||||
for i, d_scores in enumerate(states_d_scores):
|
||||
d_scores /= batch_size
|
||||
if losses is not None:
|
||||
losses[self.name] += (d_scores**2).sum()
|
||||
ids, bp_vectors, bp_scores = backprops[i]
|
||||
|
|
|
@ -112,6 +112,10 @@
|
|||
.u-nowrap
|
||||
white-space: nowrap
|
||||
|
||||
.u-break.u-break
|
||||
word-wrap: break-word
|
||||
white-space: initial
|
||||
|
||||
.u-no-border
|
||||
border: none
|
||||
|
||||
|
|
|
@ -140,6 +140,44 @@ p Get the number of tokens in the document.
|
|||
+cell int
|
||||
+cell The number of tokens in the document.
|
||||
|
||||
+h(2, "char_span") Doc.char_span
|
||||
+tag method
|
||||
+tag-new(2)
|
||||
|
||||
p Create a #[code Span] object from the slice #[code doc.text[start : end]].
|
||||
|
||||
+aside-code("Example").
|
||||
doc = nlp(u'I like New York')
|
||||
label = doc.vocab.strings['GPE']
|
||||
span = doc.char_span(7, 15, label=label)
|
||||
assert span.text == 'New York'
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code start]
|
||||
+cell int
|
||||
+cell The index of the first character of the span.
|
||||
|
||||
+row
|
||||
+cell #[code end]
|
||||
+cell int
|
||||
+cell The index of the first character after the span.
|
||||
|
||||
+row
|
||||
+cell #[code label]
|
||||
+cell uint64
|
||||
+cell A label to attach to the Span, e.g. for named entities.
|
||||
|
||||
+row
|
||||
+cell #[code vector]
|
||||
+cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell A meaning representation of the span.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code Span]
|
||||
+cell The newly constructed object.
|
||||
|
||||
+h(2, "similarity") Doc.similarity
|
||||
+tag method
|
||||
+tag-model("vectors")
|
||||
|
@ -211,12 +249,12 @@ p
|
|||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code attr_ids]
|
||||
+cell ints
|
||||
+cell list
|
||||
+cell A list of attribute ID ints.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code numpy.ndarray[ndim=2, dtype='int32']]
|
||||
+cell #[code.u-break numpy.ndarray[ndim=2, dtype='int32']]
|
||||
+cell
|
||||
| The exported attributes as a 2D numpy array, with one row per
|
||||
| token and one column per attribute.
|
||||
|
@ -245,7 +283,7 @@ p
|
|||
|
||||
+row
|
||||
+cell #[code array]
|
||||
+cell #[code numpy.ndarray[ndim=2, dtype='int32']]
|
||||
+cell #[code.u-break numpy.ndarray[ndim=2, dtype='int32']]
|
||||
+cell The attribute values to load.
|
||||
|
||||
+footrow
|
||||
|
@ -509,7 +547,7 @@ p
|
|||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell A 1D numpy array representing the document's semantics.
|
||||
|
||||
+h(2, "vector_norm") Doc.vector_norm
|
||||
|
|
|
@ -111,6 +111,14 @@ p
|
|||
+cell -
|
||||
+cell A sequence of unicode objects.
|
||||
|
||||
+row
|
||||
+cell #[code as_tuples]
|
||||
+cell bool
|
||||
+cell
|
||||
| If set to #[code True], inputs should be a sequence of
|
||||
| #[code (text, context)] tuples. Output will then be a sequence of
|
||||
| #[code (doc, context)] tuples. Defaults to #[code False].
|
||||
|
||||
+row
|
||||
+cell #[code n_threads]
|
||||
+cell int
|
||||
|
|
|
@ -129,7 +129,7 @@ p A real-valued meaning representation.
|
|||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell A 1D numpy array representing the lexeme's semantics.
|
||||
|
||||
+h(2, "vector_norm") Lexeme.vector_norm
|
||||
|
|
|
@ -37,7 +37,7 @@ p Create a Span object from the #[code slice doc[start : end]].
|
|||
|
||||
+row
|
||||
+cell #[code vector]
|
||||
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell A meaning representation of the span.
|
||||
|
||||
+footrow
|
||||
|
@ -145,11 +145,47 @@ p
|
|||
+cell float
|
||||
+cell A scalar similarity score. Higher is more similar.
|
||||
|
||||
+h(2, "to_array") Span.to_array
|
||||
+tag method
|
||||
+tag-new(2)
|
||||
|
||||
p
|
||||
| Given a list of #[code M] attribute IDs, export the tokens to a numpy
|
||||
| #[code ndarray] of shape #[code (N, M)], where #[code N] is the length of
|
||||
| the span. The values will be 32-bit integers.
|
||||
|
||||
+aside-code("Example").
|
||||
from spacy.attrs import LOWER, POS, ENT_TYPE, IS_ALPHA
|
||||
doc = nlp(u'I like New York in Autumn.')
|
||||
span = doc[2:3]
|
||||
# All strings mapped to integers, for easy export to numpy
|
||||
np_array = span.to_array([LOWER, POS, ENT_TYPE, IS_ALPHA])
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code attr_ids]
|
||||
+cell list
|
||||
+cell A list of attribute ID ints.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code.u-break numpy.ndarray[long, ndim=2]]
|
||||
+cell
|
||||
| A feature matrix, with one row per word, and one column per
|
||||
| attribute indicated in the input #[code attr_ids].
|
||||
|
||||
+h(2, "merge") Span.merge
|
||||
+tag method
|
||||
|
||||
p Retokenize the document, such that the span is merged into a single token.
|
||||
|
||||
+aside-code("Example").
|
||||
doc = nlp(u'I like New York in Autumn.')
|
||||
span = doc[2:4]
|
||||
span.merge()
|
||||
assert len(doc) == 6
|
||||
assert doc[2].text == 'New York'
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code **attributes]
|
||||
|
@ -270,7 +306,7 @@ p
|
|||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell A 1D numpy array representing the span's semantics.
|
||||
|
||||
+h(2, "vector_norm") Span.vector_norm
|
||||
|
|
|
@ -250,7 +250,7 @@ p A real-valued meaning representation.
|
|||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell A 1D numpy array representing the token's semantics.
|
||||
|
||||
+h(2, "vector_norm") Token.vector_norm
|
||||
|
|
Loading…
Reference in New Issue