From 2f6b8132ef81c8cb6863f378c8fdead2258fae03 Mon Sep 17 00:00:00 2001 From: Martino Mensio Date: Mon, 27 Jul 2020 09:44:33 +0200 Subject: [PATCH] Sentence transformers added to spaCy universe (#5814) * fix details for spacy-universal-sentence-encoder * added sentence-transformers --- website/meta/universe.json | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/website/meta/universe.json b/website/meta/universe.json index d6e55a2ef..8790cd6af 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -1,5 +1,30 @@ { "resources": [ + { + "id": "spacy-sentence-bert", + "title": "spaCy - sentence-transformers", + "slogan": "Pipelines for pretrained sentence-transformers (BERT, RoBERTa, XLM-RoBERTa & Co.) directly within spaCy", + "description": "This library lets you use the embeddings from [sentence-transformers](https://github.com/UKPLab/sentence-transformers) of Docs, Spans and Tokens directly from spaCy. Most models are for the English language but three of them are multilingual.", + "github": "MartinoMensio/spacy-sentence-bert", + "pip": "spacy-sentence-bert", + "code_example": [ + "import spacy_sentence_bert", + "# load one of the models listed at https://github.com/MartinoMensio/spacy-sentence-bert/", + "nlp = spacy_sentence_bert.load_model('en_roberta_large_nli_stsb_mean_tokens')", + "# get two documents", + "doc_1 = nlp('Hi there, how are you?')", + "doc_2 = nlp('Hello there, how are you doing today?')", + "# use the similarity method that is based on the vectors, on Doc, Span or Token", + "print(doc_1.similarity(doc_2[0:7]))" + ], + "category": ["models", "pipeline"], + "author": "Martino Mensio", + "author_links": { + "twitter": "MartinoMensio", + "github": "MartinoMensio", + "website": "https://martinomensio.github.io" + } + }, { "id": "spacy-streamlit", "title": "spacy-streamlit", @@ -58,10 +83,11 @@ "title": "SpaCy - Universal Sentence Encoder", "slogan": "Make use of Google's Universal 
Sentence Encoder directly within SpaCy", "description": "This library lets you use Universal Sentence Encoder embeddings of Docs, Spans and Tokens directly from TensorFlow Hub", - "github": "MartinoMensio/spacy-universal-sentence-encoder-tfhub", + "github": "MartinoMensio/spacy-universal-sentence-encoder", + "pip": "spacy-universal-sentence-encoder", "code_example": [ "import spacy_universal_sentence_encoder", - "load one of the models: ['en_use_md', 'en_use_lg', 'xx_use_md', 'xx_use_lg']", + "# load one of the models: ['en_use_md', 'en_use_lg', 'xx_use_md', 'xx_use_lg']", "nlp = spacy_universal_sentence_encoder.load_model('en_use_lg')", "# get two documents", "doc_1 = nlp('Hi there, how are you?')",