diff --git a/examples/pipeline/wikidata_entity_linking.py b/examples/pipeline/wikidata_entity_linking.py
index 2759da135..9dc2e514f 100644
--- a/examples/pipeline/wikidata_entity_linking.py
+++ b/examples/pipeline/wikidata_entity_linking.py
@@ -61,22 +61,23 @@ def run_pipeline():
     to_create_kb = False
 
     # read KB back in from file
-    to_read_kb = True
+    to_read_kb = False
     to_test_kb = False
 
     # create training dataset
     create_wp_training = False
 
     # train the EL pipe
-    train_pipe = True
-    measure_performance = True
+    train_pipe = False
+    measure_performance = False
 
     # test the EL pipe on a simple example
-    to_test_pipeline = True
+    to_test_pipeline = False
 
     # write the NLP object, read back in and test again
     to_write_nlp = False
-    to_read_nlp = False
+    to_read_nlp = True
+    test_from_file = True
 
     # STEP 1 : create prior probabilities from WP (run only once)
     if to_create_prior_probs:
@@ -134,21 +135,21 @@ def run_pipeline():
                                              training_output=TRAINING_DIR)
 
     # STEP 6: create and train the entity linking pipe
-    el_pipe = nlp_2.create_pipe(name='entity_linker', config={})
-    el_pipe.set_kb(kb_2)
-    nlp_2.add_pipe(el_pipe, last=True)
-
-    other_pipes = [pipe for pipe in nlp_2.pipe_names if pipe != "entity_linker"]
-    with nlp_2.disable_pipes(*other_pipes):  # only train Entity Linking
-        optimizer = nlp_2.begin_training()
-        optimizer.learn_rate = LEARN_RATE
-        optimizer.L2 = L2
-
     if train_pipe:
+        el_pipe = nlp_2.create_pipe(name='entity_linker', config={})
+        el_pipe.set_kb(kb_2)
+        nlp_2.add_pipe(el_pipe, last=True)
+
+        other_pipes = [pipe for pipe in nlp_2.pipe_names if pipe != "entity_linker"]
+        with nlp_2.disable_pipes(*other_pipes):  # only train Entity Linking
+            optimizer = nlp_2.begin_training()
+            optimizer.learn_rate = LEARN_RATE
+            optimizer.L2 = L2
+
         print("STEP 6: training Entity Linking pipe", datetime.datetime.now())
         # define the size (nr of entities) of training and dev set
         train_limit = 5000
-        dev_limit = 5000
+        dev_limit = 10000
 
         train_data = training_set_creator.read_training(nlp=nlp_2,
                                                         training_dir=TRAINING_DIR,
@@ -230,40 +231,56 @@ def run_pipeline():
         el_pipe.context_weight = 1
         el_pipe.prior_weight = 1
 
-        # STEP 8: apply the EL pipe on a toy example
-        if to_test_pipeline:
-            print()
-            print("STEP 8: applying Entity Linking to toy example", datetime.datetime.now())
-            print()
-            run_el_toy_example(nlp=nlp_2)
+    # STEP 8: apply the EL pipe on a toy example
+    if to_test_pipeline:
+        print()
+        print("STEP 8: applying Entity Linking to toy example", datetime.datetime.now())
+        print()
+        run_el_toy_example(nlp=nlp_2)
 
-        # STEP 9: write the NLP pipeline (including entity linker) to file
-        if to_write_nlp:
-            print()
-            print("STEP 9: testing NLP IO", datetime.datetime.now())
-            print()
-            print("writing to", NLP_2_DIR)
-            nlp_2.to_disk(NLP_2_DIR)
-            print()
+    # STEP 9: write the NLP pipeline (including entity linker) to file
+    if to_write_nlp:
+        print()
+        print("STEP 9: testing NLP IO", datetime.datetime.now())
+        print()
+        print("writing to", NLP_2_DIR)
+        nlp_2.to_disk(NLP_2_DIR)
+        print()
+
+    # verify that the IO has gone correctly
+    if to_read_nlp:
         print("reading from", NLP_2_DIR)
         nlp_3 = spacy.load(NLP_2_DIR)
 
-        # verify that the IO has gone correctly
-        if to_read_nlp:
+        if test_from_file:
+            dev_limit = 5000
+            dev_data = training_set_creator.read_training(nlp=nlp_3,
+                                                          training_dir=TRAINING_DIR,
+                                                          dev=True,
+                                                          limit=dev_limit)
+
+            print("Dev testing from file on", len(dev_data), "articles")
             print()
-            print("running toy example with NLP 2")
+
+            dev_acc_combo, dev_acc_combo_dict = _measure_accuracy(dev_data)
+            print("dev acc combo avg:", round(dev_acc_combo, 3),
+                  [(x, round(y, 3)) for x, y in dev_acc_combo_dict.items()])
+        else:
+            print("running toy example with NLP 3")
             run_el_toy_example(nlp=nlp_3)
 
     print()
     print("STOP", datetime.datetime.now())
 
 
-def _measure_accuracy(data, el_pipe):
+def _measure_accuracy(data, el_pipe=None):
+    # If the docs in the data require further processing with an entity linker, set el_pipe
     correct_by_label = dict()
     incorrect_by_label = dict()
 
     docs = [d for d, g in data if len(d) > 0]
-    docs = el_pipe.pipe(docs)
+    if el_pipe is not None:
+        docs = el_pipe.pipe(docs)
     golds = [g for d, g in data if len(d) > 0]
     for doc, gold in zip(docs, golds):
diff --git a/spacy/_ml.py b/spacy/_ml.py
index 9139152aa..82db0fc05 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -655,23 +655,32 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False,
 def build_nel_encoder(in_width, hidden_width, end_width, **cfg):
     conv_depth = cfg.get("conv_depth", 2)
     cnn_maxout_pieces = cfg.get("cnn_maxout_pieces", 3)
+    pretrained_vectors = cfg.get("pretrained_vectors")  # self.nlp.vocab.vectors.name
+
+    tok2vec = Tok2Vec(width=hidden_width, embed_size=in_width, pretrained_vectors=pretrained_vectors,
+                      cnn_maxout_pieces=cnn_maxout_pieces, subword_features=False, conv_depth=conv_depth, bilstm_depth=0)
 
     with Model.define_operators({">>": chain, "**": clone}):
-        convolution = Residual((ExtractWindow(nW=1) >>
-                                LN(Maxout(hidden_width, hidden_width * 3, pieces=cnn_maxout_pieces))))
+        # convolution = Residual((ExtractWindow(nW=1) >>
+        #                         LN(Maxout(hidden_width, hidden_width * 3, pieces=cnn_maxout_pieces))))
 
-        encoder = SpacyVectors \
-                  >> with_flatten(Affine(hidden_width, in_width))\
-                  >> with_flatten(LN(Maxout(hidden_width, hidden_width)) >> convolution ** conv_depth, pad=conv_depth) \
-                  >> flatten_add_lengths \
-                  >> ParametricAttention(hidden_width) \
-                  >> Pooling(sum_pool) \
+        # encoder = SpacyVectors \
+        #           >> with_flatten(Affine(hidden_width, in_width)) \
+        #           >> with_flatten(LN(Maxout(hidden_width, hidden_width)) >> convolution ** conv_depth, pad=conv_depth) \
+        #           >> flatten_add_lengths \
+        #           >> ParametricAttention(hidden_width) \
+        #           >> Pooling(sum_pool) \
+        #           >> Residual(zero_init(Maxout(hidden_width, hidden_width))) \
+        #           >> zero_init(Affine(end_width, hidden_width, drop_factor=0.0))
+
+        encoder = tok2vec >> flatten_add_lengths >> Pooling(mean_pool)\
                   >> Residual(zero_init(Maxout(hidden_width, hidden_width))) \
                   >> zero_init(Affine(end_width, hidden_width, drop_factor=0.0))
 
         # TODO: ReLu or LN(Maxout) ?
         # sum_pool or mean_pool ?
 
+        encoder.tok2vec = tok2vec
         encoder.nO = end_width
        return encoder