diff --git a/spacy/ml/_layers.py b/spacy/ml/_layers.py
index 7e9150d8b..a752ef49a 100644
--- a/spacy/ml/_layers.py
+++ b/spacy/ml/_layers.py
@@ -79,7 +79,7 @@ def _backprop_precomputable_affine_padding(model, dY, ids):
     # for b in range(nB):
     #     for f in range(nF):
     #         if ids[b, f] < 0:
-    #             d_padding[0, f] += dY[b]
+    #             d_pad[0, f] += dY[b]
     #
     # Which can be rewritten as:
     #
@@ -88,9 +88,13 @@ def _backprop_precomputable_affine_padding(model, dY, ids):
     #
     # I don't know how to avoid the loop without building a whole array :(.
     # Cursed numpy.
+    #
+    # Note by Sofie: rewritten as an explicit nested loop because the vectorized form `d_pad[0, ids[b] < 0] += dY[b]` fails on GPU with "CuPy only supports slices that consist of one boolean array."
     d_pad = model.ops.alloc((1, nF, nO, nP))
     for b in range(nB):
-        d_pad[0, ids[b] < 0] += dY[b]
+        for f in range(nF):
+            if ids[b, f] < 0:
+                d_pad[0, f] += dY[b]
     return d_pad
 
 
diff --git a/spacy/syntax/_parser_model.pyx b/spacy/syntax/_parser_model.pyx
index 4a1014a09..4f4e5e4b0 100644
--- a/spacy/syntax/_parser_model.pyx
+++ b/spacy/syntax/_parser_model.pyx
@@ -371,8 +371,6 @@ class ParserStepModel(Model):
             self.ops.scatter_add(d_tokvecs, ids,
                 d_state_features)
         # Padded -- see update()
-        if isinstance(self.ops, CupyOps):
-           d_tokvecs = self.ops.to_numpy(d_tokvecs)
         self.bp_tokvecs(d_tokvecs[:-1])
         return d_tokvecs
 
@@ -445,8 +443,7 @@ cdef class precompute_hiddens:
         else:
             cached = gpu_cached
         if not isinstance(lower_model.get_param("b"), numpy.ndarray):
-            # self.bias = lower_model.get_param("b").get(stream=cuda_stream) ???
-            self.bias = lower_model.get_param("b")
+            self.bias = lower_model.get_param("b").get(stream=cuda_stream)
         else:
             self.bias = lower_model.get_param("b")
         self.nF = cached.shape[1]