diff --git a/spacy/tests/doc/test_doc_spilt.py b/spacy/tests/doc/test_doc_spilt.py
index 3a92509da..86757341b 100644
--- a/spacy/tests/doc/test_doc_spilt.py
+++ b/spacy/tests/doc/test_doc_spilt.py
@@ -112,3 +112,17 @@ def test_spans_sentence_update_after_merge(en_vocab):
     sent1, sent2 = list(doc.sents)
     assert len(sent1) == init_len + 1
     assert len(sent2) == init_len2 + 1
+
+
+@pytest.mark.xfail
+def test_split_orths_mismatch(en_vocab):
+    """Test that the regular retokenizer.split raises an error if the orths
+    don't match the original token text. There might still be a method that
+    allows this, but for the default use cases, merging and splitting should
+    always conform with spaCy's non-destructive tokenization policy. Otherwise,
+    it can lead to very confusing and unexpected results.
+    """
+    doc = Doc(en_vocab, words=["LosAngeles", "start", "."])
+    with pytest.raises(ValueError):
+        with doc.retokenize() as retokenizer:
+            retokenizer.split(doc[0], ["L", "A"], [0, -1])