diff --git a/spacy/strings.pxd b/spacy/strings.pxd
index d22f48ba1..bd5e0f135 100644
--- a/spacy/strings.pxd
+++ b/spacy/strings.pxd
@@ -25,5 +25,8 @@ cdef class StringStore:
     cdef vector[hash_t] keys
     cdef public PreshMap _map
 
-    cdef const Utf8Str* intern_unicode(self, str py_string)
-    cdef const Utf8Str* _intern_utf8(self, char* utf8_string, int length, hash_t* precalculated_hash)
+    cdef const Utf8Str* intern_unicode(self, str py_string, bint allow_transient)
+    cdef const Utf8Str* _intern_utf8(self, char* utf8_string, int length, hash_t* precalculated_hash, bint allow_transient)
+    cdef vector[hash_t] _transient_keys
+    cdef PreshMap _transient_map
+    cdef Pool _non_temp_mem
diff --git a/spacy/strings.pyx b/spacy/strings.pyx
index 376a13175..5e0bd90c6 100644
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@@ -1,6 +1,10 @@
 # cython: infer_types=True
 # cython: profile=False
 cimport cython
+
+from contextlib import contextmanager
+from typing import Iterator, List, Optional
+
 from libc.stdint cimport uint32_t
 from libc.string cimport memcpy
 from murmurhash.mrmr cimport hash32, hash64
@@ -31,7 +35,7 @@ def get_string_id(key):
     This function optimises for convenience over performance,
     so shouldn't be used in tight loops.
     """
-    cdef hash_t str_hash
+    cdef hash_t str_hash
     if isinstance(key, str):
         if len(key) == 0:
             return 0
@@ -45,8 +49,8 @@ def get_string_id(key):
     elif _try_coerce_to_hash(key, &str_hash):
         # Coerce the integral key to the expected primitive hash type.
         # This ensures that custom/overloaded "primitive" data types
-        # such as those implemented by numpy are not inadvertently used
-        # downsteam (as these are internally implemented as custom PyObjects
+        # such as those implemented by numpy are not inadvertently used
+        # downstream (as these are internally implemented as custom PyObjects
         # whose comparison operators can incur a significant overhead).
         return str_hash
     else:
@@ -119,7 +123,9 @@ cdef class StringStore:
         strings (iterable): A sequence of unicode strings to add to the store.
         """
         self.mem = Pool()
+        self._non_temp_mem = self.mem
         self._map = PreshMap()
+        self._transient_map = None
         if strings is not None:
             for string in strings:
                 self.add(string)
@@ -152,10 +158,13 @@ cdef class StringStore:
                 return SYMBOLS_BY_INT[str_hash]
             else:
                 utf8str = <Utf8Str*>self._map.get(str_hash)
+                if utf8str is NULL and self._transient_map is not None:
+                    utf8str = <Utf8Str*>self._transient_map.get(str_hash)
         else:
             # TODO: Raise an error instead
             utf8str = <Utf8Str*>self._map.get(string_or_id)
-
+            if utf8str is NULL and self._transient_map is not None:
+                utf8str = <Utf8Str*>self._transient_map.get(string_or_id)
         if utf8str is NULL:
             raise KeyError(Errors.E018.format(hash_value=string_or_id))
         else:
@@ -175,10 +184,46 @@ cdef class StringStore:
         else:
             return self[key]
 
-    def add(self, string):
+    def __reduce__(self):
+        strings = list(self.non_transient_keys())
+        return (StringStore, (strings,), None, None, None)
+
+    def __len__(self) -> int:
+        """The number of strings in the store.
+
+        RETURNS (int): The number of strings in the store.
+        """
+        return self.keys.size() + self._transient_keys.size()
+
+    @contextmanager
+    def memory_zone(self, mem: Optional[Pool] = None) -> Iterator[Pool]:
+        """Begin a block where all resources allocated during the block will
+        be freed at the end of it. If a resource was created within the
+        memory zone block, accessing it outside the block is invalid.
+        Behaviour of this invalid access is undefined. Memory zones should
+        not be nested.
+
+        The memory zone is helpful for services that need to process large
+        volumes of text with a defined memory budget.
+        """
+        if mem is None:
+            mem = Pool()
+        self.mem = mem
+        self._transient_map = PreshMap()
+        yield mem
+        self.mem = self._non_temp_mem
+        self._transient_map = None
+        self._transient_keys.clear()
+
+    def add(self, string: str, allow_transient: bool = False) -> int:
         """Add a string to the StringStore.
 
         string (str): The string to add.
+        allow_transient (bool): Allow the string to be stored in the 'transient'
+            map, which will be flushed at the end of the memory zone. Strings
+            encountered during arbitrary text processing should be added
+            with allow_transient=True, while labels and other strings used
+            internally should not.
         RETURNS (uint64): The string's hash value.
         """
         cdef hash_t str_hash
@@ -188,22 +233,26 @@ cdef class StringStore:
             string = string.encode("utf8")
             str_hash = hash_utf8(string, len(string))
-            self._intern_utf8(string, len(string), &str_hash)
+            self._intern_utf8(string, len(string), &str_hash, allow_transient)
         elif isinstance(string, bytes):
             if string in SYMBOLS_BY_STR:
                 return SYMBOLS_BY_STR[string]
             str_hash = hash_utf8(string, len(string))
-            self._intern_utf8(string, len(string), &str_hash)
+            self._intern_utf8(string, len(string), &str_hash, allow_transient)
         else:
             raise TypeError(Errors.E017.format(value_type=type(string)))
         return str_hash
 
-    def __len__(self):
-        """The number of strings in the store.
-
-        RETURNS (int): The number of strings in the store.
-        """
-        return self.keys.size()
-
     def __contains__(self, string_or_id not None):
         """Check whether a string or ID is in the store.
@@ -222,30 +271,70 @@ cdef class StringStore:
             pass
         else:
             # TODO: Raise an error instead
-            return self._map.get(string_or_id) is not NULL
-
+            if self._map.get(string_or_id) is not NULL:
+                return True
+            elif self._transient_map is not None and self._transient_map.get(string_or_id) is not NULL:
+                return True
+            else:
+                return False
         if str_hash < len(SYMBOLS_BY_INT):
             return True
         else:
-            return self._map.get(str_hash) is not NULL
+            if self._map.get(str_hash) is not NULL:
+                return True
+            elif self._transient_map is not None and self._transient_map.get(str_hash) is not NULL:
+                return True
+            else:
+                return False
 
     def __iter__(self):
         """Iterate over the strings in the store, in order.
 
         YIELDS (str): A string in the store.
         """
+        yield from self.non_transient_keys()
+        yield from self.transient_keys()
+
+    def non_transient_keys(self) -> Iterator[str]:
+        """Iterate over the permanently stored strings in insertion order.
+
+        YIELDS (str): A string in the store.
+        """
         cdef int i
         cdef hash_t key
         for i in range(self.keys.size()):
             key = self.keys[i]
             utf8str = <Utf8Str*>self._map.get(key)
             yield decode_Utf8Str(utf8str)
-        # TODO: Iterate OOV here?
 
-    def __reduce__(self):
-        strings = list(self)
-        return (StringStore, (strings,), None, None, None)
+    def transient_keys(self) -> Iterator[str]:
+        """Iterate over the transient strings in insertion order.
+
+        YIELDS (str): A string in the store.
+        """
+        if self._transient_map is None:
+            return
+        for i in range(self._transient_keys.size()):
+            utf8str = <Utf8Str*>self._transient_map.get(self._transient_keys[i])
+            yield decode_Utf8Str(utf8str)
+
+    def values(self) -> List[int]:
+        """Iterate over the stored string hashes in insertion order.
+
+        RETURNS: A list of string hashes.
+        """
+        cdef int i
+        hashes = [None] * self.keys.size()
+        for i in range(self.keys.size()):
+            hashes[i] = self.keys[i]
+        if self._transient_map is not None:
+            transient_hashes = [None] * self._transient_keys.size()
+            for i in range(self._transient_keys.size()):
+                transient_hashes[i] = self._transient_keys[i]
+        else:
+            transient_hashes = []
+        return hashes + transient_hashes
 
     def to_disk(self, path):
         """Save the current state to a directory.
@@ -269,7 +358,7 @@ cdef class StringStore:
         prev = list(self)
         self._reset_and_load(strings)
         for word in prev:
-            self.add(word)
+            self.add(word, allow_transient=False)
         return self
 
     def to_bytes(self, **kwargs):
@@ -289,7 +378,7 @@ cdef class StringStore:
         prev = list(self)
         self._reset_and_load(strings)
         for word in prev:
-            self.add(word)
+            self.add(word, allow_transient=False)
         return self
 
     def _reset_and_load(self, strings):
@@ -297,22 +386,34 @@ cdef class StringStore:
         self._map = PreshMap()
         self.keys.clear()
         for string in strings:
-            self.add(string)
+            self.add(string, allow_transient=False)
 
-    cdef const Utf8Str* intern_unicode(self, str py_string):
+    cdef const Utf8Str* intern_unicode(self, str py_string, bint allow_transient):
         # 0 means missing, but we don't bother offsetting the index.
         cdef bytes byte_string = py_string.encode("utf8")
-        return self._intern_utf8(byte_string, len(byte_string), NULL)
+        return self._intern_utf8(byte_string, len(byte_string), NULL, allow_transient)
 
     @cython.final
-    cdef const Utf8Str* _intern_utf8(self, char* utf8_string, int length, hash_t* precalculated_hash):
+    cdef const Utf8Str* _intern_utf8(self, char* utf8_string, int length, hash_t* precalculated_hash, bint allow_transient):
         # TODO: This function's API/behaviour is an unholy mess...
         # 0 means missing, but we don't bother offsetting the index.
         cdef hash_t key = precalculated_hash[0] if precalculated_hash is not NULL else hash_utf8(utf8_string, length)
         cdef Utf8Str* value = <Utf8Str*>self._map.get(key)
         if value is not NULL:
             return value
+        if allow_transient and self._transient_map is not None:
+            # If we've already allocated a transient string, and now we
+            # want to intern it permanently, we'll end up with the string
+            # in both places. That seems fine -- I don't see why we need
+            # to remove it from the transient map.
+            value = <Utf8Str*>self._transient_map.get(key)
+            if value is not NULL:
+                return value
         value = _allocate(self.mem, <unsigned char*>utf8_string, length)
-        self._map.set(key, value)
-        self.keys.push_back(key)
+        if allow_transient and self._transient_map is not None:
+            self._transient_map.set(key, value)
+            self._transient_keys.push_back(key)
+        else:
+            self._map.set(key, value)
+            self.keys.push_back(key)
         return value
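Taken together, the `StringStore` changes give interned strings a two-tier lifetime: permanent entries in `_map`/`keys`, and transient entries that live only as long as the enclosing zone. A minimal sketch of the intended semantics (not part of the diff; the string literals are arbitrary):

```python
from spacy.strings import StringStore

store = StringStore()
label_hash = store.add("PERSON", allow_transient=False)  # permanent intern

with store.memory_zone():
    # Inside the zone, transient strings resolve like any other string.
    text_hash = store.add("one-off token text", allow_transient=True)
    assert store[text_hash] == "one-off token text"

# On exit, the transient map and keys are flushed: the permanent string
# survives, while a lookup on the transient hash now raises KeyError.
assert label_hash in store
assert text_hash not in store
```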
+ """ + cdef int i + hashes = [None] * self._keys.size() + for i in range(self._keys.size()): + hashes[i] = self._keys[i] + if self._transient_map is not None: + transient_hashes = [None] * self._transient_keys.size() + for i in range(self._transient_keys.size()): + transient_hashes[i] = self._transient_keys[i] + else: + transient_hashes = [] + return hashes + transient_hashes + def to_disk(self, path): """Save the current state to a directory. @@ -269,7 +358,7 @@ cdef class StringStore: prev = list(self) self._reset_and_load(strings) for word in prev: - self.add(word) + self.add(word, allow_transient=False) return self def to_bytes(self, **kwargs): @@ -289,7 +378,7 @@ cdef class StringStore: prev = list(self) self._reset_and_load(strings) for word in prev: - self.add(word) + self.add(word, allow_transient=False) return self def _reset_and_load(self, strings): @@ -297,22 +386,34 @@ cdef class StringStore: self._map = PreshMap() self.keys.clear() for string in strings: - self.add(string) + self.add(string, allow_transient=False) - cdef const Utf8Str* intern_unicode(self, str py_string): + cdef const Utf8Str* intern_unicode(self, str py_string, bint allow_transient): # 0 means missing, but we don't bother offsetting the index. cdef bytes byte_string = py_string.encode("utf8") - return self._intern_utf8(byte_string, len(byte_string), NULL) + return self._intern_utf8(byte_string, len(byte_string), NULL, allow_transient) @cython.final - cdef const Utf8Str* _intern_utf8(self, char* utf8_string, int length, hash_t* precalculated_hash): + cdef const Utf8Str* _intern_utf8(self, char* utf8_string, int length, hash_t* precalculated_hash, bint allow_transient): # TODO: This function's API/behaviour is an unholy mess... # 0 means missing, but we don't bother offsetting the index. cdef hash_t key = precalculated_hash[0] if precalculated_hash is not NULL else hash_utf8(utf8_string, length) cdef Utf8Str* value = self._map.get(key) if value is not NULL: return value + if allow_transient and self._transient_map is not None: + # If we've already allocated a transient string, and now we + # want to intern it permanently, we'll end up with the string + # in both places. That seems fine -- I don't see why we need + # to remove it from the transient map. 
diff --git a/spacy/tokenizer.pxd b/spacy/tokenizer.pxd
index a902ebad9..88e4b06b0 100644
--- a/spacy/tokenizer.pxd
+++ b/spacy/tokenizer.pxd
@@ -25,9 +25,7 @@ cdef class Tokenizer:
     cdef PhraseMatcher _special_matcher
     # TODO convert to bool in v4
     cdef int _faster_heuristics
-    # TODO next one is unused and should be removed in v4
-    # https://github.com/explosion/spaCy/pull/9150
-    cdef int _unused_int2
+    cdef public int max_cache_size
 
     cdef Doc _tokenize_affixes(self, str string, bint with_special_cases)
     cdef int _apply_special_cases(self, Doc doc) except -1
diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 96545828f..93b7f63ac 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -30,7 +30,7 @@ cdef class Tokenizer:
     """
     def __init__(self, Vocab vocab, rules=None, prefix_search=None,
                  suffix_search=None, infix_finditer=None, token_match=None,
-                 url_match=None, faster_heuristics=True):
+                 url_match=None, faster_heuristics=True, max_cache_size=10000):
         """Create a `Tokenizer`, to create `Doc` objects given unicode text.
 
         vocab (Vocab): A storage container for lexical types.
@@ -50,6 +50,7 @@ cdef class Tokenizer:
         faster_heuristics (bool): Whether to restrict the final
             Matcher-based pass for rules to those containing affixes or space.
             Defaults to True.
+        max_cache_size (int): Maximum number of tokenization chunks to cache.
 
         EXAMPLE:
            >>> tokenizer = Tokenizer(nlp.vocab)
@@ -69,6 +70,7 @@ cdef class Tokenizer:
         self._rules = {}
         self._special_matcher = PhraseMatcher(self.vocab)
         self._load_special_cases(rules)
+        self.max_cache_size = max_cache_size
 
     @property
     def token_match(self):
@@ -397,8 +399,9 @@ cdef class Tokenizer:
                                           has_special, with_special_cases)
             self._attach_tokens(tokens, span, &prefixes, &suffixes,
                                 has_special, with_special_cases)
-            self._save_cached(&tokens.c[orig_size], orig_key, has_special,
-                              tokens.length - orig_size)
+            if len(self._cache) < self.max_cache_size:
+                self._save_cached(&tokens.c[orig_size], orig_key, has_special,
+                                  tokens.length - orig_size)
 
     cdef str _split_affixes(
         self,
@@ -514,6 +517,9 @@ cdef class Tokenizer:
         if n <= 0:
             # avoid mem alloc of zero length
             return 0
+        # Historically this check was mostly used to avoid caching
+        # chunks that had tokens owned by the Doc. Now that that's
+        # not a thing, I don't think we need this?
         for i in range(n):
             if self.vocab._by_orth.get(tokens[i].lex.orth) == NULL:
                 return 0
diff --git a/spacy/vocab.pxd b/spacy/vocab.pxd
index 43e47af1d..c2bfe12e3 100644
--- a/spacy/vocab.pxd
+++ b/spacy/vocab.pxd
@@ -41,7 +41,9 @@ cdef class Vocab:
     cdef const TokenC* make_fused_token(self, substrings) except NULL
 
     cdef const LexemeC* _new_lexeme(self, Pool mem, str string) except NULL
-    cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1
+    cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex, bint is_transient) except -1
     cdef const LexemeC* _new_lexeme(self, Pool mem, str string) except NULL
 
     cdef PreshMap _by_orth
+    cdef Pool _non_temp_mem
+    cdef vector[attr_t] _transient_orths
diff --git a/spacy/vocab.pyi b/spacy/vocab.pyi
index b7ff20348..ee7636f02 100644
--- a/spacy/vocab.pyi
+++ b/spacy/vocab.pyi
@@ -1,6 +1,8 @@
+from contextlib import contextmanager
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Union
 
+from cymem.cymem import Pool
 from thinc.types import Floats1d, FloatsXd
 
 from . import Language
@@ -67,6 +69,8 @@ class Vocab:
     def from_bytes(
         self, bytes_data: bytes, *, exclude: Iterable[str] = ...
    ) -> Vocab: ...
+    @contextmanager
+    def memory_zone(self, mem: Optional[Pool] = None) -> Iterator[Pool]: ...
 
 def pickle_vocab(vocab: Vocab) -> Any: ...
 def unpickle_vocab(
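`Vocab.memory_zone` below composes the string store's zone with any sub-component that also exposes one, entering them through `contextlib.ExitStack` because the number of participating components isn't known statically. A standalone sketch of that stdlib pattern, using dummy components rather than spaCy objects:

```python
from contextlib import ExitStack, contextmanager


@contextmanager
def zone(name: str):
    print(f"enter {name}")
    yield
    print(f"exit {name}")


components = ["strings", "morphology", "vectors"]  # illustrative names only

with ExitStack() as stack:
    for name in components:
        stack.enter_context(zone(name))
    print("all zones open")
# ExitStack unwinds the entered contexts in reverse order on exit.
```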
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index 19e6eb005..97ba5d68c 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -1,4 +1,6 @@
 import functools
+from contextlib import ExitStack, contextmanager
+from typing import Iterator, Optional
 
 import numpy
 import srsly
@@ -87,6 +89,12 @@ cdef class Vocab:
         self.lookups = lookups
         self.writing_system = writing_system
         self.get_noun_chunks = get_noun_chunks
+        # During a memory_zone we replace our mem object with one
+        # that's passed to us. We keep a reference to our non-temporary
+        # memory here, in case we need to make an allocation we want to
+        # guarantee is not temporary. This is also how we check whether
+        # we're in a memory zone: we check whether self.mem is self._non_temp_mem.
+        self._non_temp_mem = self.mem
 
     @property
     def vectors(self):
@@ -114,6 +122,33 @@ cdef class Vocab:
         """
         return self.length
 
+    @contextmanager
+    def memory_zone(self, mem: Optional[Pool] = None) -> Iterator[Pool]:
+        """Begin a block where resources allocated during the block will
+        be freed at the end of it. If a resource was created within the
+        memory zone block, accessing it outside the block is invalid.
+        Behaviour of this invalid access is undefined. Memory zones should
+        not be nested.
+
+        The memory zone is helpful for services that need to process large
+        volumes of text with a defined memory budget.
+        """
+        if mem is None:
+            mem = Pool()
+        # The ExitStack allows programmatic nested context managers.
+        # We don't know how many we need, so it would be awkward to have
+        # them as nested blocks.
+        with ExitStack() as stack:
+            contexts = [stack.enter_context(self.strings.memory_zone(mem))]
+            if hasattr(self.morphology, "memory_zone"):
+                contexts.append(stack.enter_context(self.morphology.memory_zone(mem)))
+            if hasattr(self._vectors, "memory_zone"):
+                contexts.append(stack.enter_context(self._vectors.memory_zone(mem)))
+            self.mem = mem
+            yield mem
+            self._clear_transient_orths()
+            self.mem = self._non_temp_mem
+
     def add_flag(self, flag_getter, int flag_id=-1):
         """Set a new boolean flag to words in the vocabulary.
 
@@ -148,8 +183,7 @@ cdef class Vocab:
 
     cdef const LexemeC* get(self, Pool mem, str string) except NULL:
         """Get a pointer to a `LexemeC` from the lexicon, creating a new
-        `Lexeme` if necessary using memory acquired from the given pool. If the
-        pool is the lexicon's own memory, the lexeme is saved in the lexicon.
+        `Lexeme` if necessary.
         """
         if string == "":
             return &EMPTY_LEXEME
@@ -180,17 +214,9 @@ cdef class Vocab:
         return self._new_lexeme(mem, self.strings[orth])
 
     cdef const LexemeC* _new_lexeme(self, Pool mem, str string) except NULL:
-        # I think this heuristic is bad, and the Vocab should always
-        # own the lexemes. It avoids weird bugs this way, as it's how the thing
-        # was originally supposed to work. The best solution to the growing
-        # memory use is to periodically reset the vocab, which is an action
-        # that should be up to the user to do (so we don't need to keep track
-        # of the doc ownership).
-        # TODO: Change the C API so that the mem isn't passed in here.
+        # The mem argument is deprecated, replaced by memory zones, as is
+        # the old size heuristic below: lexemes are always allocated from
+        # the vocab's current pool.
         mem = self.mem
-        # if len(string) < 3 or self.length < 10000:
-        #     mem = self.mem
-        cdef bint is_oov = mem is not self.mem
         lex = <LexemeC*>mem.alloc(1, sizeof(LexemeC))
         lex.orth = self.strings.add(string)
         lex.length = len(string)
@@ -202,18 +228,25 @@ cdef class Vocab:
         for attr, func in self.lex_attr_getters.items():
             value = func(string)
             if isinstance(value, str):
-                value = self.strings.add(value)
+                value = self.strings.add(value, allow_transient=True)
             if value is not None:
                 Lexeme.set_struct_attr(lex, attr, value)
-        if not is_oov:
-            self._add_lex_to_vocab(lex.orth, lex)
+        self._add_lex_to_vocab(lex.orth, lex, self.mem is not self._non_temp_mem)
         if lex == NULL:
             raise ValueError(Errors.E085.format(string=string))
         return lex
 
-    cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1:
+    cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex, bint is_transient) except -1:
         self._by_orth.set(lex.orth, <void*>lex)
         self.length += 1
+        if is_transient:
+            self._transient_orths.push_back(lex.orth)
+
+    def _clear_transient_orths(self):
+        """Remove transient lexemes from the index (generally at the end of the memory zone)."""
+        for orth in self._transient_orths:
+            self._by_orth.pop(orth)
+        self._transient_orths.clear()
 
     def __contains__(self, key):
         """Check whether the string or int key has an entry in the vocabulary.
@@ -265,7 +298,7 @@ cdef class Vocab:
         """
         cdef attr_t orth
         if isinstance(id_or_string, str):
-            orth = self.strings.add(id_or_string)
+            orth = self.strings.add(id_or_string, allow_transient=True)
         else:
             orth = id_or_string
         return Lexeme(self, orth)
@@ -417,7 +450,7 @@ cdef class Vocab:
         DOCS: https://spacy.io/api/vocab#get_vector
         """
         if isinstance(orth, str):
-            orth = self.strings.add(orth)
+            orth = self.strings.add(orth, allow_transient=True)
         cdef Lexeme lex = self[orth]
         key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
         if self.has_vector(key):
@@ -436,7 +469,7 @@ cdef class Vocab:
         DOCS: https://spacy.io/api/vocab#set_vector
         """
         if isinstance(orth, str):
-            orth = self.strings.add(orth)
+            orth = self.strings.add(orth, allow_transient=False)
         cdef Lexeme lex = self[orth]
         key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
         if self.vectors.is_full and key not in self.vectors:
@@ -460,7 +493,7 @@ cdef class Vocab:
         DOCS: https://spacy.io/api/vocab#has_vector
         """
         if isinstance(orth, str):
-            orth = self.strings.add(orth)
+            orth = self.strings.add(orth, allow_transient=True)
         cdef Lexeme lex = self[orth]
         key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
         return key in self.vectors
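Nothing in this diff wires `memory_zone` into `Language`, but since pipeline components allocate through the shared vocab, a caller can already wrap processing in the vocab's zone. A usage sketch (hypothetical deployment code, not part of the PR); per the docstrings above, using zone-allocated objects after the zone exits is undefined behaviour:

```python
import spacy

nlp = spacy.blank("en")
texts = ["first document", "second document"]  # stand-in for a request stream

with nlp.vocab.memory_zone():
    for doc in nlp.pipe(texts):
        # Consume results inside the zone; keeping Docs (or Lexemes)
        # around after the zone exits would be invalid.
        print(len(doc), doc[0].text)
```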