diff --git a/spacy/strings.pyx b/spacy/strings.pyx
index b1b707c6a..e255dbb48 100644
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@@ -74,9 +74,9 @@ cdef Utf8Str* _allocate(Pool mem, const unsigned char* chars, uint32_t length) e
         assert string.s[0] >= sizeof(string.s) or string.s[0] == 0, string.s[0]
         return string
 
- 
+
 cdef class StringStore:
-    """Lookup strings by 64-bit hash"""
+    """Look up strings by 64-bit hashes."""
     def __init__(self, strings=None, freeze=False):
         """Create the StringStore.
 
@@ -92,9 +92,9 @@ cdef class StringStore:
                 self.add(string)
 
     def __getitem__(self, object string_or_id):
-        """Retrieve a string from a given hash ID, or vice versa.
+        """Retrieve a string from a given hash, or vice versa.
 
-        string_or_id (bytes or unicode or uint64): The value to encode.
+        string_or_id (bytes, unicode or uint64): The value to encode.
         Returns (unicode or uint64): The value to be retrieved.
         """
         if isinstance(string_or_id, basestring) and len(string_or_id) == 0:
@@ -123,6 +123,11 @@ cdef class StringStore:
                 return decode_Utf8Str(utf8str)
 
     def add(self, string):
+        """Add a string to the StringStore.
+
+        string (unicode): The string to add.
+        RETURNS (uint64): The string's hash value.
+        """
         if isinstance(string, unicode):
             if string in SYMBOLS_BY_STR:
                 return SYMBOLS_BY_STR[string]
diff --git a/website/assets/img/docs/vocab_stringstore.svg b/website/assets/img/docs/vocab_stringstore.svg
index 644453737..119175247 100644
--- a/website/assets/img/docs/vocab_stringstore.svg
+++ b/website/assets/img/docs/vocab_stringstore.svg
@@ -7,30 +7,30 @@
     </style>
     <rect width="570" height="88" x="1" y="135" fill="#d5e8d4" stroke="#82b366" stroke-width="2" rx="13.2" ry="13.2"/>
     <path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M444 164h100v40H444z"/>
-    <text class="svg__vocab__text" dy="1em" transform="translate(477.5 174.5)" width="31" height="17">3572</text>
+    <text class="svg__vocab__text" dx="-0.5em" dy="1em" transform="translate(477.5 174.5)" width="31" height="17">31979...</text>
     <rect width="52" height="20" x="468" y="152" fill="#666" rx="3" ry="3"/>
     <text class="svg__vocab__text-box" dy="0.9em" width="44" height="12" transform="translate(471.5 155.5)">Lexeme</text>
     <path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M76 164h100v40H76z"/>
-    <text class="svg__vocab__text" dy="1em" width="23" height="17" transform="translate(113.5 174.5)">508</text>
+    <text class="svg__vocab__text" dx="-0.5em" dy="1em" width="23" height="17" transform="translate(113.5 174.5)">46904...</text>
     <rect width="52" height="20" x="100" y="152" fill="#666" rx="3" ry="3"/>
     <text class="svg__vocab__text-box" dy="0.9em" width="44" height="12" transform="translate(103.5 155.5)">Lexeme</text>
     <path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M263 164h100v40H263z"/>
-    <text class="svg__vocab__text" dy="1em" width="23" height="17" transform="translate(300.5 174.5)">949</text>
+    <text class="svg__vocab__text" dx="-0.7em" dy="1em" width="23" height="17" transform="translate(300.5 174.5)">37020...</text>
     <rect width="52" height="20" x="287" y="152" fill="#666" rx="3" ry="3"/>
     <text class="svg__vocab__text-box" dy="0.9em" width="44" height="12" transform="translate(290.5 155.5)">Lexeme</text>
     <rect width="570" height="88" x="1" y="246" fill="#f5f5f5" stroke="#666" stroke-width="2" rx="13.2" ry="13.2"/>
     <path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M444 275h100v40H444z"/>
     <text class="svg__vocab__text" dy="1em" width="55" height="17" transform="translate(465.5 285.5)">&quot;coffee&quot;</text>
     <rect width="52" height="20" x="468" y="263" fill="#666" rx="3" ry="3"/>
-    <text class="svg__vocab__text-box" dy="0.9em" width="28" height="12" transform="translate(479.5 266.5)">3672</text>
+    <text class="svg__vocab__text-box" dx="-0.5em" dy="0.9em" width="28" height="12" transform="translate(479.5 266.5)">31979…</text>
     <path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M76 275h100v40H76z"/>
     <text class="svg__vocab__text" dy="1em" width="17" height="17" transform="translate(116.5 285.5)">&quot;I&quot;</text>
     <rect width="52" height="20" x="100" y="263" fill="#666" rx="3" ry="3"/>
-    <text class="svg__vocab__text-box" dy="0.9em" width="22" height="12"  transform="translate(114.5 266.5)">508</text>
+    <text class="svg__vocab__text-box" dx="-0.7em" dy="0.9em" width="22" height="12"  transform="translate(114.5 266.5)">46904…</text>
     <path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M263 275h100v40H263z"/>
     <text class="svg__vocab__text" dy="1em" width="41" height="17" transform="translate(291.5 285.5)">&quot;love&quot;</text>
     <rect width="52" height="20" x="287" y="263" fill="#666" rx="3" ry="3"/>
-    <text class="svg__vocab__text-box" dy="0.9em" width="22" height="12" transform="translate(301.5 266.5)">949</text>
+    <text class="svg__vocab__text-box" dx="-0.7em" dy="0.9em" width="22" height="12" transform="translate(301.5 266.5)">37020…</text>
     <rect width="570" height="110" x="1" y="1" fill="#e1d5e7" stroke="#9673a6" stroke-width="2" rx="16.5" ry="16.5"/>
     <path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M263 60h-78.8"/>
     <path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M178.2 60l8-4-2 4 2 4z"/>
diff --git a/website/docs/api/stringstore.jade b/website/docs/api/stringstore.jade
index f09352c79..0665f6060 100644
--- a/website/docs/api/stringstore.jade
+++ b/website/docs/api/stringstore.jade
@@ -2,14 +2,16 @@
 
 include ../../_includes/_mixins
 
-p Map strings to and from integer IDs.
+p
+    |  Look up strings by 64-bit hashes. As of v2.0, spaCy uses hash values
+    |  instead of integer IDs. This ensures that strings always map to the
+    |  same hash, even from different #[code StringStores].
 
 +h(2, "init") StringStore.__init__
     +tag method
 
 p
-    |  Create the #[code StringStore]. Note that a newly initialised store will
-    |  always include an empty string #[code ''] at position #[code 0].
+    |  Create the #[code StringStore].
 
 +aside-code("Example").
     from spacy.strings import StringStore
@@ -44,17 +46,18 @@ p Get the number of strings in the store.
 +h(2, "getitem") StringStore.__getitem__
     +tag method
 
-p Retrieve a string from a given integer ID, or vice versa.
+p Retrieve a string from a given hash, or vice versa.
 
 +aside-code("Example").
     stringstore = StringStore([u'apple', u'orange'])
-    int_id = stringstore[u'apple'] # 1
-    assert stringstore[int_id] == u'apple'
+    apple_hash = stringstore[u'apple']
+    assert apple_hash == 8566208034543834098L
+    assert stringstore[apple_hash] == u'apple'
 
 +table(["Name", "Type", "Description"])
     +row
         +cell #[code string_or_id]
-        +cell bytes, unicode or int
+        +cell bytes, unicode or uint64
         +cell The value to encode.
 
     +footrow
@@ -94,7 +97,7 @@ p
 +aside-code("Example").
     stringstore = StringStore([u'apple', u'orange'])
     all_strings = [s for s in stringstore]
-    assert all_strings == [u'', u'apple', u'orange']
+    assert all_strings == [u'apple', u'orange']
 
 +table(["Name", "Type", "Description"])
     +footrow
@@ -102,6 +105,30 @@ p
         +cell unicode
         +cell A string in the store.
 
++h(2, "add") StringStore.add
+    +tag method
+    +tag-new(2)
+
+p Add a string to the #[code StringStore].
+
++aside-code("Example").
+    stringstore = StringStore([u'apple', u'orange'])
+    stringstore.add(u'banana')
+    assert len(stringstore) == 3
+    assert stringstore[u'banana'] == 2525716904149915114L
+
++table(["Name", "Type", "Description"])
+    +row
+        +cell #[code string]
+        +cell unicode
+        +cell The string to add.
+
+    +footrow
+        +cell returns
+        +cell uint64
+        +cell The string's hash value.
+
+
 +h(2, "to_disk") StringStore.to_disk
     +tag method
     +tag-new(2)
diff --git a/website/docs/usage/_spacy-101/_vocab.jade b/website/docs/usage/_spacy-101/_vocab.jade
index dd300b5b9..45a16af80 100644
--- a/website/docs/usage/_spacy-101/_vocab.jade
+++ b/website/docs/usage/_spacy-101/_vocab.jade
@@ -4,10 +4,10 @@ p
     |  Whenever possible, spaCy tries to store data in a vocabulary, the
     |  #[+api("vocab") #[code Vocab]], that will be
     |  #[strong shared by multiple documents]. To save memory, spaCy also
-    |  encodes all strings to #[strong integer IDs] – in this case for example,
-    |  "coffee" has the ID #[code 3672]. Entity labels like "ORG" and
-    |  part-of-speech tags like "VERB" are also encoded. Internally, spaCy
-    |  only "speaks" in integer IDs.
+    |  encodes all strings to #[strong hash values] – in this case for example,
+    |  "coffee" has the hash #[code 3197928453018144401L]. Entity labels like
+    |  "ORG" and part-of-speech tags like "VERB" are also encoded. Internally,
+    |  spaCy only "speaks" in hash values.
 
 +aside
     |  #[strong Token]: A word, punctuation mark etc. #[em in context], including
@@ -16,8 +16,8 @@ p
     |  and flags, e.g. if it's lowercase, a digit or punctuation.#[br]
     |  #[strong Doc]: A processed container of tokens in context.#[br]
     |  #[strong Vocab]: The collection of lexemes.#[br]
-    |  #[strong StringStore]: The dictionary mapping integer IDs to strings, for
-    |  example #[code 3672] &rarr; "coffee".
+    |  #[strong StringStore]: The dictionary mapping hash values to strings, for
+    |  example #[code 3197928453018144401L] &rarr; "coffee".
 
 +image
     include ../../../assets/img/docs/vocab_stringstore.svg
@@ -27,26 +27,26 @@ p
 p
     |  If you process lots of documents containing the word "coffee" in all
     |  kinds of different contexts, storing the exact string "coffee" every time
-    |  would take up way too much space. So instead, spaCy assigns it an ID
+    |  would take up way too much space. So instead, spaCy hashes the string
     |  and stores it in the #[+api("stringstore") #[code StringStore]]. You can
     |  think of the #[code StringStore] as a
     |  #[strong lookup table that works in both directions] – you can look up a
-    |  string to get its ID, or an ID to get its string:
+    |  string to get its hash, or a hash to get its string:
 
 +code.
     doc = nlp(u'I like coffee')
-    assert doc.vocab.strings[u'coffee'] == 3572
-    assert doc.vocab.strings[3572] == u'coffee'
+    assert doc.vocab.strings[u'coffee'] == 3197928453018144401L
+    assert doc.vocab.strings[3197928453018144401L] == u'coffee'
 
 p
     |  Now that all strings are encoded, the entries in the vocabulary
     |  #[strong don&apos;t need to include the word text] themselves. Instead,
-    |  they can look it up in the #[code StringStore] via its integer ID. Each
+    |  they can look it up in the #[code StringStore] via its hash value. Each
     |  entry in the vocabulary, also called #[+api("lexeme") #[code Lexeme]],
     |  contains the #[strong context-independent] information about a word.
     |  For example, no matter if "love" is used as a verb or a noun in some
     |  context, its spelling and whether it consists of alphabetic characters
-    |  won't ever change.
+    |  won't ever change. Its hash value will also always be the same.
 
 +code.
     for word in doc:
@@ -56,39 +56,54 @@ p
 
 +aside
     |  #[strong Text]: The original text of the lexeme.#[br]
-    |  #[strong Orth]: The integer ID of the lexeme.#[br]
+    |  #[strong Orth]: The hash value of the lexeme.#[br]
     |  #[strong Shape]: The abstract word shape of the lexeme.#[br]
     |  #[strong Prefix]: By default, the first letter of the word string.#[br]
     |  #[strong Suffix]: By default, the last three letters of the word string.#[br]
     |  #[strong is alpha]: Does the lexeme consist of alphabetic characters?#[br]
     |  #[strong is digit]: Does the lexeme consist of digits?#[br]
-    |  #[strong is title]: Does the lexeme consist of alphabetic characters?#[br]
-    |  #[strong Lang]: The language of the parent vocabulary.
 
-+table(["text", "orth", "shape", "prefix", "suffix", "is_alpha", "is_digit", "is_title", "lang"])
-    - var style = [0, 1, 1, 0, 0, 1, 1, 1, 0]
-    +annotation-row(["I", 508, "X", "I", "I", true, false, true, "en"], style)
-    +annotation-row(["love", 949, "xxxx", "l", "ove", true, false, false, "en"], style)
-    +annotation-row(["coffee", 3572, "xxxx", "c", "ffe", true, false, false, "en"], style)
++table(["text", "orth", "shape", "prefix", "suffix", "is_alpha", "is_digit"])
+    - var style = [0, 1, 1, 0, 0, 1, 1]
+    +annotation-row(["I", "4690420944186131903L", "X", "I", "I", true, false], style)
+    +annotation-row(["love", "3702023516439754181L", "xxxx", "l", "ove", true, false], style)
+    +annotation-row(["coffee", "3197928453018144401L", "xxxx", "c", "fee", true, false], style)
 
 p
-    |  The specific entries in the voabulary and their IDs don't really matter –
-    |  #[strong as long as they match]. That's why you always need to make sure
-    |  all objects you create have access to the same vocabulary. If they don't,
-    |  the IDs won't match and spaCy will either produce very confusing results,
-    |  or fail alltogether.
+    |  The mapping of words to hashes doesn't depend on any state. To make sure
+    |  each value is unique, spaCy uses a
+    |  #[+a("https://en.wikipedia.org/wiki/Hash_function") hash function] to
+    |  calculate the hash #[strong based on the word string]. This also means
+    |  that the hash for "coffee" will always be the same, no matter which model
+    |  you're using or how you've configured spaCy.
+
+p
+    |  However, hashes #[strong cannot be reversed] and there's no way to
+    |  resolve #[code 3197928453018144401L] back to "coffee". All spaCy can do
+    |  is look it up in the vocabulary. That's why you always need to make
+    |  sure all objects you create have access to the same vocabulary. If they
+    |  don't, spaCy might not be able to find the strings it needs.
 
 +code.
     from spacy.tokens import Doc
     from spacy.vocab import Vocab
 
     doc = nlp(u'I like coffee') # original Doc
-    new_doc = Doc(Vocab(), words=['I', 'like', 'coffee']) # new Doc with empty Vocab
-    assert doc.vocab.strings[u'coffee'] == 3572 # ID in vocab of Doc
-    assert new_doc.vocab.strings[u'coffee'] == 446 # ID in vocab of new Doc
+    assert doc.vocab.strings[u'coffee'] == 3197928453018144401L # get hash
+    assert doc.vocab.strings[3197928453018144401L] == u'coffee' # 👍
+
+    empty_doc = Doc(Vocab()) # new Doc with empty Vocab
+    # empty_doc.vocab.strings[3197928453018144401L] will raise an error :(
+
+    empty_doc.vocab.strings.add(u'coffee') # add "coffee" and generate hash
+    assert empty_doc.vocab.strings[3197928453018144401L] == u'coffee' # 👍
+
+    new_doc = Doc(doc.vocab) # create new doc with first doc's vocab
+    assert new_doc.vocab.strings[3197928453018144401L] == u'coffee' # 👍
 
 p
-    |  Even though both #[code Doc] objects contain the same words, the internal
-    |  integer IDs are very different. The same applies for all other strings,
-    |  like the annotation scheme. To avoid mismatched IDs, spaCy will always
-    |  export the vocab if you save a #[code Doc] or #[code nlp] object.
+    |  If the doc's vocabulary doesn't contain a hash for "coffee", spaCy will
+    |  throw an error. So you either need to add it manually, or initialise the
+    |  new #[code Doc] with the shared vocab. To prevent this problem, spaCy
+    |  will usually export the vocab when you save a #[code Doc] or #[code nlp]
+    |  object.
diff --git a/website/docs/usage/lightning-tour.jade b/website/docs/usage/lightning-tour.jade
index 107e7210f..a87e763a6 100644
--- a/website/docs/usage/lightning-tour.jade
+++ b/website/docs/usage/lightning-tour.jade
@@ -68,13 +68,19 @@ p
     |  #[strong API:] #[+api("token") #[code Token]]
     |  #[strong Usage:] #[+a("/docs/usage/pos-tagging") Part-of-speech tagging]
 
-+h(2, "examples-integer-ids") Use integer IDs for any string
++h(2, "examples-hashes") Use hash values for any string
 
 +code.
-    hello_id = nlp.vocab.strings['Hello']
-    hello_str = nlp.vocab.strings[hello_id]
-    assert token.text  == hello_id  == 3125
-    assert token.text == hello_str == 'Hello'
+    doc = nlp(u'I love coffee')
+    coffee_hash = nlp.vocab.strings[u'coffee'] # 3197928453018144401L
+    coffee_text = nlp.vocab.strings[coffee_hash] # 'coffee'
+
+    assert doc[2].orth == coffee_hash == 3197928453018144401L
+    assert doc[2].text == coffee_text == u'coffee'
+
+    doc.vocab.strings.add(u'beer')
+    beer_hash = doc.vocab.strings[u'beer'] # 3073001599257881079L
+    beer_text = doc.vocab.strings[beer_hash] # 'beer'
 
 +h(2, "examples-entities") Recongnise and update named entities
     +tag-model("NER")
diff --git a/website/docs/usage/v2.jade b/website/docs/usage/v2.jade
index db827c414..afdf50efb 100644
--- a/website/docs/usage/v2.jade
+++ b/website/docs/usage/v2.jade
@@ -50,6 +50,28 @@ p
     |  #[strong API:] #[+api("language") #[code Language]]
     |  #[strong Usage:] #[+a("/docs/usage/language-processing-pipeline") Processing text]
 
++h(3, "features-hash-ids") Hash values instead of integer IDs
+
++aside-code("Example").
+    doc = nlp(u'I love coffee')
+    assert doc.vocab.strings[u'coffee'] == 3197928453018144401L
+    assert doc.vocab.strings[3197928453018144401L] == u'coffee'
+
+    doc.vocab.strings.add(u'beer')
+    assert doc.vocab.strings[u'beer'] == 3073001599257881079L
+
+p
+    |  The #[+api("stringstore") #[code StringStore]] now resolves all strings
+    |  to hash values instead of integer IDs. This means that the string-to-int
+    |  mapping #[strong no longer depends on the vocabulary state], making a lot
+    |  of workflows much simpler, especially during training. Unlike integer IDs
+    |  in spaCy v1.x, hash values will #[strong always match] – even across
+    |  models. Strings can now be added explicitly using the new #[+api("stringstore#add") #[code StringStore.add]] method.
+
++infobox
+    |  #[strong API:] #[+api("stringstore") #[code StringStore]]
+    |  #[strong Usage:] #[+a("/docs/usage/spacy-101#vocab") Vocab, hashes and lexemes 101]
+
 +h(3, "features-serializer") Saving, loading and serialization
 
 +aside-code("Example").
@@ -307,6 +329,17 @@ p
     nlp.save_to_directory('/model')
     nlp.vocab.dump('/vocab')
 
++h(3, "migrating-strings") Strings and hash values
+
++code-new.
+    nlp.vocab.strings.add(u'coffee')
+    nlp.vocab.strings[u'coffee']       # 3197928453018144401L
+    other_nlp.vocab.strings[u'coffee'] # 3197928453018144401L
+
++code-old.
+    nlp.vocab.strings[u'coffee']       # 3672
+    other_nlp.vocab.strings[u'coffee'] # 40259
+
 +h(3, "migrating-languages") Processing pipelines and language data
 
 p
diff --git a/website/index.jade b/website/index.jade
index 17b564b42..b4e987cfb 100644
--- a/website/index.jade
+++ b/website/index.jade
@@ -97,7 +97,7 @@ include _includes/_mixins
                 +item Part-of-speech tagging
                 +item #[strong Named entity] recognition
                 +item Labelled dependency parsing
-                +item Convenient string-to-int mapping
+                +item Convenient string-to-hash mapping
                 +item Export to numpy data arrays
                 +item GIL-free #[strong multi-threading]
                 +item Efficient binary serialization