diff --git a/spacy/lang.pxd b/spacy/lang.pxd
index b03024847..ba9d0a779 100644
--- a/spacy/lang.pxd
+++ b/spacy/lang.pxd
@@ -27,8 +27,6 @@ cdef class Lexicon:
     cpdef readonly size_t size
     cpdef readonly StringStore strings
 
-    cdef vector[Lexeme*] lexemes
-
     cpdef Lexeme lookup(self, unicode string)
     cdef Lexeme* get(self, String* s) except NULL
     
diff --git a/spacy/lang.pyx b/spacy/lang.pyx
index a09c28172..9323dc052 100644
--- a/spacy/lang.pyx
+++ b/spacy/lang.pyx
@@ -45,8 +45,9 @@ cdef class Language:
         self.suffix_re = re.compile(suffix)
         self.infix_re = re.compile(infix)
         self.lexicon = Lexicon(lexemes)
-        self.lexicon.load(path.join(util.DATA_DIR, name, 'lexemes'))
-        self.lexicon.strings.load(path.join(util.DATA_DIR, name, 'strings'))
+        if path.exists(path.join(util.DATA_DIR, name, 'lexemes')):
+            self.lexicon.load(path.join(util.DATA_DIR, name, 'lexemes'))
+            self.lexicon.strings.load(path.join(util.DATA_DIR, name, 'strings'))
         self._load_special_tokenization(rules)
 
     cpdef Tokens tokenize(self, unicode string):
@@ -240,18 +241,16 @@ cdef class Lexicon:
         for py_string, lexeme_dict in lexemes.iteritems():
             string_from_unicode(&string, py_string)
             lexeme = <Lexeme*>self.mem.alloc(1, sizeof(Lexeme))
-            lexeme[0] = lexeme_init(string.chars[:string.n], string.key, self.size,
-                                    self.strings, lexeme_dict)
-            self._dict.set(lexeme.hash, lexeme)
-            self.lexemes.push_back(lexeme)
+            lexeme[0] = lexeme_init(string.chars[:string.n], string.key, self.strings,
+                                    lexeme_dict)
+            self._dict.set(string.key, lexeme)
             self.size += 1
 
     def set(self, unicode py_string, dict lexeme_dict):
         cdef String string
         string_from_unicode(&string, py_string)
         cdef Lexeme* lex = self.get(&string)
-        lex[0] = lexeme_init(string.chars[:string.n], string.key, lex.i,
-                             self.strings, lexeme_dict)
+        lex[0] = lexeme_init(string.chars[:string.n], string.key, self.strings, lexeme_dict)  # replace the found-or-created entry in place
 
     cdef Lexeme* get(self, String* string) except NULL:
         cdef Lexeme* lex
@@ -259,10 +258,8 @@ cdef class Lexicon:
         if lex != NULL:
             return lex
         lex = <Lexeme*>self.mem.alloc(sizeof(Lexeme), 1)
-        lex[0] = lexeme_init(string.chars[:string.n], string.key, self.size,
-                             self.strings, {})
-        self._dict.set(lex.hash, lex)
-        self.lexemes.push_back(lex)
+        lex[0] = lexeme_init(string.chars[:string.n], string.key, self.strings, {})
+        self._dict.set(string.key, lex)
         self.size += 1
         return lex
 
@@ -287,8 +284,15 @@ cdef class Lexicon:
         cdef FILE* fp = fopen(<char*>bytes_loc, 'wb')
         assert fp != NULL
         cdef size_t st
-        for i in range(self.size-1):
-            st = fwrite(self.lexemes[i], sizeof(Lexeme), 1, fp)
+        cdef hash_t key
+        for i in range(self._dict.length):
+            key = self._dict.c_map.cells[i].key
+            if key == 0:
+                continue
+            lexeme = <Lexeme*>self._dict.c_map.cells[i].value
+            st = fwrite(&key, sizeof(key), 1, fp)
+            assert st == 1
+            st = fwrite(lexeme, sizeof(Lexeme), 1, fp)
             assert st == 1
         st = fclose(fp)
         assert st == 0
@@ -300,14 +304,17 @@ cdef class Lexicon:
         assert fp != NULL
         cdef size_t st
         cdef Lexeme* lexeme
+        cdef hash_t key
         i = 0
         while True:
+            st = fread(&key, sizeof(key), 1, fp)
+            if st != 1:
+                break
             lexeme = <Lexeme*>self.mem.alloc(sizeof(Lexeme), 1)
             st = fread(lexeme, sizeof(Lexeme), 1, fp)
             if st != 1:
                 break
-            self.lexemes.push_back(lexeme)
-            self._dict.set(lexeme.hash, lexeme)
+            self._dict.set(key, lexeme)
             i += 1
         print "Load %d lexemes" % i
         fclose(fp)
diff --git a/spacy/lexeme.pxd b/spacy/lexeme.pxd
index 235883e2a..b39a32522 100644
--- a/spacy/lexeme.pxd
+++ b/spacy/lexeme.pxd
@@ -23,8 +23,6 @@ cpdef enum:
 
 
 cdef struct Lexeme:
-    hash_t hash
-    atom_t i
     atom_t length
    
     atom_t sic
@@ -46,7 +44,7 @@ cdef struct Lexeme:
 
 cdef Lexeme EMPTY_LEXEME
 
-cpdef Lexeme init(unicode string, hash_t hashed, atom_t i,
+cpdef Lexeme init(unicode string, hash_t hashed,
                   StringStore store, dict props) except *
  
 
diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx
index 887210225..6760b3913 100644
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@@ -26,11 +26,9 @@ def get_flags(unicode string, float upper_pc, float title_pc, float lower_pc):
     return flags
 
 
-cpdef Lexeme init(unicode string, hash_t hashed, atom_t i,
+cpdef Lexeme init(unicode string, hash_t hashed,
                   StringStore store, dict props) except *:
     cdef Lexeme lex
-    lex.hash = hashed
-    lex.i = i
     lex.length = len(string)
     lex.sic = get_string_id(string, store)
     
diff --git a/spacy/pos.pyx b/spacy/pos.pyx
index 8722a1639..bd366c4c3 100644
--- a/spacy/pos.pyx
+++ b/spacy/pos.pyx
@@ -128,7 +128,7 @@ cdef int get_atoms(atom_t* atoms, Lexeme* p2, Lexeme* p1, Lexeme* n0, Lexeme* n1
 
 
 cdef inline void _fill_token(atom_t* atoms, Lexeme* lex) nogil:
-    atoms[0] = lex.i
+    atoms[0] = lex.sic
     atoms[1] = lex.cluster
     atoms[2] = lex.norm
     atoms[3] = lex.shape
diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd
index d6b655074..616353e8f 100644
--- a/spacy/tokens.pxd
+++ b/spacy/tokens.pxd
@@ -31,7 +31,6 @@ cdef class Token:
     cdef public int idx
     cdef public int pos
 
-    cdef public atom_t id
     cdef public atom_t cluster
     cdef public atom_t length
     cdef public atom_t lex_pos
diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx
index 6abfd5b6a..f1a96d2ae 100644
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@@ -107,7 +107,6 @@ cdef class Token:
         self.idx = idx
         self.pos = pos
         
-        self.id = lex['i']
         self.cluster = lex['cluster']
         self.length = lex['length']
         self.lex_pos = lex['pos']