diff --git a/spacy/en/__init__.py b/spacy/en/__init__.py new file mode 100644 index 000000000..1cebec7ba --- /dev/null +++ b/spacy/en/__init__.py @@ -0,0 +1,44 @@ +from __future__ import unicode_literals +from os import path + +from .. import orth +from ..vocab import Vocab +from ..tokenizer import Tokenizer +from ..syntax.parser import GreedyParser +from ..tokens import Tokens +from ..morphology import Morphologizer +from .lemmatizer import Lemmatizer +from .pos import EnPosTagger +from .attrs import get_flags + + +def get_lex_props(string): + return {'flags': get_flags(string), 'dense': 1} + + +class English(object): + def __init__(self, data_dir=None, pos_tag=True, parse=False): + if data_dir is None: + data_dir = path.join(path.dirname(__file__), 'data') + self.vocab = Vocab.from_dir(data_dir, get_lex_props=get_lex_props) + self.tokenizer = Tokenizer.from_dir(self.vocab, data_dir) + if pos_tag: + self.pos_tagger = EnPosTagger(data_dir, + Morphologizer.from_dir( + self.vocab.strings, + Lemmatizer(path.join(data_dir, 'wordnet')), + data_dir)) + else: + self.pos_tagger = None + if parse: + self.parser = GreedyParser(data_dir) + else: + self.parser = None + + def __call__(self, text, pos_tag=True, parse=True): + tokens = self.tokenizer.tokenize(text) + if self.pos_tagger and pos_tag: + self.pos_tagger(tokens) + if self.parser and parse: + self.parser.parse(tokens) + return tokens diff --git a/spacy/en/attrs.pxd b/spacy/en/attrs.pxd index 3454e3368..a24aa4ebf 100644 --- a/spacy/en/attrs.pxd +++ b/spacy/en/attrs.pxd @@ -1,13 +1,13 @@ -from ..lexeme cimport FLAG0, FLAG1, FLAG2, FLAG3, FLAG4, FLAG5, FLAG6, FLAG7 -from ..lexeme cimport FLAG8, FLAG9 -from ..lexeme cimport ID as _ID -from ..lexeme cimport SIC as _SIC -from ..lexeme cimport SHAPE as _SHAPE -from ..lexeme cimport DENSE as _DENSE -from ..lexeme cimport SHAPE as _SHAPE -from ..lexeme cimport PREFIX as _PREFIX -from ..lexeme cimport SUFFIX as _SUFFIX -from ..lexeme cimport LEMMA as _LEMMA +from ..typedefs 
cimport FLAG0, FLAG1, FLAG2, FLAG3, FLAG4, FLAG5, FLAG6, FLAG7 +from ..typedefs cimport FLAG8, FLAG9 +from ..typedefs cimport ID as _ID +from ..typedefs cimport SIC as _SIC +from ..typedefs cimport SHAPE as _SHAPE +from ..typedefs cimport DENSE as _DENSE +from ..typedefs cimport SHAPE as _SHAPE +from ..typedefs cimport PREFIX as _PREFIX +from ..typedefs cimport SUFFIX as _SUFFIX +from ..typedefs cimport LEMMA as _LEMMA # Work around the lack of global cpdef variables diff --git a/spacy/en/pos.pxd b/spacy/en/pos.pxd index 9a92f411a..99c83d795 100644 --- a/spacy/en/pos.pxd +++ b/spacy/en/pos.pxd @@ -3,4 +3,4 @@ from ..morphology cimport Morphologizer cdef class EnPosTagger(Tagger): - cdef Morphologizer morphologizer + cdef readonly Morphologizer morphologizer diff --git a/spacy/lexeme.pxd b/spacy/lexeme.pxd index 5926baa0c..35826ef55 100644 --- a/spacy/lexeme.pxd +++ b/spacy/lexeme.pxd @@ -1,88 +1,9 @@ -from .typedefs cimport hash_t, flags_t, id_t, len_t, tag_t, attr_t +from .typedefs cimport hash_t, flags_t, id_t, len_t, tag_t, attr_t, attr_id_t +from .typedefs cimport ID, SIC, DENSE, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER, POS_TYPE from .structs cimport Lexeme from .strings cimport StringStore -# Reserve 64 values for flag features -cpdef enum attr_id_t: - FLAG0 - FLAG1 - FLAG2 - FLAG3 - FLAG4 - FLAG5 - FLAG6 - FLAG7 - FLAG8 - FLAG9 - FLAG10 - FLAG11 - FLAG12 - FLAG13 - FLAG14 - FLAG15 - FLAG16 - FLAG17 - FLAG18 - FLAG19 - FLAG20 - FLAG21 - FLAG22 - FLAG23 - FLAG24 - FLAG25 - FLAG26 - FLAG27 - FLAG28 - FLAG29 - FLAG30 - FLAG31 - FLAG32 - FLAG33 - FLAG34 - FLAG35 - FLAG36 - FLAG37 - FLAG38 - FLAG39 - FLAG40 - FLAG41 - FLAG42 - FLAG43 - FLAG44 - FLAG45 - FLAG46 - FLAG47 - FLAG48 - FLAG49 - FLAG50 - FLAG51 - FLAG52 - FLAG53 - FLAG54 - FLAG55 - FLAG56 - FLAG57 - FLAG58 - FLAG59 - FLAG60 - FLAG61 - FLAG62 - FLAG63 - - ID - SIC - DENSE - SHAPE - PREFIX - SUFFIX - - LENGTH - CLUSTER - POS_TYPE - LEMMA - - cdef Lexeme EMPTY_LEXEME diff --git a/spacy/lexeme.pyx 
b/spacy/lexeme.pyx index f1974cbc9..07bb008f9 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -24,7 +24,6 @@ cpdef Lexeme init(id_t i, unicode string, hash_t hashed, lex.prefix = string_store[string[:1]] lex.suffix = string_store[string[-3:]] lex.shape = string_store[orth.word_shape(string)] - lex.dense = string_store[props['dense']] lex.flags = props.get('flags', 0) return lex diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index da4485960..9efee6da3 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -59,9 +59,10 @@ cdef class Morphologizer: @classmethod def from_dir(cls, StringStore strings, object lemmatizer, data_dir): - tag_map = None - irregulars = None - tag_names = None + tagger_cfg = json.loads(open(path.join(data_dir, 'pos', 'config.json')).read()) + tag_map = tagger_cfg['tag_map'] + tag_names = tagger_cfg['tag_names'] + irregulars = json.loads(open(path.join(data_dir, 'morphs.json')).read()) return cls(strings, lemmatizer, tag_map=tag_map, irregulars=irregulars, tag_names=tag_names) diff --git a/spacy/strings.pxd b/spacy/strings.pxd index 9c16cfe1c..d5b674527 100644 --- a/spacy/strings.pxd +++ b/spacy/strings.pxd @@ -11,6 +11,11 @@ cdef inline void slice_unicode(UniStr* s, Py_UNICODE* chars, int start, int end) s.key = hash64(s.chars, (s.n * sizeof(Py_UNICODE)), 0) +cdef class _SymbolMap: + cdef dict _string_to_id + cdef list _id_to_string + + cdef class StringStore: cdef Pool mem cdef Utf8Str* strings diff --git a/spacy/strings.pyx b/spacy/strings.pyx index 24c233cfb..c7aa9c7ac 100644 --- a/spacy/strings.pyx +++ b/spacy/strings.pyx @@ -9,13 +9,42 @@ from .typedefs cimport hash_t SEPARATOR = '\n|-SEP-|\n' +cdef class _SymbolMap: + def __init__(self): + self._string_to_id = {'': 0} + self._id_to_string = [''] + + def __iter__(self): + for id_, string in enumerate(self._id_to_string[1:]): + yield string, id_ + + def __getitem__(self, object string_or_id): + cdef bytes byte_string + if isinstance(string_or_id, int) or 
isinstance(string_or_id, long): + if string_or_id < 1 or string_or_id >= len(self._id_to_string): + raise IndexError(string_or_id) + return self._id_to_string[string_or_id] + else: + string = string_or_id + if isinstance(string, unicode): + string = string.encode('utf8') + if string in self._string_to_id: + id_ = self._string_to_id[string] + else: + id_ = len(self._string_to_id) + self._string_to_id[string] = id_ + self._id_to_string.append(string) + return id_ + + cdef class StringStore: def __init__(self): self.mem = Pool() self._map = PreshMap() self._resize_at = 10000 self.strings = self.mem.alloc(self._resize_at, sizeof(Utf8Str)) - self.size = 1 + self.pos_tags = _SymbolMap() + self.dep_tags = _SymbolMap() property size: def __get__(self): diff --git a/spacy/syntax/parser.pxd b/spacy/syntax/parser.pxd index be315059f..6fe9fc58c 100644 --- a/spacy/syntax/parser.pxd +++ b/spacy/syntax/parser.pxd @@ -1,11 +1,10 @@ -from libc.stdint cimport uint32_t, uint64_t from thinc.features cimport Extractor from thinc.learner cimport LinearModel from .arc_eager cimport TransitionSystem -from ..tokens cimport Tokens, TokenC -from ._state cimport State +from ..structs cimport TokenC +from ..tokens cimport Tokens cdef class GreedyParser: diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index 147bf0ce1..f24ed7425 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -3,6 +3,7 @@ from __future__ import unicode_literals from os import path +import re from cython.operator cimport dereference as deref from cython.operator cimport preincrement as preinc @@ -27,7 +28,7 @@ cdef class Tokenizer: self._prefix_re = prefix_re self._suffix_re = suffix_re self._infix_re = infix_re - self.vocab = Vocab(self.get_props) + self.vocab = vocab self._load_special_tokenization(rules) @classmethod @@ -39,11 +40,12 @@ cdef class Tokenizer: assert path.exists(data_dir) and path.isdir(data_dir) rules, prefix_re, suffix_re, infix_re = util.read_lang_data(data_dir) - return cls(vocab, rules, prefix_re, 
suffix_re, infix_re) + return cls(vocab, rules, re.compile(prefix_re), re.compile(suffix_re), + re.compile(infix_re)) cpdef Tokens tokens_from_list(self, list strings): cdef int length = sum([len(s) for s in strings]) - cdef Tokens tokens = Tokens(self.vocab.strings, length) + cdef Tokens tokens = Tokens(self.vocab, length) if length == 0: return tokens cdef UniStr string_struct @@ -76,7 +78,7 @@ cdef class Tokenizer: tokens (Tokens): A Tokens object, giving access to a sequence of Lexemes. """ cdef int length = len(string) - cdef Tokens tokens = Tokens(self.vocab.strings, length) + cdef Tokens tokens = Tokens(self.vocab, length) if length == 0: return tokens cdef int i = 0 diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd index 12eb70cc1..ec16c77d6 100644 --- a/spacy/tokens.pxd +++ b/spacy/tokens.pxd @@ -4,11 +4,11 @@ import numpy as np cimport numpy as np from cymem.cymem cimport Pool +from thinc.typedefs cimport atom_t -from .structs cimport Lexeme, TokenC, Morphology - -from .typedefs cimport flags_t, attr_t, flags_t - +from .typedefs cimport flags_t +from .structs cimport Morphology, TokenC, Lexeme +from .vocab cimport Vocab from .strings cimport StringStore @@ -22,7 +22,7 @@ ctypedef fused LexemeOrToken: cdef class Tokens: cdef Pool mem - cdef StringStore strings + cdef Vocab vocab cdef list tag_names cdef TokenC* data @@ -36,7 +36,7 @@ cdef class Tokens: cdef class Token: - cdef public StringStore strings + cdef readonly StringStore string_store cdef public int i cdef public int idx cdef int pos @@ -44,18 +44,18 @@ cdef class Token: cdef public int head cdef public int dep_tag - cdef public attr_t id - cdef public attr_t cluster - cdef public attr_t length - cdef public attr_t postype - cdef public attr_t sensetype + cdef public atom_t id + cdef public atom_t cluster + cdef public atom_t length + cdef public atom_t postype + cdef public atom_t sensetype - cdef public attr_t sic - cdef public attr_t norm - cdef public attr_t shape - cdef public attr_t asciied 
- cdef public attr_t prefix - cdef public attr_t suffix + cdef public atom_t sic + cdef public atom_t norm + cdef public atom_t shape + cdef public atom_t asciied + cdef public atom_t prefix + cdef public atom_t suffix cdef public float prob diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx index f4b1c952d..5e81c4a4e 100644 --- a/spacy/tokens.pyx +++ b/spacy/tokens.pyx @@ -2,7 +2,9 @@ from preshed.maps cimport PreshMap from preshed.counter cimport PreshCounter -from .lexeme cimport get_attr, EMPTY_LEXEME, LEMMA, attr_id_t +from .lexeme cimport get_attr, EMPTY_LEXEME +from .typedefs cimport attr_id_t, attr_t +from .typedefs cimport LEMMA cimport cython import numpy as np @@ -30,8 +32,8 @@ cdef class Tokens: >>> from spacy.en import EN >>> tokens = EN.tokenize('An example sentence.') """ - def __init__(self, StringStore string_store, string_length=0): - self.string_store = string_store + def __init__(self, Vocab vocab, string_length=0): + self.vocab = vocab if string_length >= 3: size = int(string_length / 3.0) else: @@ -50,7 +52,7 @@ cdef class Tokens: def __getitem__(self, i): bounds_check(i, self.length, PADDING) - return Token(self.string_store, i, self.data[i].idx, self.data[i].pos, + return Token(self.vocab.strings, i, self.data[i].idx, self.data[i].pos, self.data[i].lemma, self.data[i].head, self.data[i].dep_tag, self.data[i].lex[0]) @@ -119,10 +121,10 @@ cdef class Token: int pos, int lemma, int head, int dep_tag, dict lex): self.string_store = string_store self.idx = idx - self.pos = pos + self.pos_id = pos self.i = i self.head = head - self.dep_tag = dep_tag + self.dep_id = dep_tag self.id = lex['id'] self.lemma = lemma @@ -154,6 +156,9 @@ cdef class Token: cdef bytes utf8string = self.string_store[self.lemma] return utf8string.decode('utf8') + property dep: + def __get__(self): + return self.string_store.dep_tags[self.dep_id] property pos: def __get__(self): - return self.lang.pos_tagger.tag_names[self.pos] + return self.string_store.pos_tags[self.pos_id] diff 
--git a/spacy/typedefs.pxd b/spacy/typedefs.pxd index f91f55469..4b387be7d 100644 --- a/spacy/typedefs.pxd +++ b/spacy/typedefs.pxd @@ -21,6 +21,87 @@ cpdef enum univ_tag_t: N_UNIV_TAGS +# Reserve 64 values for flag features +cpdef enum attr_id_t: + FLAG0 + FLAG1 + FLAG2 + FLAG3 + FLAG4 + FLAG5 + FLAG6 + FLAG7 + FLAG8 + FLAG9 + FLAG10 + FLAG11 + FLAG12 + FLAG13 + FLAG14 + FLAG15 + FLAG16 + FLAG17 + FLAG18 + FLAG19 + FLAG20 + FLAG21 + FLAG22 + FLAG23 + FLAG24 + FLAG25 + FLAG26 + FLAG27 + FLAG28 + FLAG29 + FLAG30 + FLAG31 + FLAG32 + FLAG33 + FLAG34 + FLAG35 + FLAG36 + FLAG37 + FLAG38 + FLAG39 + FLAG40 + FLAG41 + FLAG42 + FLAG43 + FLAG44 + FLAG45 + FLAG46 + FLAG47 + FLAG48 + FLAG49 + FLAG50 + FLAG51 + FLAG52 + FLAG53 + FLAG54 + FLAG55 + FLAG56 + FLAG57 + FLAG58 + FLAG59 + FLAG60 + FLAG61 + FLAG62 + FLAG63 + + ID + SIC + DENSE + SHAPE + PREFIX + SUFFIX + + LENGTH + CLUSTER + POS_TYPE + LEMMA + + + ctypedef uint64_t hash_t ctypedef char* utf8_t ctypedef uint32_t attr_t