2016-10-31 18:04:15 +00:00
|
|
|
//- 💫 DOCS > API > STRINGSTORE
|
|
|
|
|
|
|
|
include ../../_includes/_mixins
|
|
|
|
|
|
|
|
p Map strings to and from integer IDs.
|
|
|
|
|
|
|
|
+h(2, "init") StringStore.__init__
|
|
|
|
+tag method
|
|
|
|
|
2017-05-21 12:18:58 +00:00
|
|
|
p
|
|
|
|
| Create the #[code StringStore]. Note that a newly initialised store will
|
|
|
|
| always include an empty string #[code ''] at position #[code 0].
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
from spacy.strings import StringStore
|
|
|
|
stringstore = StringStore([u'apple', u'orange'])
|
2016-10-31 18:04:15 +00:00
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
+row
|
|
|
|
+cell #[code strings]
|
2017-05-21 12:18:58 +00:00
|
|
|
+cell iterable
|
2016-10-31 18:04:15 +00:00
|
|
|
+cell A sequence of unicode strings to add to the store.
|
|
|
|
|
|
|
|
+footrow
|
2017-05-18 22:02:34 +00:00
|
|
|
+cell returns
|
2016-10-31 18:04:15 +00:00
|
|
|
+cell #[code StringStore]
|
|
|
|
+cell The newly constructed object.
|
|
|
|
|
|
|
|
+h(2, "len") StringStore.__len__
|
|
|
|
+tag method
|
|
|
|
|
|
|
|
p Get the number of strings in the store.
|
|
|
|
|
2017-05-21 12:18:58 +00:00
|
|
|
+aside-code("Example").
|
|
|
|
stringstore = StringStore([u'apple', u'orange'])
|
|
|
|
assert len(stringstore) == 2
|
|
|
|
|
2016-10-31 18:04:15 +00:00
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
+footrow
|
2017-05-18 22:02:34 +00:00
|
|
|
+cell returns
|
2016-10-31 18:04:15 +00:00
|
|
|
+cell int
|
|
|
|
+cell The number of strings in the store.
|
|
|
|
|
|
|
|
+h(2, "getitem") StringStore.__getitem__
|
|
|
|
+tag method
|
|
|
|
|
|
|
|
p Retrieve a string from a given integer ID, or vice versa.
|
|
|
|
|
2017-05-21 12:18:58 +00:00
|
|
|
+aside-code("Example").
|
|
|
|
stringstore = StringStore([u'apple', u'orange'])
|
|
|
|
int_id = stringstore[u'apple'] # 1
|
|
|
|
assert stringstore[int_id] == u'apple'
|
|
|
|
|
2016-10-31 18:04:15 +00:00
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
+row
|
|
|
|
+cell #[code string_or_id]
|
2017-05-21 12:18:58 +00:00
|
|
|
+cell bytes, unicode or int
|
2016-10-31 18:04:15 +00:00
|
|
|
+cell The value to encode.
|
|
|
|
|
|
|
|
+footrow
|
2017-05-18 22:02:34 +00:00
|
|
|
+cell returns
|
2017-05-21 12:18:58 +00:00
|
|
|
+cell unicode or int
|
|
|
|
+cell The value to be retrieved.
|
2016-10-31 18:04:15 +00:00
|
|
|
|
|
|
|
+h(2, "contains") StringStore.__contains__
|
|
|
|
+tag method
|
|
|
|
|
|
|
|
p Check whether a string is in the store.
|
|
|
|
|
2017-05-21 12:18:58 +00:00
|
|
|
+aside-code("Example").
|
|
|
|
stringstore = StringStore([u'apple', u'orange'])
|
|
|
|
assert u'apple' in stringstore
|
|
|
|
assert u'cherry' not in stringstore
|
|
|
|
|
2016-10-31 18:04:15 +00:00
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
+row
|
|
|
|
+cell #[code string]
|
|
|
|
+cell unicode
|
|
|
|
+cell The string to check.
|
|
|
|
|
|
|
|
+footrow
|
2017-05-18 22:02:34 +00:00
|
|
|
+cell returns
|
2016-10-31 18:04:15 +00:00
|
|
|
+cell bool
|
|
|
|
+cell Whether the store contains the string.
|
|
|
|
|
|
|
|
+h(2, "iter") StringStore.__iter__
|
|
|
|
+tag method
|
|
|
|
|
2017-05-21 12:18:58 +00:00
|
|
|
p
|
|
|
|
| Iterate over the strings in the store, in order. Note that a newly
|
|
|
|
| initialised store will always include an empty string #[code ''] at
|
|
|
|
| position #[code 0].
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
stringstore = StringStore([u'apple', u'orange'])
|
|
|
|
all_strings = [s for s in stringstore]
|
|
|
|
assert all_strings == [u'', u'apple', u'orange']
|
2016-10-31 18:04:15 +00:00
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
+footrow
|
2017-05-18 22:02:34 +00:00
|
|
|
+cell yields
|
2016-10-31 18:04:15 +00:00
|
|
|
+cell unicode
|
|
|
|
+cell A string in the store.
|
2017-05-21 12:18:58 +00:00
|
|
|
|
|
|
|
+h(2, "to_disk") StringStore.to_disk
|
|
|
|
+tag method
|
2017-05-26 10:42:36 +00:00
|
|
|
+tag-new(2)
|
2017-05-21 12:18:58 +00:00
|
|
|
|
|
|
|
p Save the current state to a directory.
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
stringstore.to_disk('/path/to/strings')
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
+row
|
|
|
|
+cell #[code path]
|
|
|
|
+cell unicode or #[code Path]
|
|
|
|
+cell
|
|
|
|
| A path to a directory, which will be created if it doesn't exist.
|
|
|
|
| Paths may be either strings or #[code Path]-like objects.
|
|
|
|
|
|
|
|
+h(2, "from_disk") StringStore.from_disk
|
|
|
|
+tag method
|
2017-05-26 10:42:36 +00:00
|
|
|
+tag-new(2)
|
2017-05-21 12:18:58 +00:00
|
|
|
|
|
|
|
p Load state from a directory. Modifies the object in place and returns it.
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
from spacy.strings import StringStore
|
|
|
|
stringstore = StringStore().from_disk('/path/to/strings')
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
+row
|
|
|
|
+cell #[code path]
|
|
|
|
+cell unicode or #[code Path]
|
|
|
|
+cell
|
|
|
|
| A path to a directory. Paths may be either strings or
|
|
|
|
| #[code Path]-like objects.
|
|
|
|
|
|
|
|
+footrow
|
|
|
|
+cell returns
|
|
|
|
+cell #[code StringStore]
|
|
|
|
+cell The modified #[code StringStore] object.
|
|
|
|
|
|
|
|
+h(2, "to_bytes") StringStore.to_bytes
|
|
|
|
+tag method
|
|
|
|
|
|
|
|
p Serialize the current state to a binary string.
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
store_bytes = stringstore.to_bytes()
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
+row
|
|
|
|
+cell #[code **exclude]
|
|
|
|
+cell -
|
|
|
|
+cell Named attributes to prevent from being serialized.
|
|
|
|
|
|
|
|
+footrow
|
|
|
|
+cell returns
|
|
|
|
+cell bytes
|
|
|
|
+cell The serialized form of the #[code StringStore] object.
|
|
|
|
|
|
|
|
+h(2, "from_bytes") StringStore.from_bytes
|
|
|
|
+tag method
|
|
|
|
|
|
|
|
p Load state from a binary string.
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
from spacy.strings import StringStore
|
|
|
|
store_bytes = stringstore.to_bytes()
|
|
|
|
new_store = StringStore().from_bytes(store_bytes)
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
+row
|
|
|
|
+cell #[code bytes_data]
|
|
|
|
+cell bytes
|
|
|
|
+cell The data to load from.
|
|
|
|
|
|
|
|
+row
|
|
|
|
+cell #[code **exclude]
|
|
|
|
+cell -
|
|
|
|
+cell Named attributes to prevent from being loaded.
|
|
|
|
|
|
|
|
+footrow
|
|
|
|
+cell returns
|
|
|
|
+cell #[code StringStore]
|
|
|
|
+cell The #[code StringStore] object.
|