spaCy/website/api/stringstore.jade

240 lines
5.9 KiB
Plaintext
Raw Normal View History

2016-10-31 18:04:15 +00:00
//- 💫 DOCS > API > STRINGSTORE
2017-10-03 12:27:22 +00:00
include ../_includes/_mixins
2016-10-31 18:04:15 +00:00
p
| Look up strings by 64-bit hashes. As of v2.0, spaCy uses hash values
| instead of integer IDs. This ensures that strings always map to the
| same ID, even from different #[code StringStores].
2016-10-31 18:04:15 +00:00
+h(2, "init") StringStore.__init__
+tag method
p
| Create the #[code StringStore].
+aside-code("Example").
from spacy.strings import StringStore
stringstore = StringStore([u'apple', u'orange'])
2016-10-31 18:04:15 +00:00
+table(["Name", "Type", "Description"])
+row
+cell #[code strings]
+cell iterable
2016-10-31 18:04:15 +00:00
+cell A sequence of unicode strings to add to the store.
2017-10-03 12:27:22 +00:00
+row("foot")
+cell returns
2016-10-31 18:04:15 +00:00
+cell #[code StringStore]
+cell The newly constructed object.
+h(2, "len") StringStore.__len__
+tag method
p Get the number of strings in the store.
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
assert len(stringstore) == 2
2016-10-31 18:04:15 +00:00
+table(["Name", "Type", "Description"])
2017-10-03 12:27:22 +00:00
+row("foot")
+cell returns
2016-10-31 18:04:15 +00:00
+cell int
+cell The number of strings in the store.
+h(2, "getitem") StringStore.__getitem__
+tag method
p Retrieve a string from a given hash, or vice versa.
2016-10-31 18:04:15 +00:00
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
apple_hash = stringstore[u'apple']
2017-05-28 23:06:49 +00:00
assert apple_hash == 8566208034543834098
assert stringstore[apple_hash] == u'apple'
2016-10-31 18:04:15 +00:00
+table(["Name", "Type", "Description"])
+row
+cell #[code string_or_id]
+cell bytes, unicode or uint64
2016-10-31 18:04:15 +00:00
+cell The value to encode.
2017-10-03 12:27:22 +00:00
+row("foot")
+cell returns
+cell unicode or int
+cell The value to be retrieved.
2016-10-31 18:04:15 +00:00
+h(2, "contains") StringStore.__contains__
+tag method
p Check whether a string is in the store.
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
2017-05-28 23:06:49 +00:00
assert u'apple' in stringstore
assert u'cherry' not in stringstore
2016-10-31 18:04:15 +00:00
+table(["Name", "Type", "Description"])
+row
+cell #[code string]
+cell unicode
+cell The string to check.
2017-10-03 12:27:22 +00:00
+row("foot")
+cell returns
2016-10-31 18:04:15 +00:00
+cell bool
+cell Whether the store contains the string.
+h(2, "iter") StringStore.__iter__
+tag method
p
| Iterate over the strings in the store, in order. Note that a newly
| initialised store will always include an empty string #[code ''] at
| position #[code 0].
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
all_strings = [s for s in stringstore]
assert all_strings == [u'apple', u'orange']
2016-10-31 18:04:15 +00:00
+table(["Name", "Type", "Description"])
2017-10-03 12:27:22 +00:00
+row("foot")
+cell yields
2016-10-31 18:04:15 +00:00
+cell unicode
+cell A string in the store.
+h(2, "add") StringStore.add
+tag method
+tag-new(2)
p Add a string to the #[code StringStore].
+aside-code("Example").
stringstore = StringStore([u'apple', u'orange'])
2017-05-28 17:42:44 +00:00
banana_hash = stringstore.add(u'banana')
assert len(stringstore) == 3
2017-05-28 23:06:49 +00:00
assert banana_hash == 2525716904149915114
2017-05-28 17:42:44 +00:00
assert stringstore[banana_hash] == u'banana'
assert stringstore[u'banana'] == banana_hash
+table(["Name", "Type", "Description"])
+row
+cell #[code string]
+cell unicode
+cell The string to add.
2017-10-03 12:27:22 +00:00
+row("foot")
+cell returns
+cell uint64
+cell The string's hash value.
+h(2, "to_disk") StringStore.to_disk
+tag method
+tag-new(2)
p Save the current state to a directory.
+aside-code("Example").
stringstore.to_disk('/path/to/strings')
+table(["Name", "Type", "Description"])
+row
+cell #[code path]
+cell unicode or #[code Path]
+cell
| A path to a directory, which will be created if it doesn't exist.
| Paths may be either strings or #[code Path]-like objects.
2017-05-26 10:43:16 +00:00
+h(2, "from_disk") StringStore.from_disk
+tag method
+tag-new(2)
p Load state from a directory. Modifies the object in place and returns it.
+aside-code("Example").
from spacy.strings import StringStore
stringstore = StringStore().from_disk('/path/to/strings')
+table(["Name", "Type", "Description"])
+row
+cell #[code path]
+cell unicode or #[code Path]
+cell
| A path to a directory. Paths may be either strings or
| #[code Path]-like objects.
2017-10-03 12:27:22 +00:00
+row("foot")
+cell returns
2017-05-26 10:43:16 +00:00
+cell #[code StringStore]
+cell The modified #[code StringStore] object.
2017-05-26 10:43:16 +00:00
+h(2, "to_bytes") StringStore.to_bytes
+tag method
p Serialize the current state to a binary string.
+aside-code("Example").
store_bytes = stringstore.to_bytes()
+table(["Name", "Type", "Description"])
+row
+cell #[code **exclude]
+cell -
+cell Named attributes to prevent from being serialized.
2017-10-03 12:27:22 +00:00
+row("foot")
+cell returns
+cell bytes
2017-05-26 10:43:16 +00:00
+cell The serialized form of the #[code StringStore] object.
2017-05-26 10:43:16 +00:00
+h(2, "from_bytes") StringStore.from_bytes
+tag method
p Load state from a binary string.
+aside-code("Example").
from spacy.strings import StringStore
store_bytes = stringstore.to_bytes()
new_store = StringStore().from_bytes(store_bytes)
+table(["Name", "Type", "Description"])
+row
+cell #[code bytes_data]
+cell bytes
+cell The data to load from.
+row
+cell #[code **exclude]
+cell -
+cell Named attributes to prevent from being loaded.
2017-10-03 12:27:22 +00:00
+row("foot")
+cell returns
+cell #[code StringStore]
+cell The #[code StringStore] object.
2017-05-28 23:06:49 +00:00
+h(2, "util") Utilities
+h(3, "hash_string") strings.hash_string
+tag function
p Get a 64-bit hash for a given string.
+aside-code("Example").
from spacy.strings import hash_string
assert hash_string(u'apple') == 8566208034543834098
+table(["Name", "Type", "Description"])
+row
+cell #[code string]
+cell unicode
+cell The string to hash.
2017-10-03 12:27:22 +00:00
+row("foot")
2017-05-28 23:06:49 +00:00
+cell returns
+cell uint64
+cell The hash.