mirror of https://github.com/explosion/spaCy.git
Add note on tags matching tokenization (see #1613)
This commit is contained in:
parent
ac235c0baf
commit
ec08996000
|
@ -30,8 +30,11 @@ TAG_MAP = {
|
||||||
'J': {'pos': 'ADJ'}
|
'J': {'pos': 'ADJ'}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Usually you'll read this in, of course. Data formats vary.
|
# Usually you'll read this in, of course. Data formats vary. Ensure your
|
||||||
# Ensure your strings are unicode.
|
# strings are unicode and that the number of tags assigned matches spaCy's
|
||||||
|
# tokenization. If not, you can always add a 'words' key to the annotations
|
||||||
|
# that specifies the gold-standard tokenization, e.g.:
|
||||||
|
# ("Eatblueham", {'words': ['Eat', 'blue', 'ham'], 'tags': ['V', 'J', 'N']})
|
||||||
TRAIN_DATA = [
|
TRAIN_DATA = [
|
||||||
("I like green eggs", {'tags': ['N', 'V', 'J', 'N']}),
|
("I like green eggs", {'tags': ['N', 'V', 'J', 'N']}),
|
||||||
("Eat blue ham", {'tags': ['V', 'J', 'N']})
|
("Eat blue ham", {'tags': ['V', 'J', 'N']})
|
||||||
|
|
Loading…
Reference in New Issue