Add Chinese PTB tags to glossary (#7993)

This commit is contained in:
Adriane Boyd 2021-05-06 10:43:03 +02:00 committed by GitHub
parent 0a22fed634
commit cc5aeaed29
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 36 additions and 2 deletions

View File

@@ -58,7 +58,7 @@ GLOSSARY = {
"FW": "foreign word",
"HYPH": "punctuation mark, hyphen",
"IN": "conjunction, subordinating or preposition",
"JJ": "adjective",
"JJ": "adjective (English), other noun-modifier (Chinese)",
"JJR": "adjective, comparative",
"JJS": "adjective, superlative",
"LS": "list item marker",
@@ -88,7 +88,7 @@ GLOSSARY = {
"WP": "wh-pronoun, personal",
"WP$": "wh-pronoun, possessive",
"WRB": "wh-adverb",
"SP": "space",
"SP": "space (English), sentence-final particle (Chinese)",
"ADD": "email",
"NFP": "superfluous punctuation",
"GW": "additional word in multi-word expression",
@@ -152,6 +152,40 @@ GLOSSARY = {
"VVIZU": 'infinitive with "zu", full',
"VVPP": "perfect participle, full",
"XY": "non-word containing non-letter",
# POS Tags (Chinese)
# OntoNotes / Chinese Penn Treebank
# https://repository.upenn.edu/cgi/viewcontent.cgi?article=1039&context=ircs_reports
"AD": "adverb",
"AS": "aspect marker",
"BA": "把 in ba-construction",
# "CD": "cardinal number",
"CS": "subordinating conjunction",
"DEC": "的 in a relative clause",
"DEG": "associative 的",
"DER": "得 in V-de const. and V-de-R",
"DEV": "地 before VP",
"ETC": "for words 等, 等等",
# "FW": "foreign words",
"IJ": "interjection",
# "JJ": "other noun-modifier",
"LB": "被 in long bei-const",
"LC": "localizer",
"M": "measure word",
"MSP": "other particle",
# "NN": "common noun",
"NR": "proper noun",
"NT": "temporal noun",
"OD": "ordinal number",
"ON": "onomatopoeia",
"P": "preposition excluding 把 and 被",
"PN": "pronoun",
"PU": "punctuation",
"SB": "被 in short bei-const",
# "SP": "sentence-final particle",
"VA": "predicative adjective",
"VC": "是 (copula)",
"VE": "有 as the main verb",
"VV": "other verb",
# Noun chunks
"NP": "noun phrase",
"PP": "prepositional phrase",