Reorganise entity rules

This commit is contained in:
ines 2017-05-09 01:37:10 +02:00
parent 564939391a
commit b5ca50607e
1 changed file with 25 additions and 180 deletions

View File

@@ -5,188 +5,33 @@ from ..symbols import ORTH, ENT_TYPE, LOWER
ENT_ID = "ent_id" ENT_ID = "ent_id"
ENTITY_RULES = []
ENTITY_RULES = [ for name, tag, patterns in [
{ ("Reddit", "PRODUCT", [[{LOWER: "reddit"}]]),
ENT_ID: "Reddit", ("Linux", "PRODUCT", [[{LOWER: "linux"}]]),
"attrs": {ENT_TYPE: "PRODUCT"}, ("Haskell", "PRODUCT", [[{LOWER: "haskell"}]]),
"patterns": [ ("HaskellCurry", "PERSON", [[{LOWER: "haskell"}, {LOWER: "curry"}]]),
[{LOWER: "reddit"}] ("Javascript", "PRODUCT", [[{LOWER: "javascript"}]]),
] ("CSS", "PRODUCT", [[{LOWER: "css"}], [{LOWER: "css3"}]]),
}, ("HTML", "PRODUCT", [[{LOWER: "html"}], [{LOWER: "html5"}]]),
("Python", "PRODUCT", [[{ORTH: "Python"}]]),
{ ("Ruby", "PRODUCT", [[{ORTH: "Ruby"}]]),
ENT_ID: "Linux", ("spaCy", "PRODUCT", [[{LOWER: "spacy"}]]),
"attrs": {ENT_TYPE: "PRODUCT"}, ("displaCy", "PRODUCT", [[{LOWER: "displacy"}]]),
"patterns": [ ("Digg", "PRODUCT", [[{LOWER: "digg"}]]),
[{LOWER: "linux"}] ("FoxNews", "ORG", [[{LOWER: "foxnews"}], [{LOWER: "fox"}, {LOWER: "news"}]]),
] ("Google", "ORG", [[{LOWER: "google"}]]),
}, ("Mac", "PRODUCT", [[{LOWER: "mac"}]]),
("Wikipedia", "PRODUCT", [[{LOWER: "wikipedia"}]]),
{ ("Windows", "PRODUCT", [[{LOWER: "windows"}]]),
ENT_ID: "Haskell", ("Dell", "ORG", [[{LOWER: "dell"}]]),
"attrs": {ENT_TYPE: "PRODUCT"}, ("Facebook", "ORG", [[{LOWER: "facebook"}]]),
"patterns": [ ("Blizzard", "ORG", [[{LOWER: "blizzard"}]]),
[{LOWER: "haskell"}], ("Ubuntu", "ORG", [[{LOWER: "ubuntu"}]]),
] ("YouTube", "PRODUCT", [[{LOWER: "youtube"}]]),]:
}, ENTITY_RULES.append({ENT_ID: name, 'attrs': {ENT_TYPE: tag}, 'patterns': patterns})
{
ENT_ID: "HaskellCurry",
"attrs": {ENT_TYPE: "PERSON"},
"patterns": [
[{LOWER: "haskell"}, {LOWER: "curry"}]
]
},
{
ENT_ID: "Javascript",
"attrs": {ENT_TYPE: "PRODUCT"},
"patterns": [
[{LOWER: "javascript"}],
]
},
{
ENT_ID: "CSS",
"attrs": {ENT_TYPE: "PRODUCT"},
"patterns": [
[{LOWER: "css"}],
[{LOWER: "css3"}],
]
},
{
ENT_ID: "HTML",
"attrs": {ENT_TYPE: "PRODUCT"},
"patterns": [
[{LOWER: "html"}],
[{LOWER: "html5"}],
]
},
{
ENT_ID: "Python",
"attrs": {ENT_TYPE: "PRODUCT"},
"patterns": [
[{ORTH: "Python"}]
]
},
{
ENT_ID: "Ruby",
"attrs": {ENT_TYPE: "PRODUCT"},
"patterns": [
[{ORTH: "Ruby"}]
]
},
{
ENT_ID: "spaCy",
"attrs": {ENT_TYPE: "PRODUCT"},
"patterns": [
[{LOWER: "spacy"}]
]
},
{
ENT_ID: "displaCy",
"attrs": {ENT_TYPE: "PRODUCT"},
"patterns": [
[{LOWER: "displacy"}]
]
},
{
ENT_ID: "Digg",
"attrs": {ENT_TYPE: "PRODUCT"},
"patterns": [
[{LOWER: "digg"}]
]
},
{
ENT_ID: "FoxNews",
"attrs": {ENT_TYPE: "ORG"},
"patterns": [
[{LOWER: "foxnews"}],
[{LOWER: "fox"}, {LOWER: "news"}]
]
},
{
ENT_ID: "Google",
"attrs": {ENT_TYPE: "ORG"},
"patterns": [
[{LOWER: "google"}]
]
},
{
ENT_ID: "Mac",
"attrs": {ENT_TYPE: "PRODUCT"},
"patterns": [
[{LOWER: "mac"}]
]
},
{
ENT_ID: "Wikipedia",
"attrs": {ENT_TYPE: "PRODUCT"},
"patterns": [
[{LOWER: "wikipedia"}]
]
},
{
ENT_ID: "Windows",
"attrs": {ENT_TYPE: "PRODUCT"},
"patterns": [
[{ORTH: "Windows"}]
]
},
{
ENT_ID: "Dell",
"attrs": {ENT_TYPE: "ORG"},
"patterns": [
[{LOWER: "dell"}]
]
},
{
ENT_ID: "Facebook",
"attrs": {ENT_TYPE: "ORG"},
"patterns": [
[{LOWER: "facebook"}]
]
},
{
ENT_ID: "Blizzard",
"attrs": {ENT_TYPE: "ORG"},
"patterns": [
[{ORTH: "Blizzard"}]
]
},
{
ENT_ID: "Ubuntu",
"attrs": {ENT_TYPE: "ORG"},
"patterns": [
[{ORTH: "Ubuntu"}]
]
},
{
ENT_ID: "YouTube",
"attrs": {ENT_TYPE: "PRODUCT"},
"patterns": [
[{LOWER: "youtube"}]
]
}
]
FALSE_POSITIVES = [ FALSE_POSITIVES = [