mirror of https://github.com/explosion/spaCy.git
338 lines
8.0 KiB
JSON
338 lines
8.0 KiB
JSON
{
|
|
"id": "wsj_0001",
|
|
"paragraphs": [
|
|
{
|
|
"raw": "Pierre Vinken, 61 years old, will join the board as a nonexecutive director Nov. 29. Mr. Vinken is chairman of Elsevier N.V., the Dutch publishing group.",
|
|
|
|
"segmented": "Pierre Vinken<SEP>, 61 years old<SEP>, will join the board as a nonexecutive director Nov. 29<SEP>.<SENT>Mr. Vinken is chairman of Elsevier N.V.<SEP>, the Dutch publishing group<SEP>.",
|
|
|
|
"sents": [
|
|
0,
|
|
85
|
|
],
|
|
|
|
"tokens": [
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 0,
|
|
"head": 7,
|
|
"tag": "NNP",
|
|
"orth": "Pierre"
|
|
},
|
|
{
|
|
"dep": "SUB",
|
|
"start": 7,
|
|
"head": 29,
|
|
"tag": "NNP",
|
|
"orth": "Vinken"
|
|
},
|
|
{
|
|
"dep": "P",
|
|
"start": 13,
|
|
"head": 7,
|
|
"tag": ",",
|
|
"orth": ","
|
|
},
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 15,
|
|
"head": 18,
|
|
"tag": "CD",
|
|
"orth": "61"
|
|
},
|
|
{
|
|
"dep": "AMOD",
|
|
"start": 18,
|
|
"head": 24,
|
|
"tag": "NNS",
|
|
"orth": "years"
|
|
},
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 24,
|
|
"head": 7,
|
|
"tag": "JJ",
|
|
"orth": "old"
|
|
},
|
|
{
|
|
"dep": "P",
|
|
"start": 27,
|
|
"head": 7,
|
|
"tag": ",",
|
|
"orth": ","
|
|
},
|
|
{
|
|
"dep": "ROOT",
|
|
"start": 29,
|
|
"head": -1,
|
|
"tag": "MD",
|
|
"orth": "will"
|
|
},
|
|
{
|
|
"dep": "VC",
|
|
"start": 34,
|
|
"head": 29,
|
|
"tag": "VB",
|
|
"orth": "join"
|
|
},
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 39,
|
|
"head": 43,
|
|
"tag": "DT",
|
|
"orth": "the"
|
|
},
|
|
{
|
|
"dep": "OBJ",
|
|
"start": 43,
|
|
"head": 34,
|
|
"tag": "NN",
|
|
"orth": "board"
|
|
},
|
|
{
|
|
"dep": "VMOD",
|
|
"start": 49,
|
|
"head": 34,
|
|
"tag": "IN",
|
|
"orth": "as"
|
|
},
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 52,
|
|
"head": 67,
|
|
"tag": "DT",
|
|
"orth": "a"
|
|
},
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 54,
|
|
"head": 67,
|
|
"tag": "JJ",
|
|
"orth": "nonexecutive"
|
|
},
|
|
{
|
|
"dep": "PMOD",
|
|
"start": 67,
|
|
"head": 49,
|
|
"tag": "NN",
|
|
"orth": "director"
|
|
},
|
|
{
|
|
"dep": "VMOD",
|
|
"start": 76,
|
|
"head": 34,
|
|
"tag": "NNP",
|
|
"orth": "Nov."
|
|
},
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 81,
|
|
"head": 76,
|
|
"tag": "CD",
|
|
"orth": "29"
|
|
},
|
|
{
|
|
"dep": "P",
|
|
"start": 83,
|
|
"head": 29,
|
|
"tag": ".",
|
|
"orth": "."
|
|
},
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 85,
|
|
"head": 89,
|
|
"tag": "NNP",
|
|
"orth": "Mr."
|
|
},
|
|
{
|
|
"dep": "SUB",
|
|
"start": 89,
|
|
"head": 96,
|
|
"tag": "NNP",
|
|
"orth": "Vinken"
|
|
},
|
|
{
|
|
"dep": "ROOT",
|
|
"start": 96,
|
|
"head": -1,
|
|
"tag": "VBZ",
|
|
"orth": "is"
|
|
},
|
|
{
|
|
"dep": "PRD",
|
|
"start": 99,
|
|
"head": 96,
|
|
"tag": "NN",
|
|
"orth": "chairman"
|
|
},
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 108,
|
|
"head": 99,
|
|
"tag": "IN",
|
|
"orth": "of"
|
|
},
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 111,
|
|
"head": 120,
|
|
"tag": "NNP",
|
|
"orth": "Elsevier"
|
|
},
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 120,
|
|
"head": 147,
|
|
"tag": "NNP",
|
|
"orth": "N.V."
|
|
},
|
|
{
|
|
"dep": "P",
|
|
"start": 124,
|
|
"head": 147,
|
|
"tag": ",",
|
|
"orth": ","
|
|
},
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 126,
|
|
"head": 147,
|
|
"tag": "DT",
|
|
"orth": "the"
|
|
},
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 130,
|
|
"head": 147,
|
|
"tag": "NNP",
|
|
"orth": "Dutch"
|
|
},
|
|
{
|
|
"dep": "NMOD",
|
|
"start": 136,
|
|
"head": 147,
|
|
"tag": "VBG",
|
|
"orth": "publishing"
|
|
},
|
|
{
|
|
"dep": "PMOD",
|
|
"start": 147,
|
|
"head": 108,
|
|
"tag": "NN",
|
|
"orth": "group"
|
|
},
|
|
{
|
|
"dep": "P",
|
|
"start": 152,
|
|
"head": 96,
|
|
"tag": ".",
|
|
"orth": "."
|
|
}
|
|
],
|
|
"brackets": [
|
|
{
|
|
"start": 0,
|
|
"end": 7,
|
|
"label": "NP"
|
|
},
|
|
{
|
|
"start": 15,
|
|
"end": 18,
|
|
"label": "NP"
|
|
},
|
|
{
|
|
"start": 15,
|
|
"end": 24,
|
|
"label": "ADJP"
|
|
},
|
|
{
|
|
"start": 0,
|
|
"end": 27,
|
|
"label": "NP-SBJ"
|
|
},
|
|
{
|
|
"start": 39,
|
|
"end": 43,
|
|
"label": "NP"
|
|
},
|
|
{
|
|
"start": 52,
|
|
"end": 67,
|
|
"label": "NP"
|
|
},
|
|
{
|
|
"start": 49,
|
|
"end": 67,
|
|
"label": "PP-CLR"
|
|
},
|
|
{
|
|
"start": 76,
|
|
"end": 81,
|
|
"label": "NP-TMP"
|
|
},
|
|
{
|
|
"start": 34,
|
|
"end": 81,
|
|
"label": "VP"
|
|
},
|
|
{
|
|
"start": 29,
|
|
"end": 81,
|
|
"label": "VP"
|
|
},
|
|
{
|
|
"start": 0,
|
|
"end": 83,
|
|
"label": "S"
|
|
},
|
|
{
|
|
"start": 85,
|
|
"end": 89,
|
|
"label": "NP-SBJ"
|
|
},
|
|
{
|
|
"start": 99,
|
|
"end": 99,
|
|
"label": "NP"
|
|
},
|
|
{
|
|
"start": 111,
|
|
"end": 120,
|
|
"label": "NP"
|
|
},
|
|
{
|
|
"start": 126,
|
|
"end": 147,
|
|
"label": "NP"
|
|
},
|
|
{
|
|
"start": 111,
|
|
"end": 147,
|
|
"label": "NP"
|
|
},
|
|
{
|
|
"start": 108,
|
|
"end": 147,
|
|
"label": "PP"
|
|
},
|
|
{
|
|
"start": 99,
|
|
"end": 147,
|
|
"label": "NP-PRD"
|
|
},
|
|
{
|
|
"start": 96,
|
|
"end": 147,
|
|
"label": "VP"
|
|
},
|
|
{
|
|
"start": 85,
|
|
"end": 152,
|
|
"label": "S"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
}
|