spaCy/docs/source/example_wsj0001.json

338 lines
8.0 KiB
JSON

{
"id": "wsj_0001",
"paragraphs": [
{
"raw": "Pierre Vinken, 61 years old, will join the board as a nonexecutive director Nov. 29. Mr. Vinken is chairman of Elsevier N.V., the Dutch publishing group.",
"segmented": "Pierre Vinken<SEP>, 61 years old<SEP>, will join the board as a nonexecutive director Nov. 29<SEP>.<SENT>Mr. Vinken is chairman of Elsevier N.V.<SEP>, the Dutch publishing group<SEP>.",
"sents": [
0,
85
],
"tokens": [
{
"dep": "NMOD",
"start": 0,
"head": 7,
"tag": "NNP",
"orth": "Pierre"
},
{
"dep": "SUB",
"start": 7,
"head": 29,
"tag": "NNP",
"orth": "Vinken"
},
{
"dep": "P",
"start": 13,
"head": 7,
"tag": ",",
"orth": ","
},
{
"dep": "NMOD",
"start": 15,
"head": 18,
"tag": "CD",
"orth": "61"
},
{
"dep": "AMOD",
"start": 18,
"head": 24,
"tag": "NNS",
"orth": "years"
},
{
"dep": "NMOD",
"start": 24,
"head": 7,
"tag": "JJ",
"orth": "old"
},
{
"dep": "P",
"start": 27,
"head": 7,
"tag": ",",
"orth": ","
},
{
"dep": "ROOT",
"start": 29,
"head": -1,
"tag": "MD",
"orth": "will"
},
{
"dep": "VC",
"start": 34,
"head": 29,
"tag": "VB",
"orth": "join"
},
{
"dep": "NMOD",
"start": 39,
"head": 43,
"tag": "DT",
"orth": "the"
},
{
"dep": "OBJ",
"start": 43,
"head": 34,
"tag": "NN",
"orth": "board"
},
{
"dep": "VMOD",
"start": 49,
"head": 34,
"tag": "IN",
"orth": "as"
},
{
"dep": "NMOD",
"start": 52,
"head": 67,
"tag": "DT",
"orth": "a"
},
{
"dep": "NMOD",
"start": 54,
"head": 67,
"tag": "JJ",
"orth": "nonexecutive"
},
{
"dep": "PMOD",
"start": 67,
"head": 49,
"tag": "NN",
"orth": "director"
},
{
"dep": "VMOD",
"start": 76,
"head": 34,
"tag": "NNP",
"orth": "Nov."
},
{
"dep": "NMOD",
"start": 81,
"head": 76,
"tag": "CD",
"orth": "29"
},
{
"dep": "P",
"start": 83,
"head": 29,
"tag": ".",
"orth": "."
},
{
"dep": "NMOD",
"start": 85,
"head": 89,
"tag": "NNP",
"orth": "Mr."
},
{
"dep": "SUB",
"start": 89,
"head": 96,
"tag": "NNP",
"orth": "Vinken"
},
{
"dep": "ROOT",
"start": 96,
"head": -1,
"tag": "VBZ",
"orth": "is"
},
{
"dep": "PRD",
"start": 99,
"head": 96,
"tag": "NN",
"orth": "chairman"
},
{
"dep": "NMOD",
"start": 108,
"head": 99,
"tag": "IN",
"orth": "of"
},
{
"dep": "NMOD",
"start": 111,
"head": 120,
"tag": "NNP",
"orth": "Elsevier"
},
{
"dep": "NMOD",
"start": 120,
"head": 147,
"tag": "NNP",
"orth": "N.V."
},
{
"dep": "P",
"start": 124,
"head": 147,
"tag": ",",
"orth": ","
},
{
"dep": "NMOD",
"start": 126,
"head": 147,
"tag": "DT",
"orth": "the"
},
{
"dep": "NMOD",
"start": 130,
"head": 147,
"tag": "NNP",
"orth": "Dutch"
},
{
"dep": "NMOD",
"start": 136,
"head": 147,
"tag": "VBG",
"orth": "publishing"
},
{
"dep": "PMOD",
"start": 147,
"head": 108,
"tag": "NN",
"orth": "group"
},
{
"dep": "P",
"start": 152,
"head": 96,
"tag": ".",
"orth": "."
}
],
"brackets": [
{
"start": 0,
"end": 7,
"label": "NP"
},
{
"start": 15,
"end": 18,
"label": "NP"
},
{
"start": 15,
"end": 24,
"label": "ADJP"
},
{
"start": 0,
"end": 27,
"label": "NP-SBJ"
},
{
"start": 39,
"end": 43,
"label": "NP"
},
{
"start": 52,
"end": 67,
"label": "NP"
},
{
"start": 49,
"end": 67,
"label": "PP-CLR"
},
{
"start": 76,
"end": 81,
"label": "NP-TMP"
},
{
"start": 34,
"end": 81,
"label": "VP"
},
{
"start": 29,
"end": 81,
"label": "VP"
},
{
"start": 0,
"end": 83,
"label": "S"
},
{
"start": 85,
"end": 89,
"label": "NP-SBJ"
},
{
"start": 99,
"end": 99,
"label": "NP"
},
{
"start": 111,
"end": 120,
"label": "NP"
},
{
"start": 126,
"end": 147,
"label": "NP"
},
{
"start": 111,
"end": 147,
"label": "NP"
},
{
"start": 108,
"end": 147,
"label": "PP"
},
{
"start": 99,
"end": 147,
"label": "NP-PRD"
},
{
"start": 96,
"end": 147,
"label": "VP"
},
{
"start": 85,
"end": 152,
"label": "S"
}
]
}
]
}