mirror of https://github.com/explosion/spaCy.git
Warning goldparse (#4851)
* label in span not writable anymore
* Revert "label in span not writable anymore"
This reverts commit ab442338c8.
* provide more friendly error msg for parsing file
This commit is contained in:
parent
83e0a6f3e3
commit
581eeed98b
|
@ -534,6 +534,7 @@ class Errors(object):
|
||||||
"make sure the gold EL data refers to valid results of the "
|
"make sure the gold EL data refers to valid results of the "
|
||||||
"named entity recognizer in the `nlp` pipeline.")
|
"named entity recognizer in the `nlp` pipeline.")
|
||||||
# TODO: fix numbering after merging develop into master
|
# TODO: fix numbering after merging develop into master
|
||||||
|
E996 = ("Could not parse {file}: {msg}")
|
||||||
E997 = ("Tokenizer special cases are not allowed to modify the text. "
|
E997 = ("Tokenizer special cases are not allowed to modify the text. "
|
||||||
"This would map '{chunk}' to '{orth}' given token attributes "
|
"This would map '{chunk}' to '{orth}' given token attributes "
|
||||||
"'{token_attrs}'.")
|
"'{token_attrs}'.")
|
||||||
|
|
|
@ -194,9 +194,10 @@ class GoldCorpus(object):
|
||||||
i = 0
|
i = 0
|
||||||
for loc in locs:
|
for loc in locs:
|
||||||
loc = util.ensure_path(loc)
|
loc = util.ensure_path(loc)
|
||||||
if loc.parts[-1].endswith("json"):
|
file_name = loc.parts[-1]
|
||||||
|
if file_name.endswith("json"):
|
||||||
examples = read_json_file(loc)
|
examples = read_json_file(loc)
|
||||||
elif loc.parts[-1].endswith("jsonl"):
|
elif file_name.endswith("jsonl"):
|
||||||
gold_tuples = srsly.read_jsonl(loc)
|
gold_tuples = srsly.read_jsonl(loc)
|
||||||
first_gold_tuple = next(gold_tuples)
|
first_gold_tuple = next(gold_tuples)
|
||||||
gold_tuples = itertools.chain([first_gold_tuple], gold_tuples)
|
gold_tuples = itertools.chain([first_gold_tuple], gold_tuples)
|
||||||
|
@ -212,17 +213,24 @@ class GoldCorpus(object):
|
||||||
doc = ex_dict.get("text", None)
|
doc = ex_dict.get("text", None)
|
||||||
examples.append(Example.from_dict(ex_dict, doc=doc))
|
examples.append(Example.from_dict(ex_dict, doc=doc))
|
||||||
|
|
||||||
elif loc.parts[-1].endswith("msg"):
|
elif file_name.endswith("msg"):
|
||||||
text, ex_dict = srsly.read_msgpack(loc)
|
text, ex_dict = srsly.read_msgpack(loc)
|
||||||
examples = [Example.from_dict(ex_dict, doc=text)]
|
examples = [Example.from_dict(ex_dict, doc=text)]
|
||||||
else:
|
else:
|
||||||
supported = ("json", "jsonl", "msg")
|
supported = ("json", "jsonl", "msg")
|
||||||
raise ValueError(Errors.E124.format(path=loc, formats=supported))
|
raise ValueError(Errors.E124.format(path=loc, formats=supported))
|
||||||
for example in examples:
|
try:
|
||||||
yield example
|
for example in examples:
|
||||||
i += 1
|
yield example
|
||||||
if limit and i >= limit:
|
i += 1
|
||||||
return
|
if limit and i >= limit:
|
||||||
|
return
|
||||||
|
except KeyError as e:
|
||||||
|
msg = "Missing key {}".format(e)
|
||||||
|
raise KeyError(Errors.E996.format(file=file_name, msg=msg))
|
||||||
|
except UnboundLocalError as e:
|
||||||
|
msg = "Unexpected document structure"
|
||||||
|
raise ValueError(Errors.E996.format(file=file_name, msg=msg))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def dev_examples(self):
|
def dev_examples(self):
|
||||||
|
|
Loading…
Reference in New Issue