@util.registry.readers("spacy.read_labels.v1")
def read_labels(path: Path, *, require: bool = False):
    """Read a JSON file of labels, optionally tolerating a missing file.

    The reader is deliberately given a specific (non-generic) name: it is not
    meant to be used for loading arbitrary JSON, because its `require`
    argument defaults to False.

    path (Path): Location of the JSON file to read.
    require (bool): Keyword-only. When False (the default) and `path` does not
        exist, None is returned instead of attempting to read. When True, the
        path is handed to `srsly.read_json` without an existence check.
    RETURNS: Whatever `srsly.read_json` returns for `path`, or None if the
        file is absent and not required.
    """
    # `require` is tested first so the filesystem is not touched when the
    # caller insists on the file being read.
    if require or path.exists():
        return srsly.read_json(path)
    return None