From aad9551d7ca3931602545e18fc149abc7aec1424 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 3 May 2021 13:03:10 +0000
Subject: [PATCH 1/2] Bump datasets from 1.5.0 to 1.6.2

Bumps [datasets](https://github.com/huggingface/datasets) from 1.5.0 to 1.6.2.
- [Release notes](https://github.com/huggingface/datasets/releases)
- [Commits](https://github.com/huggingface/datasets/compare/1.5.0...1.6.2)

Signed-off-by: dependabot[bot]
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 5e8c0d46..a086ca3c 100644
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,7 @@ setuptools.setup(
         'pyrouge>=0.1.3',
         'sacrebleu~=1.0',
         'requests~=2.22',
-        'datasets==1.5.0',
+        'datasets==1.6.2',
         'seqeval==1.2.2',
         'transformers==4.5.1',
         'sentencepiece==0.1.*',

From 439a785bf653920abd488d0c5c3ef0003b5d02bc Mon Sep 17 00:00:00 2001
From: mehrad
Date: Fri, 21 May 2021 12:12:21 -0700
Subject: [PATCH 2/2] Workaround to access cache_files

see https://github.com/huggingface/datasets/issues/2387
---
 genienlp/tasks/hf_dataset.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/genienlp/tasks/hf_dataset.py b/genienlp/tasks/hf_dataset.py
index 5ce611de..a8a941d8 100644
--- a/genienlp/tasks/hf_dataset.py
+++ b/genienlp/tasks/hf_dataset.py
@@ -72,22 +72,22 @@ class HFDataset(CQA):
         train_data, validation_data, test_data = None, None, None
         train_path, validation_path, test_path = None, None, None
         if train:
-            train_data = load_dataset(name, split='train', cache_dir=root)
+            train_data = load_dataset(name, split='train', cache_dir=root, keep_in_memory=False)
             train_path = train_data.cache_files[0]['filename']
         if validation:
-            validation_data = load_dataset(name, split=validation, cache_dir=root)
+            validation_data = load_dataset(name, split=validation, cache_dir=root, keep_in_memory=False)
             validation_path = validation_data.cache_files[0]['filename']
         if test:
-            test_data = load_dataset(name, split='test', cache_dir=root)
+            test_data = load_dataset(name, split='test', cache_dir=root, keep_in_memory=False)
             test_path = test_data.cache_files[0]['filename']

         if kwargs.pop('hf_test_overfit', False):
             # override validation/ test data with train data
             if validation:
-                validation_data = load_dataset(name, split='train', cache_dir=root)
+                validation_data = load_dataset(name, split='train', cache_dir=root, keep_in_memory=False)
                 validation_path = validation_data.cache_files[0]['filename']
             if test:
-                test_data = load_dataset(name, split='train', cache_dir=root)
+                test_data = load_dataset(name, split='train', cache_dir=root, keep_in_memory=False)
                 test_path = test_data.cache_files[0]['filename']

         train_data = None if train is None else cls(train_data, **kwargs)