From aad9551d7ca3931602545e18fc149abc7aec1424 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 3 May 2021 13:03:10 +0000
Subject: [PATCH 1/2] Bump datasets from 1.5.0 to 1.6.2

Bumps [datasets](https://github.com/huggingface/datasets) from 1.5.0 to 1.6.2.
- [Release notes](https://github.com/huggingface/datasets/releases)
- [Commits](https://github.com/huggingface/datasets/compare/1.5.0...1.6.2)

Signed-off-by: dependabot[bot]
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 5e8c0d46..a086ca3c 100644
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,7 @@ setuptools.setup(
         'pyrouge>=0.1.3',
         'sacrebleu~=1.0',
         'requests~=2.22',
-        'datasets==1.5.0',
+        'datasets==1.6.2',
         'seqeval==1.2.2',
         'transformers==4.5.1',
         'sentencepiece==0.1.*',

From 439a785bf653920abd488d0c5c3ef0003b5d02bc Mon Sep 17 00:00:00 2001
From: mehrad
Date: Fri, 21 May 2021 12:12:21 -0700
Subject: [PATCH 2/2] Workaround to access cache_files

see https://github.com/huggingface/datasets/issues/2387
---
 genienlp/tasks/hf_dataset.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/genienlp/tasks/hf_dataset.py b/genienlp/tasks/hf_dataset.py
index 5ce611de..a8a941d8 100644
--- a/genienlp/tasks/hf_dataset.py
+++ b/genienlp/tasks/hf_dataset.py
@@ -72,22 +72,22 @@ class HFDataset(CQA):
         train_data, validation_data, test_data = None, None, None
         train_path, validation_path, test_path = None, None, None
         if train:
-            train_data = load_dataset(name, split='train', cache_dir=root)
+            train_data = load_dataset(name, split='train', cache_dir=root, keep_in_memory=False)
             train_path = train_data.cache_files[0]['filename']
         if validation:
-            validation_data = load_dataset(name, split=validation, cache_dir=root)
+            validation_data = load_dataset(name, split=validation, cache_dir=root, keep_in_memory=False)
             validation_path = validation_data.cache_files[0]['filename']
         if test:
-            test_data = load_dataset(name, split='test', cache_dir=root)
+            test_data = load_dataset(name, split='test', cache_dir=root, keep_in_memory=False)
             test_path = test_data.cache_files[0]['filename']

         if kwargs.pop('hf_test_overfit', False):
             # override validation/ test data with train data
             if validation:
-                validation_data = load_dataset(name, split='train', cache_dir=root)
+                validation_data = load_dataset(name, split='train', cache_dir=root, keep_in_memory=False)
                 validation_path = validation_data.cache_files[0]['filename']
             if test:
-                test_data = load_dataset(name, split='train', cache_dir=root)
+                test_data = load_dataset(name, split='train', cache_dir=root, keep_in_memory=False)
                 test_path = test_data.cache_files[0]['filename']

         train_data = None if train is None else cls(train_data, **kwargs)