diff --git a/genienlp/tasks/hf_dataset.py b/genienlp/tasks/hf_dataset.py index 5ce611de..a8a941d8 100644 --- a/genienlp/tasks/hf_dataset.py +++ b/genienlp/tasks/hf_dataset.py @@ -72,22 +72,22 @@ class HFDataset(CQA): train_data, validation_data, test_data = None, None, None train_path, validation_path, test_path = None, None, None if train: - train_data = load_dataset(name, split='train', cache_dir=root) + train_data = load_dataset(name, split='train', cache_dir=root, keep_in_memory=False) train_path = train_data.cache_files[0]['filename'] if validation: - validation_data = load_dataset(name, split=validation, cache_dir=root) + validation_data = load_dataset(name, split=validation, cache_dir=root, keep_in_memory=False) validation_path = validation_data.cache_files[0]['filename'] if test: - test_data = load_dataset(name, split='test', cache_dir=root) + test_data = load_dataset(name, split='test', cache_dir=root, keep_in_memory=False) test_path = test_data.cache_files[0]['filename'] if kwargs.pop('hf_test_overfit', False): # override validation/ test data with train data if validation: - validation_data = load_dataset(name, split='train', cache_dir=root) + validation_data = load_dataset(name, split='train', cache_dir=root, keep_in_memory=False) validation_path = validation_data.cache_files[0]['filename'] if test: - test_data = load_dataset(name, split='train', cache_dir=root) + test_data = load_dataset(name, split='train', cache_dir=root, keep_in_memory=False) test_path = test_data.cache_files[0]['filename'] train_data = None if train is None else cls(train_data, **kwargs) diff --git a/setup.py b/setup.py index e135684d..cd61a22c 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ setuptools.setup( 'pyrouge>=0.1.3', 'sacrebleu~=1.0', 'requests~=2.22', - 'datasets==1.5.0', + 'datasets==1.6.2', 'seqeval==1.2.2', 'transformers==4.5.1', 'sentencepiece==0.1.*',