From bbc5488a62caff17c26e1801765621e6fb6313a9 Mon Sep 17 00:00:00 2001 From: thomas chaton Date: Fri, 16 Feb 2024 20:27:20 +0000 Subject: [PATCH] Enable no op optimize (#19490) --- src/lightning/data/processing/data_processor.py | 9 +++++++-- src/lightning/data/processing/utilities.py | 4 ++-- .../tests_data/processing/test_data_processor.py | 16 ++++++++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/lightning/data/processing/data_processor.py b/src/lightning/data/processing/data_processor.py index ccf1eb0c50..ae45009d0c 100644 --- a/src/lightning/data/processing/data_processor.py +++ b/src/lightning/data/processing/data_processor.py @@ -724,7 +724,12 @@ class DataChunkRecipe(DataRecipe): size = sum([c["dim"] if c["dim"] is not None else c["chunk_size"] for c in config["chunks"]]) num_bytes = sum([c["chunk_bytes"] for c in config["chunks"]]) - data_format = tree_unflatten(config["config"]["data_format"], treespec_loads(config["config"]["data_spec"])) + if config["config"] is not None: + data_format = tree_unflatten( + config["config"]["data_format"], treespec_loads(config["config"]["data_spec"]) + ) + else: + data_format = None num_chunks = len(config["chunks"]) # The platform can't store more than 1024 entries. @@ -735,7 +740,7 @@ class DataChunkRecipe(DataRecipe): size=size, num_bytes=num_bytes, data_format=data_format, - compression=config["config"]["compression"], + compression=config["config"]["compression"] if config["config"] else None, num_chunks=len(config["chunks"]), num_bytes_per_chunk=num_bytes_per_chunk, ) diff --git a/src/lightning/data/processing/utilities.py b/src/lightning/data/processing/utilities.py index 9d428dc40a..0ed0b12e6a 100644 --- a/src/lightning/data/processing/utilities.py +++ b/src/lightning/data/processing/utilities.py @@ -2,7 +2,7 @@ import io import os import urllib from contextlib import contextmanager -from subprocess import Popen +from subprocess import DEVNULL, Popen from typing import Any, Callable, List, Optional, Tuple, Union from lightning.data.constants import _IS_IN_STUDIO, _LIGHTNING_CLOUD_LATEST @@ -134,7 +134,7 @@ def optimize_dns(enable: bool) -> None: f"sudo /home/zeus/miniconda3/envs/cloudspace/bin/python" f" -c 'from lightning.data.processing.utilities import _optimize_dns; _optimize_dns({enable})'" ) - Popen(cmd, shell=True).wait() # E501 + Popen(cmd, shell=True, stdout=DEVNULL, stderr=DEVNULL).wait() # E501 def _optimize_dns(enable: bool) -> None: diff --git a/tests/tests_data/processing/test_data_processor.py b/tests/tests_data/processing/test_data_processor.py index d3c40a8905..f95f2404d0 100644 --- a/tests/tests_data/processing/test_data_processor.py +++ b/tests/tests_data/processing/test_data_processor.py @@ -1023,3 +1023,19 @@ def test_map_is_last(num_workers, expected, tmpdir): ) assert sorted(os.listdir(tmpdir)) == expected + + +def no_op(index): + pass + + +def test_empty_optimize(tmpdir): + optimize( + no_op, + list(range(10)), + output_dir=str(tmpdir), + chunk_bytes="64MB", + num_workers=1, + ) + + assert os.listdir(tmpdir) == ["index.json"]